//! Performance monitoring dashboard (codex_memory/performance/dashboard.rs).

1use anyhow::Result;
2use chrono::{DateTime, Utc};
3use prometheus::{Counter, Gauge, Histogram, Registry};
4use serde::{Deserialize, Serialize};
5use sqlx::PgPool;
6use std::collections::HashMap;
7use std::sync::Arc;
8use tokio::sync::RwLock;
9
/// A single alert raised when a monitored metric crosses one of its
/// configured thresholds (see the `check_*_threshold` methods).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceAlert {
    /// Unique alert id (a freshly generated UUID v4 string).
    pub id: String,
    /// Name of the metric that triggered the alert, e.g. "p95_latency_ms".
    pub metric_name: String,
    /// Direction of the comparison that fired.
    pub threshold_type: ThresholdType,
    /// The configured cutoff that was crossed.
    pub threshold_value: f64,
    /// The observed metric value at alert time.
    pub current_value: f64,
    pub severity: AlertSeverity,
    /// Human-readable description, also emitted to the tracing log.
    pub message: String,
    /// Creation time; used for retention pruning in `trigger_alert`.
    pub timestamp: DateTime<Utc>,
    /// Whether the alert has been resolved; unresolved alerts appear in
    /// `get_performance_summary`.
    pub resolved: bool,
}
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub enum ThresholdType {
25    GreaterThan,
26    LessThan,
27    PercentageIncrease,
28    PercentageDecrease,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub enum AlertSeverity {
33    Info,
34    Warning,
35    Critical,
36}
37
/// Dashboard configuration: per-metric alert thresholds plus the Story 10
/// performance targets. `Default` encodes the Story 10 requirements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DashboardConfig {
    /// Alert thresholds keyed by metric name (e.g. "p95_latency_ms").
    pub alert_thresholds: HashMap<String, AlertThreshold>,
    // NOTE(review): not read anywhere in this file — presumably consumed by a
    // monitoring loop elsewhere; confirm.
    pub monitoring_interval_seconds: u64,
    /// How many days of alerts to keep in memory (pruned in `trigger_alert`).
    pub retention_days: u32,
    // NOTE(review): not read in this file — confirm where auto-scaling is
    // actually driven from.
    pub enable_auto_scaling: bool,
    /// Targets consulted by `check_story10_compliance`.
    pub performance_targets: PerformanceTargets,
}
46
/// Warning/critical cutoffs for one monitored metric.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertThreshold {
    /// Cutoff that raises a Warning-severity alert.
    pub warning_threshold: f64,
    /// Cutoff that raises a Critical-severity alert.
    pub critical_threshold: f64,
    /// Which direction of crossing counts as a violation.
    pub threshold_type: ThresholdType,
    /// When false, the threshold is skipped entirely by the checks.
    pub enabled: bool,
}
54
/// Target values for the Story 10 performance requirements, consulted by
/// `PerformanceDashboard::check_story10_compliance`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTargets {
    /// Maximum acceptable p95 latency, in milliseconds.
    pub p95_latency_ms: f64,          // Story 10: < 2 seconds
    /// Minimum token reduction versus full context, in percent.
    pub token_reduction_percent: f64, // Story 10: 90%
    /// Minimum free-memory headroom, in percent.
    pub memory_headroom_percent: f64, // Story 10: 20%
    // NOTE(review): the three fields below are not checked by
    // `check_story10_compliance` in this file — confirm intended consumers.
    pub batch_throughput_ops_sec: f64,
    pub cache_hit_ratio: f64,
    pub connection_pool_usage: f64,
}
64
/// Central performance dashboard: owns the Prometheus metric handles, the
/// in-memory alert list, and the database pool used to persist alerts.
#[derive(Debug)]
pub struct PerformanceDashboard {
    config: DashboardConfig,
    /// Postgres pool; used for alert persistence and to derive the
    /// connection-pool-usage metric.
    pool: PgPool,
    /// Registry all metrics below are registered on; used for text export.
    registry: Arc<Registry>,
    /// Recent alerts, pruned to `config.retention_days` on each trigger.
    /// Shared across clones via `Arc`.
    alerts: Arc<RwLock<Vec<PerformanceAlert>>>,

    // Metrics
    latency_histogram: Histogram,
    throughput_gauge: Gauge,
    memory_usage_gauge: Gauge,
    token_reduction_gauge: Gauge,
    alert_counter: Counter,
}
79
80impl Default for DashboardConfig {
81    fn default() -> Self {
82        let mut alert_thresholds = HashMap::new();
83
84        // Story 10 Performance Requirements
85        alert_thresholds.insert(
86            "p95_latency_ms".to_string(),
87            AlertThreshold {
88                warning_threshold: 1500.0,  // 1.5s warning
89                critical_threshold: 2000.0, // 2s critical (Story 10 requirement)
90                threshold_type: ThresholdType::GreaterThan,
91                enabled: true,
92            },
93        );
94
95        alert_thresholds.insert(
96            "memory_headroom_percent".to_string(),
97            AlertThreshold {
98                warning_threshold: 25.0,  // 25% warning
99                critical_threshold: 20.0, // 20% critical (Story 10 requirement)
100                threshold_type: ThresholdType::LessThan,
101                enabled: true,
102            },
103        );
104
105        alert_thresholds.insert(
106            "token_reduction_percent".to_string(),
107            AlertThreshold {
108                warning_threshold: 85.0,  // 85% warning
109                critical_threshold: 90.0, // 90% critical (Story 10 requirement)
110                threshold_type: ThresholdType::LessThan,
111                enabled: true,
112            },
113        );
114
115        alert_thresholds.insert(
116            "connection_pool_usage".to_string(),
117            AlertThreshold {
118                warning_threshold: 70.0,  // 70% warning
119                critical_threshold: 85.0, // 85% critical
120                threshold_type: ThresholdType::GreaterThan,
121                enabled: true,
122            },
123        );
124
125        alert_thresholds.insert(
126            "batch_throughput_regression".to_string(),
127            AlertThreshold {
128                warning_threshold: 15.0,  // 15% regression warning
129                critical_threshold: 25.0, // 25% regression critical
130                threshold_type: ThresholdType::PercentageDecrease,
131                enabled: true,
132            },
133        );
134
135        Self {
136            alert_thresholds,
137            monitoring_interval_seconds: 60,
138            retention_days: 30,
139            enable_auto_scaling: true,
140            performance_targets: PerformanceTargets {
141                p95_latency_ms: 2000.0,
142                token_reduction_percent: 90.0,
143                memory_headroom_percent: 20.0,
144                batch_throughput_ops_sec: 1000.0,
145                cache_hit_ratio: 0.9,
146                connection_pool_usage: 0.7,
147            },
148        }
149    }
150}
151
152impl PerformanceDashboard {
153    pub fn new(config: DashboardConfig, pool: PgPool) -> Result<Self> {
154        let registry = Arc::new(Registry::new());
155
156        let latency_histogram = Histogram::with_opts(
157            prometheus::HistogramOpts::new(
158                "memory_operation_duration_seconds",
159                "Duration of memory operations in seconds",
160            )
161            .buckets(vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0]),
162        )?;
163
164        let throughput_gauge = Gauge::with_opts(prometheus::Opts::new(
165            "memory_operations_per_second",
166            "Number of memory operations per second",
167        ))?;
168
169        let memory_usage_gauge = Gauge::with_opts(prometheus::Opts::new(
170            "memory_headroom_percentage",
171            "Available memory headroom as percentage",
172        ))?;
173
174        let token_reduction_gauge = Gauge::with_opts(prometheus::Opts::new(
175            "token_reduction_percentage",
176            "Token reduction percentage vs full context",
177        ))?;
178
179        let alert_counter = Counter::with_opts(prometheus::Opts::new(
180            "performance_alerts_total",
181            "Total number of performance alerts triggered",
182        ))?;
183
184        registry.register(Box::new(latency_histogram.clone()))?;
185        registry.register(Box::new(throughput_gauge.clone()))?;
186        registry.register(Box::new(memory_usage_gauge.clone()))?;
187        registry.register(Box::new(token_reduction_gauge.clone()))?;
188        registry.register(Box::new(alert_counter.clone()))?;
189
190        Ok(Self {
191            config,
192            pool,
193            registry,
194            alerts: Arc::new(RwLock::new(Vec::new())),
195            latency_histogram,
196            throughput_gauge,
197            memory_usage_gauge,
198            token_reduction_gauge,
199            alert_counter,
200        })
201    }
202
203    /// Record operation latency
204    pub fn record_latency(&self, duration_seconds: f64) {
205        self.latency_histogram.observe(duration_seconds);
206
207        // Check for latency alerts
208        let p95_latency_ms = duration_seconds * 1000.0;
209        tokio::spawn({
210            let dashboard = self.clone();
211            async move {
212                dashboard.check_latency_threshold(p95_latency_ms).await;
213            }
214        });
215    }
216
217    /// Update throughput metric
218    pub fn record_throughput(&self, ops_per_second: f64) {
219        self.throughput_gauge.set(ops_per_second);
220    }
221
222    /// Update memory headroom
223    pub fn record_memory_headroom(&self, headroom_percent: f64) {
224        self.memory_usage_gauge.set(headroom_percent);
225
226        // Check for memory headroom alerts
227        tokio::spawn({
228            let dashboard = self.clone();
229            async move {
230                dashboard
231                    .check_memory_headroom_threshold(headroom_percent)
232                    .await;
233            }
234        });
235    }
236
237    /// Update token reduction metrics
238    pub fn record_token_reduction(&self, reduction_percent: f64) {
239        self.token_reduction_gauge.set(reduction_percent);
240
241        // Check for token reduction alerts
242        tokio::spawn({
243            let dashboard = self.clone();
244            async move {
245                dashboard
246                    .check_token_reduction_threshold(reduction_percent)
247                    .await;
248            }
249        });
250    }
251
252    /// Check latency threshold and create alerts
253    async fn check_latency_threshold(&self, latency_ms: f64) {
254        if let Some(threshold) = self.config.alert_thresholds.get("p95_latency_ms") {
255            if !threshold.enabled {
256                return;
257            }
258
259            let severity = if latency_ms > threshold.critical_threshold {
260                AlertSeverity::Critical
261            } else if latency_ms > threshold.warning_threshold {
262                AlertSeverity::Warning
263            } else {
264                return;
265            };
266
267            let alert = PerformanceAlert {
268                id: uuid::Uuid::new_v4().to_string(),
269                metric_name: "p95_latency_ms".to_string(),
270                threshold_type: ThresholdType::GreaterThan,
271                threshold_value: match severity {
272                    AlertSeverity::Critical => threshold.critical_threshold,
273                    _ => threshold.warning_threshold,
274                },
275                current_value: latency_ms,
276                severity,
277                message: format!(
278                    "P95 latency {latency_ms:.1}ms exceeds threshold. Story 10 requirement: <2000ms"
279                ),
280                timestamp: Utc::now(),
281                resolved: false,
282            };
283
284            self.trigger_alert(alert).await;
285        }
286    }
287
288    /// Check memory headroom threshold
289    async fn check_memory_headroom_threshold(&self, headroom_percent: f64) {
290        if let Some(threshold) = self.config.alert_thresholds.get("memory_headroom_percent") {
291            if !threshold.enabled || headroom_percent >= threshold.warning_threshold {
292                return;
293            }
294
295            let severity = if headroom_percent < threshold.critical_threshold {
296                AlertSeverity::Critical
297            } else {
298                AlertSeverity::Warning
299            };
300
301            let alert = PerformanceAlert {
302                id: uuid::Uuid::new_v4().to_string(),
303                metric_name: "memory_headroom_percent".to_string(),
304                threshold_type: ThresholdType::LessThan,
305                threshold_value: match severity {
306                    AlertSeverity::Critical => threshold.critical_threshold,
307                    _ => threshold.warning_threshold,
308                },
309                current_value: headroom_percent,
310                severity,
311                message: format!(
312                    "Memory headroom {headroom_percent:.1}% below threshold. Story 10 requirement: ≥20%"
313                ),
314                timestamp: Utc::now(),
315                resolved: false,
316            };
317
318            self.trigger_alert(alert).await;
319        }
320    }
321
322    /// Check token reduction threshold
323    async fn check_token_reduction_threshold(&self, reduction_percent: f64) {
324        if let Some(threshold) = self.config.alert_thresholds.get("token_reduction_percent") {
325            if !threshold.enabled || reduction_percent >= threshold.critical_threshold {
326                return;
327            }
328
329            let severity = if reduction_percent < threshold.critical_threshold {
330                AlertSeverity::Critical
331            } else if reduction_percent < threshold.warning_threshold {
332                AlertSeverity::Warning
333            } else {
334                return;
335            };
336
337            let alert = PerformanceAlert {
338                id: uuid::Uuid::new_v4().to_string(),
339                metric_name: "token_reduction_percent".to_string(),
340                threshold_type: ThresholdType::LessThan,
341                threshold_value: match severity {
342                    AlertSeverity::Critical => threshold.critical_threshold,
343                    _ => threshold.warning_threshold,
344                },
345                current_value: reduction_percent,
346                severity,
347                message: format!(
348                    "Token reduction {reduction_percent:.1}% below target. Story 10 requirement: ≥90%"
349                ),
350                timestamp: Utc::now(),
351                resolved: false,
352            };
353
354            self.trigger_alert(alert).await;
355        }
356    }
357
358    /// Trigger a performance alert
359    async fn trigger_alert(&self, alert: PerformanceAlert) {
360        self.alert_counter.inc();
361
362        // Store alert
363        {
364            let mut alerts = self.alerts.write().await;
365            alerts.push(alert.clone());
366
367            // Keep only recent alerts to prevent memory bloat
368            alerts.retain(|a| {
369                let age = Utc::now().signed_duration_since(a.timestamp);
370                age.num_days() <= self.config.retention_days as i64
371            });
372        }
373
374        // Log alert
375        match alert.severity {
376            AlertSeverity::Critical => {
377                tracing::error!(
378                    "🚨 CRITICAL Performance Alert: {} - {}",
379                    alert.metric_name,
380                    alert.message
381                );
382            }
383            AlertSeverity::Warning => {
384                tracing::warn!(
385                    "⚠️  WARNING Performance Alert: {} - {}",
386                    alert.metric_name,
387                    alert.message
388                );
389            }
390            AlertSeverity::Info => {
391                tracing::info!(
392                    "ℹ️  INFO Performance Alert: {} - {}",
393                    alert.metric_name,
394                    alert.message
395                );
396            }
397        }
398
399        // Store in database for persistence
400        if let Err(e) = self.store_alert_in_db(&alert).await {
401            tracing::error!("Failed to store alert in database: {}", e);
402        }
403    }
404
405    /// Store alert in database
406    async fn store_alert_in_db(&self, alert: &PerformanceAlert) -> Result<()> {
407        // Note: Will be enabled after migration is applied
408        let _ = sqlx::query(
409            r#"
410            INSERT INTO performance_alerts (
411                id, metric_name, threshold_type, threshold_value, current_value,
412                severity, message, timestamp, resolved
413            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
414            ON CONFLICT (id) DO NOTHING
415            "#,
416        )
417        .bind(&alert.id)
418        .bind(&alert.metric_name)
419        .bind(serde_json::to_string(&alert.threshold_type)?)
420        .bind(alert.threshold_value)
421        .bind(alert.current_value)
422        .bind(serde_json::to_string(&alert.severity)?)
423        .bind(&alert.message)
424        .bind(alert.timestamp)
425        .bind(alert.resolved)
426        .execute(&self.pool)
427        .await;
428
429        Ok(())
430    }
431
432    /// Get current performance summary
433    pub async fn get_performance_summary(&self) -> Result<PerformanceSummary> {
434        let current_metrics = self.get_current_metrics().await?;
435        let recent_alerts = {
436            let alerts = self.alerts.read().await;
437            alerts
438                .iter()
439                .filter(|a| !a.resolved)
440                .cloned()
441                .collect::<Vec<_>>()
442        };
443
444        let story10_compliance = self.check_story10_compliance(&current_metrics);
445
446        Ok(PerformanceSummary {
447            current_metrics,
448            recent_alerts,
449            story10_compliance,
450            last_updated: Utc::now(),
451        })
452    }
453
454    /// Get current performance metrics
455    async fn get_current_metrics(&self) -> Result<HashMap<String, f64>> {
456        let mut metrics = HashMap::new();
457
458        // Get latest metrics from Prometheus
459        metrics.insert(
460            "memory_headroom_percent".to_string(),
461            self.memory_usage_gauge.get(),
462        );
463        metrics.insert(
464            "token_reduction_percent".to_string(),
465            self.token_reduction_gauge.get(),
466        );
467        metrics.insert(
468            "throughput_ops_sec".to_string(),
469            self.throughput_gauge.get(),
470        );
471
472        // Calculate P95 latency from histogram (approximation using average)
473        let sample_count = self.latency_histogram.get_sample_count();
474        let p95_latency = if sample_count > 0 {
475            self.latency_histogram.get_sample_sum() / sample_count as f64 * 1000.0
476        } else {
477            0.0
478        };
479        metrics.insert("p95_latency_ms".to_string(), p95_latency);
480
481        // Get connection pool usage from database
482        let pool_stats = self.pool.size() as f64;
483        let pool_usage = (pool_stats - self.pool.num_idle() as f64) / pool_stats;
484        metrics.insert(
485            "connection_pool_usage_percent".to_string(),
486            pool_usage * 100.0,
487        );
488
489        Ok(metrics)
490    }
491
492    /// Check Story 10 compliance
493    fn check_story10_compliance(&self, metrics: &HashMap<String, f64>) -> Story10Compliance {
494        let p95_latency_compliant = metrics
495            .get("p95_latency_ms")
496            .map(|&v| v < self.config.performance_targets.p95_latency_ms)
497            .unwrap_or(false);
498
499        let token_reduction_compliant = metrics
500            .get("token_reduction_percent")
501            .map(|&v| v >= self.config.performance_targets.token_reduction_percent)
502            .unwrap_or(false);
503
504        let memory_headroom_compliant = metrics
505            .get("memory_headroom_percent")
506            .map(|&v| v >= self.config.performance_targets.memory_headroom_percent)
507            .unwrap_or(false);
508
509        Story10Compliance {
510            p95_latency_compliant,
511            token_reduction_compliant,
512            memory_headroom_compliant,
513            overall_compliant: p95_latency_compliant
514                && token_reduction_compliant
515                && memory_headroom_compliant,
516        }
517    }
518
519    /// Export Prometheus metrics
520    pub fn export_prometheus_metrics(&self) -> String {
521        let encoder = prometheus::TextEncoder::new();
522        let metric_families = self.registry.gather();
523        encoder
524            .encode_to_string(&metric_families)
525            .unwrap_or_default()
526    }
527}
528
/// Manual `Clone` so the sync `record_*` methods can hand an owned dashboard
/// to spawned threshold-check tasks.
///
/// `registry` and `alerts` are `Arc`s, so clones share the same registry and
/// alert list; `config` is a deep copy.
// NOTE(review): `pool` and the prometheus metric handles are presumed to be
// cheap shared handles (clones observing the same pool / metric state) —
// confirm against the sqlx and prometheus crate docs.
impl Clone for PerformanceDashboard {
    fn clone(&self) -> Self {
        Self {
            config: self.config.clone(),
            pool: self.pool.clone(),
            registry: self.registry.clone(),
            alerts: self.alerts.clone(),
            latency_histogram: self.latency_histogram.clone(),
            throughput_gauge: self.throughput_gauge.clone(),
            memory_usage_gauge: self.memory_usage_gauge.clone(),
            token_reduction_gauge: self.token_reduction_gauge.clone(),
            alert_counter: self.alert_counter.clone(),
        }
    }
}
544
/// Snapshot returned by `PerformanceDashboard::get_performance_summary`.
#[derive(Debug, Serialize, Deserialize)]
pub struct PerformanceSummary {
    /// Current metric readings, keyed by metric name.
    pub current_metrics: HashMap<String, f64>,
    /// Unresolved alerts still held in memory.
    pub recent_alerts: Vec<PerformanceAlert>,
    /// Story 10 compliance evaluation of `current_metrics`.
    pub story10_compliance: Story10Compliance,
    /// When this snapshot was produced.
    pub last_updated: DateTime<Utc>,
}
552
/// Per-requirement compliance flags for the Story 10 performance targets,
/// produced by `PerformanceDashboard::check_story10_compliance`.
#[derive(Debug, Serialize, Deserialize)]
pub struct Story10Compliance {
    pub p95_latency_compliant: bool,     // < 2 seconds
    pub token_reduction_compliant: bool, // ≥ 90%
    pub memory_headroom_compliant: bool, // ≥ 20%
    /// True only when all three individual checks pass.
    pub overall_compliant: bool,
}