use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, VecDeque};
use std::sync::{Arc, Mutex, RwLock};
use std::time::{Duration, Instant};
use std::thread;
use tracing::{debug, error, info, warn};
use super::super::types::*;
use crate::device_info::{MobileDeviceDetector, MobileDeviceInfo, ThermalState};
/// Real-time performance monitor.
///
/// Receives metric snapshots, keeps a bounded window of the most recent ones,
/// derives a performance score, trend directions and a health status from
/// them, and runs every snapshot through an [`AlertManager`].
#[derive(Debug)]
pub struct RealTimeMonitor {
    /// Monitoring settings. The visible code reads `enable_background_monitoring`,
    /// `monitoring_interval` and `max_metrics_buffer_size` from it.
    config: RealTimeMonitoringConfig,
    /// State handed out by `get_current_state`.
    current_state: RealTimeState,
    /// Evaluates the alert rules against each incoming snapshot.
    alert_manager: AlertManager,
    /// Most recent snapshots, oldest at the front; trimmed to
    /// `config.max_metrics_buffer_size` entries on every update.
    live_metrics: Arc<RwLock<VecDeque<MobileMetricsSnapshot>>>,
    /// Trend directions for memory, CPU and latency, recomputed on every update.
    trending_metrics: TrendingMetrics,
    /// Health status derived from the latest snapshot.
    system_health: SystemHealth,
    /// Counters and timings collected while monitoring.
    monitor_stats: MonitoringStats,
    /// Handle of the background thread, set once background monitoring has
    /// been started. The visible code never joins it.
    _monitor_thread: Option<thread::JoinHandle<()>>,
}
/// Snapshot of what the monitor currently knows about the system.
#[derive(Debug, Clone)]
pub struct RealTimeState {
    /// Weighted performance score; the monitor keeps it within `0.0..=100.0`
    /// and a fresh state starts at `100.0`.
    pub performance_score: f32,
    /// Alerts reported by the alert manager.
    pub active_alerts: Vec<PerformanceAlert>,
    /// Trend directions for the tracked metrics.
    pub trending_metrics: TrendingMetrics,
    /// Health status of the system.
    pub system_health: SystemHealth,
    /// Instant of the most recent state refresh, if any.
    pub last_update: Option<Instant>,
    /// Elapsed monitoring time; zero on construction.
    pub uptime: Duration,
}
/// Counters and timings gathered by the monitor.
///
/// `Default` yields all-zero values.
#[derive(Debug, Clone, Default)]
pub struct MonitoringStats {
    /// Accumulated time spent monitoring.
    pub total_monitor_time: Duration,
    /// Number of alerts reported by the alert manager; incremented once per
    /// returned alert.
    pub alerts_generated: u64,
    /// Subset of `alerts_generated` whose severity was `AlertSeverity::Critical`.
    pub critical_alerts: u64,
    /// Average alert response time in milliseconds.
    /// NOTE(review): not updated by any code visible in this file.
    pub avg_alert_response_time_ms: f32,
    /// Monitoring accuracy.
    /// NOTE(review): not updated by any code visible in this file; scale unknown.
    pub monitoring_accuracy: f32,
    /// False alarm rate.
    /// NOTE(review): not updated by any code visible in this file; scale unknown.
    pub false_alarm_rate: f32,
}
/// Rule-based alert evaluator.
///
/// Holds the alert rules, the latest alert produced per rule, and a history
/// of every alert that fired.
#[derive(Debug)]
pub struct AlertManager {
    /// Manager settings. Stored at construction; the visible code never reads it.
    config: AlertManagerConfig,
    /// Latest alert per rule, keyed by the rule id.
    active_alerts: HashMap<String, PerformanceAlert>,
    /// Chronological log of fired alerts, oldest first.
    alert_history: VecDeque<AlertRecord>,
    /// Rules checked on every evaluation; disabled rules are skipped.
    alert_rules: Vec<AlertRule>,
    /// Registered notification sinks. Starts empty; the visible code never
    /// adds to it or invokes it.
    notification_handlers: Vec<Box<dyn NotificationHandler + Send + Sync>>,
}
/// One entry of the alert history.
#[derive(Debug, Clone)]
pub struct AlertRecord {
    /// Identifier of the alert; the alert manager stores the id of the rule
    /// that fired here.
    pub alert_id: String,
    /// Moment the alert fired.
    pub timestamp: Instant,
    /// Kind of alert; the alert manager stores the rule name here.
    pub alert_type: String,
    /// Severity label copied from the rule (e.g. `"High"`, `"Critical"`).
    pub severity: String,
    /// Human-readable description of the alert.
    pub message: String,
    /// Whether the alert has been resolved. Records are created with `false`.
    pub resolved: bool,
    /// Moment the alert was resolved. Records are created with `None`.
    pub resolution_time: Option<Instant>,
}
/// Definition of a single alert rule.
#[derive(Debug, Clone)]
pub struct AlertRule {
    /// Stable identifier. The alert manager selects the check to run by this
    /// id, so an id it does not recognise never fires.
    pub rule_id: String,
    /// Display name; becomes the title of alerts produced by this rule.
    pub name: String,
    /// Textual description of the condition. It is only echoed into the
    /// alert description and is not parsed or evaluated.
    pub condition: String,
    /// Limit the metric is compared against. Used by the built-in memory and
    /// CPU rules; the built-in thermal rule ignores it.
    pub threshold_value: f32,
    /// Severity label: `"Low"`, `"Medium"`, `"High"` or `"Critical"`. Any
    /// other value is treated as medium when an alert is created.
    pub severity: String,
    /// Disabled rules are skipped during evaluation.
    pub enabled: bool,
    /// Moment the rule was created.
    pub created_at: Instant,
}
/// Sink that can be registered with an `AlertManager` to receive alerts.
///
/// NOTE(review): no code visible in this file calls these methods, so the
/// descriptions below are taken from the signatures only — confirm against
/// the implementors.
pub trait NotificationHandler: std::fmt::Debug {
    /// Delivers `alert` through this handler.
    ///
    /// # Errors
    ///
    /// Presumably returns an error when delivery fails — confirm with implementors.
    fn send_notification(&self, alert: &PerformanceAlert) -> Result<()>;

    /// Short name identifying the kind of handler.
    fn handler_type(&self) -> &str;

    /// Whether the handler can currently be used.
    fn is_available(&self) -> bool;
}
impl RealTimeMonitor {
    /// Smallest change between two consecutive samples that counts as a trend.
    ///
    /// The same figure is applied to memory (%), CPU (%) and latency (ms).
    const TREND_THRESHOLD: f32 = 5.0;

    /// Creates an idle monitor with an empty metrics buffer and a default
    /// alert manager.
    ///
    /// # Errors
    ///
    /// Propagates any failure of `AlertManager::new`.
    pub fn new(config: RealTimeMonitoringConfig) -> Result<Self> {
        let alert_manager = AlertManager::new(AlertManagerConfig::default())?;
        Ok(Self {
            config,
            current_state: RealTimeState::new(),
            alert_manager,
            live_metrics: Arc::new(RwLock::new(VecDeque::new())),
            trending_metrics: TrendingMetrics::default(),
            system_health: SystemHealth::default(),
            monitor_stats: MonitoringStats::default(),
            _monitor_thread: None,
        })
    }

    /// Starts a monitoring session.
    ///
    /// Resets the current state and, when
    /// `config.enable_background_monitoring` is set, makes sure the background
    /// thread is running. Time from a session that was still open is added to
    /// `total_monitor_time` before the reset, so a restart without a prior
    /// `stop_monitoring` does not lose it.
    pub fn start_monitoring(&mut self) -> Result<()> {
        info!("Starting real-time performance monitoring");
        let now = Instant::now();
        self.account_elapsed_time(now);
        self.current_state = RealTimeState::new();
        self.current_state.last_update = Some(now);
        if self.config.enable_background_monitoring {
            self.start_background_monitoring()?;
        }
        Ok(())
    }

    /// Ends the current monitoring session.
    ///
    /// Adds the time since the last accounting point to `total_monitor_time`
    /// and clears `current_state.last_update`, so a repeated call (or idle
    /// time before the next update) is not counted again.
    ///
    /// NOTE(review): the background thread, if any, keeps running; it only
    /// exits once the monitor itself is dropped.
    pub fn stop_monitoring(&mut self) -> Result<()> {
        info!("Stopping real-time performance monitoring");
        self.account_elapsed_time(Instant::now());
        self.current_state.last_update = None;
        Ok(())
    }

    /// Feeds one metrics snapshot into the monitor.
    ///
    /// The snapshot is appended to the live buffer (dropping the oldest entry
    /// once `config.max_metrics_buffer_size` is exceeded), then the
    /// performance score, trends, system health and alerts are refreshed and
    /// the elapsed time since the previous refresh is accounted for.
    ///
    /// # Panics
    ///
    /// Panics if the `live_metrics` lock is poisoned.
    pub fn update_metrics(&mut self, metrics: MobileMetricsSnapshot) -> Result<()> {
        // Remember the values of the previous sample before pushing the new
        // one; the trend calculation compares against them.
        let previous_sample = {
            let mut live_metrics = self
                .live_metrics
                .write()
                .expect("live_metrics lock should not be poisoned");
            let previous = live_metrics.back().map(|sample| {
                (
                    sample.memory_usage_percent,
                    sample.cpu_usage_percent,
                    sample.inference_latency_ms,
                )
            });
            live_metrics.push_back(metrics.clone());
            if live_metrics.len() > self.config.max_metrics_buffer_size {
                live_metrics.pop_front();
            }
            previous
        };

        self.update_performance_score(&metrics)?;
        self.update_trending_metrics(&metrics, previous_sample)?;
        self.update_system_health(&metrics)?;
        self.check_for_alerts(&metrics)?;

        let now = Instant::now();
        self.account_elapsed_time(now);
        self.current_state.last_update = Some(now);
        Ok(())
    }

    /// Returns the current state snapshot.
    pub fn get_current_state(&self) -> &RealTimeState {
        &self.current_state
    }

    /// Returns the statistics collected so far.
    pub fn get_monitoring_stats(&self) -> &MonitoringStats {
        &self.monitor_stats
    }

    /// Returns up to `limit` buffered snapshots, newest first.
    ///
    /// # Panics
    ///
    /// Panics if the `live_metrics` lock is poisoned.
    pub fn get_recent_metrics(&self, limit: usize) -> Vec<MobileMetricsSnapshot> {
        let live_metrics = self
            .live_metrics
            .read()
            .expect("live_metrics lock should not be poisoned");
        live_metrics.iter().rev().take(limit).cloned().collect()
    }

    /// Adds the time between the last accounting point and `now` to both
    /// `total_monitor_time` and the session uptime. Does nothing when no
    /// accounting point is recorded.
    fn account_elapsed_time(&mut self, now: Instant) {
        if let Some(previous) = self.current_state.last_update {
            let slice = now.saturating_duration_since(previous);
            self.monitor_stats.total_monitor_time += slice;
            self.current_state.uptime += slice;
        }
    }

    /// Spawns the background thread unless one has already been spawned.
    ///
    /// The worker currently only emits a debug log every
    /// `config.monitoring_interval`. It holds a weak reference to the metrics
    /// buffer and exits on the first tick after the monitor has been dropped,
    /// so it no longer outlives its owner indefinitely.
    fn start_background_monitoring(&mut self) -> Result<()> {
        // The worker never finishes while the monitor is alive, so an
        // existing handle means a thread is already ticking.
        if self._monitor_thread.is_some() {
            return Ok(());
        }

        let metrics_watch = Arc::downgrade(&self.live_metrics);
        let monitoring_interval = self.config.monitoring_interval;
        let handle = thread::spawn(move || loop {
            thread::sleep(monitoring_interval);
            if metrics_watch.upgrade().is_none() {
                break;
            }
            debug!("Background monitoring tick");
        });
        self._monitor_thread = Some(handle);
        Ok(())
    }

    /// Recomputes `current_state.performance_score` from one snapshot.
    ///
    /// Memory, CPU and latency each contribute 30 %, the thermal state 10 %.
    /// Latency maps linearly from 0 ms (100 points) to 1000 ms or more
    /// (0 points).
    fn update_performance_score(&mut self, metrics: &MobileMetricsSnapshot) -> Result<()> {
        let memory_score = 100.0 - metrics.memory_usage_percent;
        let cpu_score = 100.0 - metrics.cpu_usage_percent;
        let latency_score = (1000.0 - metrics.inference_latency_ms.min(1000.0)) / 10.0;
        let thermal_score = match metrics.thermal_state {
            ThermalState::Nominal => 100.0,
            ThermalState::Fair => 80.0,
            ThermalState::Serious => 60.0,
            ThermalState::Hot => 40.0,
            ThermalState::Critical => 20.0,
        };
        let weighted =
            memory_score * 0.3 + cpu_score * 0.3 + latency_score * 0.3 + thermal_score * 0.1;
        // `max` then `min` (rather than `clamp`) so a NaN input ends up as 0.0.
        self.current_state.performance_score = weighted.max(0.0).min(100.0);
        Ok(())
    }

    /// Recomputes the memory, CPU and latency trends and mirrors them into
    /// the current state.
    ///
    /// `previous_sample` carries `(memory %, cpu %, latency ms)` of the
    /// snapshot received before `metrics`, or `None` when there was none.
    fn update_trending_metrics(
        &mut self,
        metrics: &MobileMetricsSnapshot,
        previous_sample: Option<(f32, f32, f32)>,
    ) -> Result<()> {
        let previous_memory = previous_sample.map(|(memory, _, _)| memory);
        let previous_cpu = previous_sample.map(|(_, cpu, _)| cpu);
        let previous_latency = previous_sample.map(|(_, _, latency)| latency);

        self.trending_metrics.memory_trend = Self::calculate_trend(
            metrics.memory_usage_percent,
            previous_memory,
            self.trending_metrics.memory_trend,
        );
        self.trending_metrics.cpu_trend = Self::calculate_trend(
            metrics.cpu_usage_percent,
            previous_cpu,
            self.trending_metrics.cpu_trend,
        );
        self.trending_metrics.latency_trend = Self::calculate_trend(
            metrics.inference_latency_ms,
            previous_latency,
            self.trending_metrics.latency_trend,
        );

        // Keep the published state in step with the working copy.
        self.current_state.trending_metrics = self.trending_metrics.clone();
        Ok(())
    }

    /// Classifies the change from `previous_value` to `current_value`.
    ///
    /// A rise of more than `TREND_THRESHOLD` is `Increasing`, a drop of more
    /// than `TREND_THRESHOLD` is `Decreasing`, anything else is `Stable`.
    /// Without a previous value there is nothing to compare, so
    /// `previous_trend` is kept.
    fn calculate_trend(
        current_value: f32,
        previous_value: Option<f32>,
        previous_trend: TrendDirection,
    ) -> TrendDirection {
        match previous_value {
            None => previous_trend,
            Some(previous_value) => {
                let delta = current_value - previous_value;
                if delta > Self::TREND_THRESHOLD {
                    TrendDirection::Increasing
                } else if delta < -Self::TREND_THRESHOLD {
                    TrendDirection::Decreasing
                } else {
                    TrendDirection::Stable
                }
            }
        }
    }

    /// Derives the overall health status from one snapshot, stamps the check
    /// time and mirrors the result into the current state.
    fn update_system_health(&mut self, metrics: &MobileMetricsSnapshot) -> Result<()> {
        let health_score = self.calculate_health_score(metrics);
        self.system_health.overall_status = if health_score > 80.0 {
            HealthStatus::Excellent
        } else if health_score > 60.0 {
            HealthStatus::Good
        } else if health_score > 40.0 {
            HealthStatus::Fair
        } else if health_score > 20.0 {
            HealthStatus::Poor
        } else {
            HealthStatus::Critical
        };
        self.system_health.last_check = Some(Instant::now());

        // Keep the published state in step with the working copy.
        self.current_state.system_health = self.system_health.clone();
        Ok(())
    }

    /// Averages three bucketed sub-scores (memory, CPU, thermal) into a
    /// health score between 0 and 100.
    fn calculate_health_score(&self, metrics: &MobileMetricsSnapshot) -> f32 {
        let memory_health = if metrics.memory_usage_percent < 70.0 {
            100.0
        } else if metrics.memory_usage_percent < 85.0 {
            60.0
        } else {
            20.0
        };
        let cpu_health = if metrics.cpu_usage_percent < 70.0 {
            100.0
        } else if metrics.cpu_usage_percent < 90.0 {
            60.0
        } else {
            20.0
        };
        let thermal_health = match metrics.thermal_state {
            ThermalState::Nominal => 100.0,
            ThermalState::Fair => 80.0,
            ThermalState::Serious => 50.0,
            ThermalState::Hot => 20.0,
            ThermalState::Critical => 0.0,
        };
        (memory_health + cpu_health + thermal_health) / 3.0
    }

    /// Runs the alert rules against `metrics` and records every alert that
    /// fired.
    ///
    /// The counters are bumped once per fired alert. In
    /// `current_state.active_alerts` an alert replaces an earlier entry with
    /// the same `alert_id` instead of being appended again, so the list does
    /// not grow on every update while a condition persists.
    fn check_for_alerts(&mut self, metrics: &MobileMetricsSnapshot) -> Result<()> {
        let alerts = self.alert_manager.evaluate_alerts(metrics)?;
        for alert in alerts {
            self.monitor_stats.alerts_generated += 1;
            if alert.severity == AlertSeverity::Critical {
                self.monitor_stats.critical_alerts += 1;
            }
            debug!("Performance alert triggered: {}", alert.title);

            // Alerts without an id cannot be matched and are always appended.
            let existing_slot = alert.alert_id.as_ref().and_then(|id| {
                self.current_state
                    .active_alerts
                    .iter()
                    .position(|active| active.alert_id.as_ref() == Some(id))
            });
            match existing_slot {
                Some(index) => self.current_state.active_alerts[index] = alert,
                None => self.current_state.active_alerts.push(alert),
            }
        }
        Ok(())
    }
}
impl RealTimeState {
    /// Builds the initial state: a perfect score of `100.0`, no alerts,
    /// default trends and health, no recorded update and zero uptime.
    pub fn new() -> Self {
        let trending_metrics = TrendingMetrics::default();
        let system_health = SystemHealth::default();
        Self {
            performance_score: 100.0,
            active_alerts: Vec::new(),
            trending_metrics,
            system_health,
            last_update: None,
            uptime: Duration::from_secs(0),
        }
    }

    /// Sets `uptime` to the time that has passed since `start_time`.
    pub fn update_uptime(&mut self, start_time: Instant) {
        let since_start = start_time.elapsed();
        self.uptime = since_start;
    }
}
impl AlertManager {
pub fn new(config: AlertManagerConfig) -> Result<Self> {
let alert_rules = Self::initialize_default_alert_rules();
Ok(Self {
config,
active_alerts: HashMap::new(),
alert_history: VecDeque::new(),
alert_rules,
notification_handlers: Vec::new(),
})
}
pub fn evaluate_alerts(&mut self, metrics: &MobileMetricsSnapshot) -> Result<Vec<PerformanceAlert>> {
let mut triggered_alerts = Vec::new();
for rule in &self.alert_rules {
if !rule.enabled {
continue;
}
if self.evaluate_alert_rule(rule, metrics)? {
let alert = self.create_alert_from_rule(rule, metrics)?;
triggered_alerts.push(alert.clone());
self.active_alerts.insert(rule.rule_id.clone(), alert);
let record = AlertRecord {
alert_id: rule.rule_id.clone(),
timestamp: Instant::now(),
alert_type: rule.name.clone(),
severity: rule.severity.clone(),
message: format!("Alert triggered: {}", rule.name),
resolved: false,
resolution_time: None,
};
self.alert_history.push_back(record);
}
}
Ok(triggered_alerts)
}
fn initialize_default_alert_rules() -> Vec<AlertRule> {
vec![
AlertRule {
rule_id: "high_memory_usage".to_string(),
name: "High Memory Usage".to_string(),
condition: "memory_usage_percent > threshold".to_string(),
threshold_value: 90.0,
severity: "High".to_string(),
enabled: true,
created_at: Instant::now(),
},
AlertRule {
rule_id: "high_cpu_usage".to_string(),
name: "High CPU Usage".to_string(),
condition: "cpu_usage_percent > threshold".to_string(),
threshold_value: 95.0,
severity: "High".to_string(),
enabled: true,
created_at: Instant::now(),
},
AlertRule {
rule_id: "thermal_throttling".to_string(),
name: "Thermal Throttling".to_string(),
condition: "thermal_state >= Hot".to_string(),
threshold_value: 0.0, severity: "Critical".to_string(),
enabled: true,
created_at: Instant::now(),
},
]
}
fn evaluate_alert_rule(&self, rule: &AlertRule, metrics: &MobileMetricsSnapshot) -> Result<bool> {
match rule.rule_id.as_str() {
"high_memory_usage" => Ok(metrics.memory_usage_percent > rule.threshold_value),
"high_cpu_usage" => Ok(metrics.cpu_usage_percent > rule.threshold_value),
"thermal_throttling" => Ok(matches!(
metrics.thermal_state,
ThermalState::Hot | ThermalState::Critical
)),
_ => Ok(false),
}
}
fn create_alert_from_rule(&self, rule: &AlertRule, metrics: &MobileMetricsSnapshot) -> Result<PerformanceAlert> {
let severity = match rule.severity.as_str() {
"Low" => AlertSeverity::Low,
"Medium" => AlertSeverity::Medium,
"High" => AlertSeverity::High,
"Critical" => AlertSeverity::Critical,
_ => AlertSeverity::Medium,
};
Ok(PerformanceAlert {
alert_type: AlertType::PerformanceIssue,
severity,
title: rule.name.clone(),
description: format!("Alert condition triggered: {}", rule.condition),
timestamp: Instant::now(),
metrics_snapshot: Some(metrics.clone()),
suggested_actions: self.get_suggested_actions_for_rule(rule),
alert_id: Some(rule.rule_id.clone()),
resolution_status: AlertResolutionStatus::Open,
})
}
fn get_suggested_actions_for_rule(&self, rule: &AlertRule) -> Vec<String> {
match rule.rule_id.as_str() {
"high_memory_usage" => vec![
"Consider reducing batch size".to_string(),
"Enable memory optimization settings".to_string(),
"Monitor for memory leaks".to_string(),
],
"high_cpu_usage" => vec![
"Enable GPU acceleration if available".to_string(),
"Reduce thread count".to_string(),
"Optimize model operations".to_string(),
],
"thermal_throttling" => vec![
"Reduce inference frequency".to_string(),
"Implement thermal management".to_string(),
"Allow device cooldown".to_string(),
],
_ => vec!["Review performance metrics".to_string()],
}
}
}
impl Default for RealTimeMonitor {
fn default() -> Self {
Self::new(RealTimeMonitoringConfig::default())
.expect("Failed to create default real-time monitor")
}
}