use anyhow::Result;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{error, info, warn};
/// Top-level configuration for the enterprise monitoring subsystem.
///
/// Bundles a global on/off switch with one configuration section per feature
/// (SLA tracking, alerting, metrics, health checks, profiling).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnterpriseMonitoringConfig {
/// Global switch. When `false`, `EnterpriseMonitoringSystem::initialize` logs that
/// monitoring is disabled and returns without doing anything else.
pub enabled: bool,
/// SLA objectives and breach-notification settings.
pub sla: SlaConfig,
/// Alert rules and alert aggregation/deduplication settings.
pub alerting: AlertingConfig,
/// Metric definitions, collection interval and export settings.
pub metrics: MetricsConfig,
/// Health-check endpoints and their timing settings.
pub health_checks: HealthCheckConfig,
/// Profiling settings.
pub profiling: ProfilingConfig,
}
impl Default for EnterpriseMonitoringConfig {
fn default() -> Self {
Self {
enabled: true,
sla: SlaConfig::default(),
alerting: AlertingConfig::default(),
metrics: MetricsConfig::default(),
health_checks: HealthCheckConfig::default(),
profiling: ProfilingConfig::default(),
}
}
}
/// Configuration for SLA (service-level agreement) tracking.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlaConfig {
/// Whether SLA tracking is switched on.
/// NOTE(review): no non-test code visible in this file reads this flag — confirm
/// where it is enforced.
pub enabled: bool,
/// Objectives to track; `EnterpriseMonitoringSystem::new` clones this list into
/// the `SlaTracker`.
pub objectives: Vec<SlaObjective>,
/// Reporting interval in seconds (per the `_secs` suffix).
pub reporting_interval_secs: u64,
/// Notification channels and escalation policy for SLA breaches.
pub breach_notification: BreachNotificationConfig,
}
/// Default SLA configuration: tracking enabled, three built-in objectives
/// (availability, P99 latency, error rate), a 300-second reporting interval and
/// the default breach-notification settings.
impl Default for SlaConfig {
    fn default() -> Self {
        // NOTE(review): the units of `target_value` are not stated in this file
        // (presumably percent / milliseconds / percent) — TODO confirm.
        let availability = SlaObjective {
            name: String::from("Availability"),
            metric_type: SlaMetricType::Availability,
            target_value: 99.99,
            measurement_window: MeasurementWindow::Rolling30Days,
            severity: SlaSeverity::Critical,
        };
        let latency_p99 = SlaObjective {
            name: String::from("Latency P99"),
            metric_type: SlaMetricType::LatencyP99,
            target_value: 10.0,
            measurement_window: MeasurementWindow::Rolling24Hours,
            severity: SlaSeverity::High,
        };
        let error_rate = SlaObjective {
            name: String::from("Error Rate"),
            metric_type: SlaMetricType::ErrorRate,
            target_value: 0.01,
            measurement_window: MeasurementWindow::Rolling1Hour,
            severity: SlaSeverity::High,
        };

        Self {
            enabled: true,
            objectives: vec![availability, latency_p99, error_rate],
            reporting_interval_secs: 300,
            breach_notification: BreachNotificationConfig::default(),
        }
    }
}
/// A single service-level objective tracked by `SlaTracker`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlaObjective {
/// Human-readable name; copied into `SlaBreach::objective_name` when a breach is
/// recorded.
pub name: String,
/// Metric this objective applies to. Measurements are matched to objectives via
/// this value.
pub metric_type: SlaMetricType,
/// Target value; copied into breach records as `target_value`.
/// NOTE(review): units and comparison direction are not defined in this file —
/// TODO confirm per metric type.
pub target_value: f64,
/// Window over which the metric is meant to be evaluated.
/// NOTE(review): not consulted by the tracking code visible in this file.
pub measurement_window: MeasurementWindow,
/// Severity copied into breach records for this objective.
pub severity: SlaSeverity,
}
/// Kinds of metric an SLA objective can target.
///
/// The `Display` rendering of a variant is what `SlaTracker` uses as the map key
/// for that metric's measurement history, so the display strings act as
/// identifiers as well as labels.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum SlaMetricType {
Availability,
LatencyP50,
LatencyP95,
LatencyP99,
Throughput,
ErrorRate,
ResponseTime,
}
impl fmt::Display for SlaMetricType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
SlaMetricType::Availability => write!(f, "Availability"),
SlaMetricType::LatencyP50 => write!(f, "Latency P50"),
SlaMetricType::LatencyP95 => write!(f, "Latency P95"),
SlaMetricType::LatencyP99 => write!(f, "Latency P99"),
SlaMetricType::Throughput => write!(f, "Throughput"),
SlaMetricType::ErrorRate => write!(f, "Error Rate"),
SlaMetricType::ResponseTime => write!(f, "Response Time"),
}
}
}
/// Time window over which an SLA metric is meant to be measured.
///
/// NOTE(review): the tracking code visible in this file only stores and displays
/// this value; it does not apply any windowing.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum MeasurementWindow {
RealTime,
Rolling1Hour,
Rolling24Hours,
Rolling7Days,
Rolling30Days,
/// Custom window length; the `Display` impl renders the payload as
/// `<n> seconds`.
Custom(u64),
}
impl fmt::Display for MeasurementWindow {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
MeasurementWindow::RealTime => write!(f, "Real-time"),
MeasurementWindow::Rolling1Hour => write!(f, "1 hour"),
MeasurementWindow::Rolling24Hours => write!(f, "24 hours"),
MeasurementWindow::Rolling7Days => write!(f, "7 days"),
MeasurementWindow::Rolling30Days => write!(f, "30 days"),
MeasurementWindow::Custom(secs) => write!(f, "{} seconds", secs),
}
}
}
/// Severity of an SLA objective and of the breaches recorded against it.
///
/// Variants are declared in ascending order, so the derived `Ord` yields
/// `Low < Medium < High < Critical`. Reordering the variants changes comparisons.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum SlaSeverity {
Low,
Medium,
High,
Critical,
}
impl fmt::Display for SlaSeverity {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
SlaSeverity::Low => write!(f, "LOW"),
SlaSeverity::Medium => write!(f, "MEDIUM"),
SlaSeverity::High => write!(f, "HIGH"),
SlaSeverity::Critical => write!(f, "CRITICAL"),
}
}
}
/// Settings describing who is notified about SLA breaches and how notifications
/// escalate.
///
/// NOTE(review): this file only defines the data; no notification-sending code is
/// visible here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BreachNotificationConfig {
/// Whether breach notifications are switched on.
pub enabled: bool,
/// Channels to notify.
pub channels: Vec<NotificationChannel>,
/// Escalation levels for breach notifications.
pub escalation: EscalationPolicy,
}
/// Default breach notifications: enabled, a single email channel addressed to
/// `ops@example.com`, and the default escalation policy.
impl Default for BreachNotificationConfig {
    fn default() -> Self {
        let ops_email = NotificationChannel::Email {
            recipients: vec![String::from("ops@example.com")],
        };
        Self {
            enabled: true,
            channels: vec![ops_email],
            escalation: EscalationPolicy::default(),
        }
    }
}
/// A destination for notifications, together with the data needed to address it.
///
/// NOTE(review): several variants carry values that look like secrets (webhook
/// URLs, service keys). The type derives `Debug` and `Serialize`, so take care
/// when logging or exporting it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NotificationChannel {
/// Email, addressed to `recipients`.
Email { recipients: Vec<String> },
/// Slack, addressed by `webhook_url`.
Slack { webhook_url: String },
/// PagerDuty, addressed by `service_key`.
PagerDuty { service_key: String },
/// Generic webhook, addressed by `url`.
Webhook { url: String },
/// SMS, addressed to `phone_numbers`.
SMS { phone_numbers: Vec<String> },
}
/// Ordered list of escalation levels for notifications.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EscalationPolicy {
/// Escalation levels; the default policy lists them in ascending `level` order.
pub levels: Vec<EscalationLevel>,
}
/// Default escalation policy with two email-only levels:
/// level 1 (wait 5 minutes, `ops@example.com`) and
/// level 2 (wait 15 minutes, `manager@example.com`).
impl Default for EscalationPolicy {
    fn default() -> Self {
        // Both default levels have the same shape: one email channel with one recipient.
        let email_level = |level: u32, wait_minutes: u32, recipient: &str| EscalationLevel {
            level,
            wait_minutes,
            channels: vec![NotificationChannel::Email {
                recipients: vec![recipient.to_string()],
            }],
        };

        Self {
            levels: vec![
                email_level(1, 5, "ops@example.com"),
                email_level(2, 15, "manager@example.com"),
            ],
        }
    }
}
/// One step of an escalation policy.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EscalationLevel {
/// Level number; the default policy uses 1 and 2.
pub level: u32,
/// Wait time in minutes (per the field name) associated with this level.
/// NOTE(review): whether the wait is measured from the breach or from the
/// previous level is not defined in this file — TODO confirm.
pub wait_minutes: u32,
/// Channels notified at this level.
pub channels: Vec<NotificationChannel>,
}
/// Configuration for rule-based alerting.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertingConfig {
/// Whether alerting is switched on.
pub enabled: bool,
/// Alert rules; `EnterpriseMonitoringSystem::new` clones this list into the
/// `AlertManager`.
pub rules: Vec<AlertRule>,
/// Aggregation window in seconds (per the `_secs` suffix).
/// NOTE(review): not read by any code visible in this file.
pub aggregation_window_secs: u64,
/// Whether duplicate alerts should be suppressed.
/// NOTE(review): not read by any code visible in this file; `AlertManager`
/// performs no deduplication here.
pub deduplication_enabled: bool,
}
/// Default alerting configuration: enabled, no rules, a 60-second aggregation
/// window and deduplication switched on.
impl Default for AlertingConfig {
    fn default() -> Self {
        let rules = Vec::new();
        Self {
            enabled: true,
            rules,
            aggregation_window_secs: 60,
            deduplication_enabled: true,
        }
    }
}
/// A configured alert rule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertRule {
/// Rule identifier; copied into `Alert::rule_id` when the rule fires.
pub id: String,
/// Rule name; copied into `Alert::name` when the rule fires.
pub name: String,
/// Condition under which the rule should fire.
pub condition: AlertCondition,
/// Severity assigned to alerts raised by this rule.
pub severity: AlertSeverity,
/// Channels to notify for this rule.
/// NOTE(review): no notification-sending code is visible in this file.
pub channels: Vec<NotificationChannel>,
/// Disabled rules are skipped by `AlertManager::evaluate_rules`.
pub enabled: bool,
}
/// The condition an alert rule evaluates.
///
/// NOTE(review): condition evaluation is not implemented in this file
/// (`AlertManager::should_trigger_alert` is a stub), so the field descriptions
/// below are presumed from the field names — confirm against the evaluator once
/// it exists.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlertCondition {
/// Compare a metric against a fixed value.
Threshold {
/// Name of the metric to compare.
metric: String,
/// Comparison to apply between the metric and `value`.
operator: ComparisonOperator,
/// Value the metric is compared against.
value: f64,
/// Duration in seconds (per the `_secs` suffix); presumably how long the
/// comparison must hold — TODO confirm.
duration_secs: u64,
},
/// Anomaly detection on a metric with a tunable `sensitivity`
/// (scale not defined here — TODO confirm).
Anomaly { metric: String, sensitivity: f64 },
/// Fire on a change of a metric over a time window.
RateOfChange {
/// Name of the metric to watch.
metric: String,
/// Change threshold, in percent (per the field name).
threshold_percent: f64,
/// Window length in seconds (per the `_secs` suffix).
window_secs: u64,
},
}
/// Comparison operator used by `AlertCondition::Threshold`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum ComparisonOperator {
GreaterThan,
LessThan,
Equals,
NotEquals,
GreaterThanOrEqual,
LessThanOrEqual,
}
/// Severity of an alert rule and of the alerts it raises.
///
/// Variants are declared in ascending order, so the derived `Ord` yields
/// `Info < Warning < Error < Critical`. Reordering the variants changes
/// comparisons.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum AlertSeverity {
Info,
Warning,
Error,
Critical,
}
impl fmt::Display for AlertSeverity {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
AlertSeverity::Info => write!(f, "INFO"),
AlertSeverity::Warning => write!(f, "WARNING"),
AlertSeverity::Error => write!(f, "ERROR"),
AlertSeverity::Critical => write!(f, "CRITICAL"),
}
}
}
/// Configuration for metrics collection and export.
///
/// NOTE(review): none of these fields is read by code visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricsConfig {
/// Whether metrics collection is switched on.
pub enabled: bool,
/// Collection interval in seconds (per the `_secs` suffix).
pub collection_interval_secs: u64,
/// Declared metrics.
pub metrics: Vec<MetricDefinition>,
/// Export format and endpoints.
pub export: MetricsExportConfig,
}
/// Default metrics configuration: enabled, 10-second collection interval, no
/// declared metrics and the default export settings.
impl Default for MetricsConfig {
    fn default() -> Self {
        let declared_metrics = Vec::new();
        Self {
            enabled: true,
            collection_interval_secs: 10,
            metrics: declared_metrics,
            export: MetricsExportConfig::default(),
        }
    }
}
/// Declaration of a single metric.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricDefinition {
/// Metric name.
pub name: String,
/// Kind of metric (counter, gauge, ...).
pub metric_type: MetricType,
/// Free-text description of the metric.
pub description: String,
/// Label names associated with the metric (presumably label keys — TODO confirm).
pub labels: Vec<String>,
}
/// Kind of a declared metric.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum MetricType {
Counter,
Gauge,
Histogram,
Summary,
}
/// Settings describing how metrics are exported.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricsExportConfig {
/// Export format.
pub format: MetricsFormat,
/// Export endpoints; empty by default.
pub endpoints: Vec<MetricsEndpoint>,
}
/// Default export settings: Prometheus format and no endpoints.
impl Default for MetricsExportConfig {
    fn default() -> Self {
        let format = MetricsFormat::Prometheus;
        let endpoints = Vec::new();
        Self { format, endpoints }
    }
}
/// Wire format used when exporting metrics.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum MetricsFormat {
Prometheus,
OpenMetrics,
JSON,
StatsD,
}
/// A single metrics export endpoint.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricsEndpoint {
/// Whether the endpoint is pull- or push-based.
pub endpoint_type: MetricsEndpointType,
/// Endpoint URL.
pub url: String,
/// Optional push interval in seconds (per the `_secs` suffix); presumably only
/// meaningful for `Push` endpoints — TODO confirm.
pub push_interval_secs: Option<u64>,
}
/// Direction of a metrics export endpoint.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum MetricsEndpointType {
Pull,
Push,
}
/// Configuration for periodic health checks.
///
/// NOTE(review): none of these fields is read by code visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthCheckConfig {
/// Whether health checks are switched on.
pub enabled: bool,
/// Check interval in seconds (per the `_secs` suffix).
pub interval_secs: u64,
/// Check timeout in seconds (per the `_secs` suffix).
pub timeout_secs: u64,
/// Endpoints to check; empty by default.
pub endpoints: Vec<HealthCheckEndpoint>,
}
/// Default health-check configuration: enabled, 30-second interval, 5-second
/// timeout and no endpoints.
impl Default for HealthCheckConfig {
    fn default() -> Self {
        let (interval_secs, timeout_secs) = (30, 5);
        Self {
            enabled: true,
            interval_secs,
            timeout_secs,
            endpoints: Vec::new(),
        }
    }
}
/// A named health check.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthCheckEndpoint {
/// Name of the check.
pub name: String,
/// What to check and how.
pub check_type: HealthCheckType,
/// Marks the check as critical.
/// NOTE(review): the effect of a failing critical check is not defined in this
/// file — TODO confirm.
pub critical: bool,
}
/// The kind of probe a health check performs, with its parameters.
///
/// NOTE(review): `Database::connection_string` and `Custom::command` may carry
/// credentials or executable input. The type derives `Debug` and `Serialize`, so
/// take care when logging it, and confirm how `command` is executed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum HealthCheckType {
/// TCP connection to `host:port`.
TcpConnect { host: String, port: u16 },
/// HTTP request to `url`, expecting `expected_status`.
Http { url: String, expected_status: u16 },
/// Database check using `connection_string`.
Database { connection_string: String },
/// Custom check described by `command`.
Custom { command: String },
}
/// Configuration for profiling.
///
/// NOTE(review): none of these fields is read by code visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfilingConfig {
/// Master switch for profiling; `false` by default.
pub enabled: bool,
/// Whether CPU profiling is requested.
pub cpu_profiling: bool,
/// Whether memory profiling is requested.
pub memory_profiling: bool,
/// Sampling rate; units are not specified in this file (presumably samples per
/// second — TODO confirm).
pub sampling_rate: u32,
/// Profiling duration in seconds (per the `_secs` suffix).
pub duration_secs: u64,
}
/// Default profiling configuration: profiling disabled overall, with CPU and
/// memory profiling both selected, a sampling rate of 100 and a 30-second
/// duration.
impl Default for ProfilingConfig {
    fn default() -> Self {
        // Both profilers are pre-selected; the master `enabled` switch stays off.
        let profiler_selected = true;
        Self {
            enabled: false,
            cpu_profiling: profiler_selected,
            memory_profiling: profiler_selected,
            sampling_rate: 100,
            duration_secs: 30,
        }
    }
}
/// Runtime entry point of the monitoring subsystem.
///
/// Owns the configuration plus the SLA tracker, alert manager and metrics
/// collector, each behind an `Arc<RwLock<_>>` (tokio's async `RwLock`).
pub struct EnterpriseMonitoringSystem {
/// Configuration the system was built from.
config: EnterpriseMonitoringConfig,
/// SLA measurements and breaches; used by `record_sla_measurement` and
/// `get_sla_status`.
sla_tracker: Arc<RwLock<SlaTracker>>,
/// Alert rules and active alerts.
/// NOTE(review): constructed in `new` but not otherwise used in this file.
alert_manager: Arc<RwLock<AlertManager>>,
/// Recorded metric samples.
/// NOTE(review): constructed in `new` but not otherwise used in this file.
metrics_collector: Arc<RwLock<MetricsCollector>>,
}
/// In-memory store of SLA objectives, their measurements and recorded breaches.
pub struct SlaTracker {
/// Objectives being tracked.
objectives: Vec<SlaObjective>,
/// Measurement history per metric, keyed by the `Display` string of the
/// measurement's `SlaMetricType`, in insertion order. Nothing in this file
/// removes entries, so histories grow without bound.
measurements: HashMap<String, Vec<SlaMeasurement>>,
/// Breaches recorded by `check_objectives`, in the order they were detected.
breaches: Vec<SlaBreach>,
}
/// A single observed value of an SLA metric.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlaMeasurement {
/// When the value was observed; copied into any breach raised from this
/// measurement.
pub timestamp: DateTime<Utc>,
/// Metric the value belongs to.
pub metric_type: SlaMetricType,
/// Observed value; copied into breach records as `actual_value`.
pub value: f64,
/// Caller-supplied verdict. `SlaTracker::check_objectives` relies on this flag
/// and does not itself compare `value` with the objective's target.
pub meets_objective: bool,
}
/// Record of an SLA objective not being met.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlaBreach {
/// Unique identifier; `SlaTracker::check_objectives` generates it as a random
/// (v4) UUID string.
pub breach_id: String,
/// Name of the breached objective.
pub objective_name: String,
/// Metric type of the breached objective.
pub metric_type: SlaMetricType,
/// The objective's target value at the time of the breach.
pub target_value: f64,
/// Value of the measurement that triggered the breach.
pub actual_value: f64,
/// Timestamp of the measurement that triggered the breach.
pub timestamp: DateTime<Utc>,
/// Severity of the breached objective.
pub severity: SlaSeverity,
/// Whether the breach has been resolved. Created as `false`; nothing in this
/// file sets it to `true`.
pub resolved: bool,
/// When the breach was resolved, if it has been. Created as `None`.
pub resolved_at: Option<DateTime<Utc>>,
}
impl SlaTracker {
    /// Creates a tracker for `objectives` with no measurements or breaches
    /// recorded yet.
    pub fn new(objectives: Vec<SlaObjective>) -> Self {
        Self {
            objectives,
            measurements: HashMap::new(),
            breaches: Vec::new(),
        }
    }

    /// Appends `measurement` to the history of its metric type.
    ///
    /// Histories are keyed by the metric type's `Display` string and kept in
    /// insertion order; the most recently recorded measurement is treated as the
    /// "latest" one by [`SlaTracker::check_objectives`].
    pub fn record_measurement(&mut self, measurement: SlaMeasurement) {
        let key = measurement.metric_type.to_string();
        self.measurements.entry(key).or_default().push(measurement);
    }

    /// Checks every objective against the latest measurement of its metric type
    /// and records a breach for each one whose latest measurement is flagged as
    /// not meeting the objective.
    ///
    /// A given measurement is reported at most once per objective: if a breach
    /// with the same objective name, metric type and measurement timestamp is
    /// already recorded, it is skipped. Without this, calling the method again
    /// before a newer measurement arrives (for example after recording a
    /// measurement for a *different* metric) would re-report the same failing
    /// measurement as a fresh breach every time.
    ///
    /// Returns the breaches newly recorded by this call; they are also appended
    /// to the tracker's breach list. Objectives without any measurement are
    /// ignored.
    pub fn check_objectives(&mut self) -> Vec<SlaBreach> {
        let mut new_breaches = Vec::new();

        for objective in &self.objectives {
            let key = objective.metric_type.to_string();
            let latest = match self
                .measurements
                .get(&key)
                .and_then(|history| history.last())
            {
                Some(measurement) if !measurement.meets_objective => measurement,
                // No data yet, or the latest measurement meets the objective.
                _ => continue,
            };

            // The measurement timestamp identifies which sample caused a breach,
            // so it doubles as the de-duplication key for this objective.
            let already_recorded = self.breaches.iter().any(|breach| {
                breach.metric_type == objective.metric_type
                    && breach.objective_name == objective.name
                    && breach.timestamp == latest.timestamp
            });
            if already_recorded {
                continue;
            }

            new_breaches.push(SlaBreach {
                breach_id: uuid::Uuid::new_v4().to_string(),
                objective_name: objective.name.clone(),
                metric_type: objective.metric_type,
                target_value: objective.target_value,
                actual_value: latest.value,
                timestamp: latest.timestamp,
                severity: objective.severity,
                resolved: false,
                resolved_at: None,
            });
        }

        self.breaches.extend(new_breaches.iter().cloned());
        new_breaches
    }
}
/// Holds the configured alert rules and the alerts raised from them.
pub struct AlertManager {
/// Rules evaluated by `evaluate_rules`.
rules: Vec<AlertRule>,
/// Alerts appended by `evaluate_rules`; nothing in this file removes or
/// resolves them.
active_alerts: Vec<Alert>,
}
/// An alert raised because an alert rule fired.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Alert {
/// Unique identifier; `AlertManager::evaluate_rules` generates it as a random
/// (v4) UUID string.
pub alert_id: String,
/// `id` of the rule that fired.
pub rule_id: String,
/// `name` of the rule that fired.
pub name: String,
/// Severity of the rule that fired.
pub severity: AlertSeverity,
/// Time (UTC) at which the alert was created.
pub triggered_at: DateTime<Utc>,
/// Whether the alert has been resolved. Created as `false`; nothing in this
/// file sets it to `true`.
pub resolved: bool,
/// When the alert was resolved, if it has been. Created as `None`.
pub resolved_at: Option<DateTime<Utc>>,
/// Free-form key/value details; created empty.
pub details: HashMap<String, String>,
}
impl AlertManager {
    /// Creates a manager for `rules` with no active alerts.
    pub fn new(rules: Vec<AlertRule>) -> Self {
        let active_alerts = Vec::new();
        Self {
            rules,
            active_alerts,
        }
    }

    /// Evaluates every enabled rule against `metrics` and raises an alert for
    /// each rule that fires.
    ///
    /// Each raised alert gets a fresh v4 UUID, the current UTC time and an empty
    /// `details` map. Raised alerts are appended to the manager's active alerts
    /// and also returned, in rule order.
    ///
    /// NOTE: rule evaluation is currently a stub that never fires (see
    /// `should_trigger_alert`), so this method returns an empty vector for now.
    pub fn evaluate_rules(&mut self, metrics: &HashMap<String, f64>) -> Vec<Alert> {
        let fired: Vec<Alert> = self
            .rules
            .iter()
            .filter(|rule| rule.enabled && self.should_trigger_alert(rule, metrics))
            .map(|rule| Alert {
                alert_id: uuid::Uuid::new_v4().to_string(),
                rule_id: rule.id.clone(),
                name: rule.name.clone(),
                severity: rule.severity,
                triggered_at: Utc::now(),
                resolved: false,
                resolved_at: None,
                details: HashMap::new(),
            })
            .collect();

        self.active_alerts.extend(fired.iter().cloned());
        fired
    }

    /// Decides whether `_rule` fires for the given metric snapshot.
    ///
    /// Placeholder: both arguments are ignored and the answer is always `false`.
    fn should_trigger_alert(&self, _rule: &AlertRule, _metrics: &HashMap<String, f64>) -> bool {
        false
    }
}
/// In-memory store of recorded metric samples.
pub struct MetricsCollector {
/// Samples per metric name, in insertion order. Nothing in this file removes
/// entries, so the series grow without bound.
metrics: HashMap<String, Vec<MetricValue>>,
}
/// A single recorded sample of a metric.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricValue {
/// Timestamp attached to the sample by the caller.
pub timestamp: DateTime<Utc>,
/// Sample value.
pub value: f64,
/// Label key/value pairs attached to the sample.
pub labels: HashMap<String, String>,
}
impl MetricsCollector {
    /// Creates an empty collector.
    pub fn new() -> Self {
        let metrics = HashMap::new();
        Self { metrics }
    }

    /// Appends `value` to the series stored under `name`, creating the series if
    /// it does not exist yet.
    pub fn record_metric(&mut self, name: String, value: MetricValue) {
        let series = self.metrics.entry(name).or_default();
        series.push(value);
    }

    /// Returns, for every metric that has at least one sample, the value of its
    /// most recently recorded sample ("most recent" by insertion order, not by
    /// timestamp).
    pub fn get_latest_values(&self) -> HashMap<String, f64> {
        let mut latest = HashMap::new();
        for (name, series) in &self.metrics {
            if let Some(sample) = series.last() {
                latest.insert(name.clone(), sample.value);
            }
        }
        latest
    }
}
impl Default for MetricsCollector {
fn default() -> Self {
Self::new()
}
}
impl EnterpriseMonitoringSystem {
    /// Builds the monitoring system from `config`.
    ///
    /// The SLA objectives and alert rules are cloned out of `config` into the
    /// tracker and the alert manager; the metrics collector starts empty.
    pub fn new(config: EnterpriseMonitoringConfig) -> Self {
        let sla_tracker = SlaTracker::new(config.sla.objectives.clone());
        let alert_manager = AlertManager::new(config.alerting.rules.clone());
        Self {
            sla_tracker: Arc::new(RwLock::new(sla_tracker)),
            alert_manager: Arc::new(RwLock::new(alert_manager)),
            metrics_collector: Arc::new(RwLock::new(MetricsCollector::new())),
            config,
        }
    }

    /// Initializes the monitoring system.
    ///
    /// Currently this only logs: either that monitoring is disabled (when
    /// `config.enabled` is `false`) or that initialization is taking place.
    ///
    /// # Errors
    ///
    /// The current implementation never returns an error.
    pub async fn initialize(&self) -> Result<()> {
        if !self.config.enabled {
            info!("Enterprise monitoring is disabled");
            return Ok(());
        }
        info!("Initializing enterprise monitoring system");
        Ok(())
    }

    /// Records `measurement`, re-checks the SLA objectives and logs any breach
    /// newly detected by that check.
    ///
    /// # Errors
    ///
    /// The current implementation never returns an error.
    pub async fn record_sla_measurement(&self, measurement: SlaMeasurement) -> Result<()> {
        // Hold the write lock only while the tracker is mutated, so readers are
        // not blocked behind log output.
        let breaches = {
            let mut tracker = self.sla_tracker.write().await;
            tracker.record_measurement(measurement);
            tracker.check_objectives()
        };

        if breaches.is_empty() {
            return Ok(());
        }

        warn!("SLA breaches detected: {}", breaches.len());
        for breach in &breaches {
            error!(
                "SLA breach: {} - {} (target: {}, actual: {})",
                breach.objective_name,
                breach.metric_type,
                breach.target_value,
                breach.actual_value
            );
        }
        Ok(())
    }

    /// Summarizes the current SLA state.
    ///
    /// * `total_objectives` — number of configured objectives.
    /// * `objectives_breached` — number of objectives that have at least one
    ///   unresolved breach (matched by objective name and metric type), so it
    ///   never exceeds `total_objectives`.
    /// * `objectives_met` — the remaining objectives, including objectives that
    ///   have no measurements yet; `objectives_met + objectives_breached`
    ///   always equals `total_objectives`.
    /// * `active_breaches` — number of unresolved breach records.
    ///
    /// # Errors
    ///
    /// The current implementation never returns an error.
    pub async fn get_sla_status(&self) -> Result<SlaStatus> {
        let tracker = self.sla_tracker.read().await;

        let total = tracker.objectives.len();
        let breached = tracker
            .objectives
            .iter()
            .filter(|objective| {
                tracker.breaches.iter().any(|breach| {
                    !breach.resolved
                        && breach.metric_type == objective.metric_type
                        && breach.objective_name == objective.name
                })
            })
            .count();
        let active = tracker
            .breaches
            .iter()
            .filter(|breach| !breach.resolved)
            .count();

        // `breached` counts a subset of the objectives, so the subtraction
        // cannot underflow.
        Ok(SlaStatus {
            total_objectives: total as u64,
            objectives_met: (total - breached) as u64,
            objectives_breached: breached as u64,
            active_breaches: active as u64,
        })
    }
}
/// Summary of the SLA state, as produced by
/// `EnterpriseMonitoringSystem::get_sla_status`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlaStatus {
/// Number of configured objectives.
pub total_objectives: u64,
/// Met-objective figure as computed by `get_sla_status`.
pub objectives_met: u64,
/// Breached-objective figure as computed by `get_sla_status`.
pub objectives_breached: u64,
/// Number of breach records that are not marked resolved.
pub active_breaches: u64,
}
#[cfg(test)]
mod tests {
    use super::*;

    // These checks are purely synchronous, so they run as plain `#[test]`s
    // rather than spinning up an async runtime per test.

    /// The default configuration enables monitoring as a whole and SLA tracking.
    #[test]
    fn test_monitoring_config_default() {
        let config = EnterpriseMonitoringConfig::default();
        assert!(config.enabled);
        assert!(config.sla.enabled);
    }

    /// SLA severities order from `Low` up to `Critical`.
    #[test]
    fn test_sla_severity_ordering() {
        assert!(SlaSeverity::Critical > SlaSeverity::High);
        assert!(SlaSeverity::High > SlaSeverity::Medium);
        assert!(SlaSeverity::Medium > SlaSeverity::Low);
    }

    /// Alert severities order from `Info` up to `Critical`.
    #[test]
    fn test_alert_severity_ordering() {
        assert!(AlertSeverity::Critical > AlertSeverity::Error);
        assert!(AlertSeverity::Error > AlertSeverity::Warning);
        assert!(AlertSeverity::Warning > AlertSeverity::Info);
    }
}