Skip to main content

cluster_monitoring/
cluster_monitoring.rs

1use conservation_checker::{ConservationChecker, Phase};
2use std::collections::HashMap;
3
/// Simulated cluster node state.
///
/// One instance per host in the simulated cluster. The numeric fields are
/// mutated in place by `tick` on every monitoring step.
#[derive(Debug, Clone, PartialEq)]
struct Node {
    /// Host name; also used as the prefix of the node's quantity names
    /// (`"<name>.cpu"`, `"<name>.mem_remaining"`, `"<name>.net"`).
    name: String,
    /// CPU figure in percent; `tick` keeps it within `0.0..=100.0`.
    cpu_percent: f64,
    /// Memory in use, in GB; `tick` caps it at 64 GB.
    memory_gb: f64,
    /// Network throughput in Mbps; `tick` never lets it go below zero.
    network_mbps: f64,
}
11
12/// Simulate one monitoring tick — each node's resources change slightly
13fn tick(nodes: &mut [Node], tick_id: u32) {
14    // Use deterministic-ish changes so results are reproducible
15    for (i, node) in nodes.iter_mut().enumerate() {
16        let seed = (tick_id as usize) * 37 + i * 13;
17
18        // CPU: varies ±5% per tick, with occasional heavy loads
19        if seed % 7 == 0 && tick_id > 0 {
20            node.cpu_percent = (node.cpu_percent - 15.0).max(0.0); // heavy load event
21        } else {
22            let delta = ((seed % 11) as f64) - 5.0; // -5 to +5
23            node.cpu_percent = (node.cpu_percent + delta).clamp(0.0, 100.0);
24        }
25
26        // Memory: creeps up slowly (memory leak detection)
27        node.memory_gb = (node.memory_gb + ((seed % 3) as f64) * 0.1).min(64.0);
28
29        // Network: bursty
30        if seed % 13 == 0 {
31            node.network_mbps = 500.0 + (seed % 200) as f64; // spike
32        } else {
33            node.network_mbps = (node.network_mbps + ((seed % 7) as f64) - 3.0).max(0.0);
34        }
35    }
36}
37
38fn main() {
39    // ── Setup: 10-node cluster ──────────────────────────────────────
40    let node_names = [
41        "web-01", "web-02", "web-03",
42        "db-01", "db-02",
43        "worker-01", "worker-02", "worker-03",
44        "cache-01", "monitor-01",
45    ];
46
47    let mut nodes: Vec<Node> = node_names
48        .iter()
49        .enumerate()
50        .map(|(i, name)| {
51            let seed = i * 7;
52            Node {
53                name: name.to_string(),
54                cpu_percent: (60.0 + (seed % 20) as f64).min(100.0),
55                memory_gb: 16.0 + (seed % 8) as f64,
56                network_mbps: 100.0 + (seed % 50) as f64,
57            }
58        })
59        .collect();
60
61    // ── Conservation laws ────────────────────────────────────────────
62    // CPU must not drop below 30% on any node
63    // Memory must not exceed 48 GB (reverse conservation — this crate
64    // only checks "must not decrease", so we invert: track "memory_remaining")
65    // Network throughput must not drop below 10 Mbps
66    let mut cluster = ConservationChecker::new();
67
68    for node in &nodes {
69        cluster.register(format!("{}.cpu", node.name), node.cpu_percent, 0.0);
70        // Invert memory: track remaining = 64 - used, so low remaining = violation
71        let mem_remaining = 64.0 - node.memory_gb;
72        cluster.register(format!("{}.mem_remaining", node.name), mem_remaining, 0.0);
73        cluster.register(format!("{}.net", node.name), node.network_mbps, 10.0);
74    }
75
76    println!("╔══════════════════════════════════════════════════════════╗");
77    println!("║  SRE Cluster Monitoring Simulation (10 nodes, 20 ticks) ║");
78    println!("╚══════════════════════════════════════════════════════════╝");
79    println!();
80
81    // ── Static CPU minimum threshold ────────────────────────────────
82    const CPU_MIN: f64 = 30.0;
83
84    // ── Run simulation ──────────────────────────────────────────────
85    for tick_id in 0..20 {
86        tick(&mut nodes, tick_id);
87
88        // Update all conservation values
89        for node in &nodes {
90            cluster.update(&format!("{}.cpu", node.name), node.cpu_percent);
91            let mem_remaining = 64.0 - node.memory_gb;
92            cluster.update(&format!("{}.mem_remaining", node.name), mem_remaining);
93            cluster.update(&format!("{}.net", node.name), node.network_mbps);
94        }
95
96        cluster.snapshot(); // Take a snapshot every tick
97
98        // Collect violations for this tick
99        let violations = cluster.violations();
100
101        // Collect additional SRE-relevant stats
102        let mut cpu_alerts = Vec::new();
103        let mut mem_alerts = Vec::new();
104        let mut net_alerts = Vec::new();
105        let mut phases_report = Vec::new();
106
107        for node in &nodes {
108            let cpu_ok = cluster.is_conserved(&format!("{}.cpu", node.name));
109            let cpu_pct = cluster.current_value(&format!("{}.cpu", node.name));
110            if cpu_pct < CPU_MIN {
111                cpu_alerts.push(format!("{}@{}%", node.name, cpu_pct));
112            }
113
114            let mem_ok = cluster.is_conserved(&format!("{}.mem_remaining", node.name));
115            if !mem_ok {
116                let used = 64.0 - cluster.current_value(&format!("{}.mem_remaining", node.name));
117                mem_alerts.push(format!("{}@{:.1}GB", node.name, used));
118            }
119
120            let net_ok = cluster.is_conserved(&format!("{}.net", node.name));
121            if !net_ok {
122                net_alerts.push(format!(
123                    "{}@{}Mbps",
124                    node.name,
125                    cluster.current_value(&format!("{}.net", node.name))
126                ));
127            }
128
129            // Track phases for nodes in transition
130            let cpu_phase = cluster.phase(&format!("{}.cpu", node.name));
131            let mem_phase = cluster.phase(&format!("{}.mem_remaining", node.name));
132            let net_phase = cluster.phase(&format!("{}.net", node.name));
133
134            if cpu_phase != Phase::Stable || mem_phase != Phase::Stable || net_phase != Phase::Stable {
135                phases_report.push(format!(
136                    "{}: cpu={}, mem={}, net={}",
137                    node.name, cpu_phase, mem_phase, net_phase
138                ));
139            }
140        }
141
142        // Print tick summary
143        if tick_id % 5 == 0 || !violations.is_empty() || !cpu_alerts.is_empty() || !mem_alerts.is_empty() {
144            println!("── Tick {:>2} ─────────────────────", tick_id);
145            println!("  Registered quantities: {}", cluster.registered().len());
146            println!("  Total violations: {}", violations.len());
147
148            if !cpu_alerts.is_empty() {
149                println!("  🔴 CPU low: {}", cpu_alerts.join(", "));
150            }
151            if !mem_alerts.is_empty() {
152                println!("  🟠 Memory high: {}", mem_alerts.join(", "));
153            }
154            if !net_alerts.is_empty() {
155                println!("  🟡 Network low: {}", net_alerts.join(", "));
156            }
157            if !phases_report.is_empty() {
158                for line in &phases_report {
159                    println!("  📊 {}", line);
160                }
161            }
162            println!();
163        }
164    }
165
166    // ── Final summary ────────────────────────────────────────────────
167    println!("═══════════════════════════════════════════════════════════════");
168    println!("  🏁 FINAL STATE");
169    println!("═══════════════════════════════════════════════════════════════");
170
171    let final_violations = cluster.violations();
172    println!("  Total violations at end: {}", final_violations.len());
173    for v in &final_violations {
174        println!("    ❌ {}", v);
175    }
176
177    // Phase distribution at end
178    println!();
179    println!("  Quantity phases at tick 20:");
180    for name in cluster.registered() {
181        let phase = cluster.phase(&name);
182        let current = cluster.current_value(&name);
183        let conserved = cluster.is_conserved(&name);
184        let drift = cluster.drift_rate(&name);
185        let snaps = cluster.snapshot_count(&name);
186        println!(
187            "    {} | value={:.1}, conserved={}, phase={}, drift={:+.2}/tick, {} snapshots",
188            name, current, conserved, phase, drift, snaps
189        );
190    }
191
192    // ── Serde snapshot ───────────────────────────────────────────────
193    #[cfg(feature = "serde")]
194    {
195        println!();
196        println!("  ┌─ Serde JSON snapshot ──────────────────────────┐");
197        let json = cluster.snapshot_json();
198        println!("  {}", json.replace('\n', "\n  "));
199        println!("  └────────────────────────────────────────────────┘");
200
201        // Demonstrate deserializing the whole ConservationChecker
202        let json_full = serde_json::to_string_pretty(&cluster).unwrap();
203        println!();
204        println!("  Full state (all history) available for archival:");
205        println!("  {} bytes", json_full.len());
206    }
207}