Skip to main content

peat_mesh/topology/
partition.rs

1//! Network partition detection and autonomous operation
2//!
3//! This module detects when a node is isolated from ALL higher hierarchy levels
4//! (network partition) as distinguished from temporary single parent failover.
5//!
6//! ## Partition vs Failover
7//!
8//! - **Parent Failover**: One parent becomes unavailable, but other parents exist
9//!   - Response: Select backup parent from remaining candidates
10//!   - Handled by TopologyBuilder's peer selection
11//!
12//! - **Network Partition**: Node is isolated from ALL higher hierarchy levels
13//!   - Response: Enter autonomous operation mode
14//!   - Handled by PartitionDetector
15//!
16//! ## Detection Strategy
17//!
18//! PartitionDetector uses exponential backoff with multiple attempts to avoid
19//! false positives from transient network issues:
20//!
21//! 1. Check beacon visibility for higher hierarchy levels
22//! 2. If no higher-level beacons visible, retry with exponential backoff
23//! 3. After N failed attempts, emit PartitionDetected event
24//! 4. Monitor for beacon recovery to emit PartitionHealed event
25//!
26//! ## Architecture
27//!
28//! ```text
29//! PartitionDetector
30//! ├── PartitionConfig (detection thresholds)
31//! ├── PartitionEvent (state change notifications)
32//! └── PartitionHandler trait (pluggable response strategies)
33//! ```
34
35use crate::beacon::{GeographicBeacon, HierarchyLevel};
36use std::collections::HashMap;
37use std::sync::Arc;
38use std::time::{Duration, Instant};
39
/// Configuration for partition detection
///
/// Holds the thresholds and timing parameters that `PartitionDetector` reads
/// when deciding whether the node is partitioned.
#[derive(Debug, Clone)]
pub struct PartitionConfig {
    /// Minimum number of detection attempts before declaring partition
    pub min_detection_attempts: u32,

    /// Initial backoff duration between detection attempts
    pub initial_backoff: Duration,

    /// Maximum backoff duration cap
    pub max_backoff: Duration,

    /// Backoff multiplier for exponential growth
    pub backoff_multiplier: f64,

    /// Minimum number of higher-level beacons required to consider connected
    pub min_higher_level_beacons: usize,
}

impl Default for PartitionConfig {
    /// Defaults: 3 attempts, 2s initial backoff, 30s cap, multiplier 2.0,
    /// and at least 1 higher-level beacon to count as connected.
    fn default() -> Self {
        Self {
            min_detection_attempts: 3,
            initial_backoff: Duration::from_secs(2),
            max_backoff: Duration::from_secs(30),
            backoff_multiplier: 2.0,
            min_higher_level_beacons: 1,
        }
    }
}

impl PartitionConfig {
    /// Calculate backoff duration for a given attempt number
    ///
    /// Computes `initial_backoff * backoff_multiplier^attempt`, limited to the
    /// range `[0, max_backoff]`.
    ///
    /// This never panics, whatever the configuration:
    /// * attempts above `i32::MAX` saturate instead of wrapping to a negative
    ///   exponent (which would shrink the backoff);
    /// * a negative product (negative multiplier with an odd exponent) yields
    ///   `Duration::ZERO` instead of tripping `Duration::from_secs_f64`;
    /// * a NaN product (NaN multiplier, or `0 * inf`) yields `max_backoff`.
    pub fn calculate_backoff(&self, attempt: u32) -> Duration {
        // `powi` takes an i32; saturate rather than wrap for very large counts.
        let exponent = attempt.min(i32::MAX as u32) as i32;
        let scaled_secs =
            self.initial_backoff.as_secs_f64() * self.backoff_multiplier.powi(exponent);

        // Return the cap itself so it is not round-tripped through f64 (which
        // loses precision and can overflow for very large caps).
        if scaled_secs.is_nan() || scaled_secs >= self.max_backoff.as_secs_f64() {
            return self.max_backoff;
        }

        // Below the cap: only a negative value can still be out of range.
        Duration::from_secs_f64(scaled_secs.max(0.0))
    }
}
80
/// State-change notifications produced by partition detection
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PartitionEvent {
    /// The node has lost sight of every higher hierarchy level
    PartitionDetected {
        /// How many detection attempts had been made when the partition was declared
        attempts: u32,
        /// How long the detection process took
        detection_duration: Duration,
    },

    /// Higher-level beacons are visible again after a partition
    PartitionHealed {
        /// Count of higher-level beacons visible at the time of healing
        visible_beacons: usize,
        /// How long the partition lasted
        partition_duration: Duration,
    },
}
100
101/// Handler for partition events
102///
103/// Follows Ports & Adapters pattern like BeaconStorage and HierarchyStrategy.
104pub trait PartitionHandler: Send + Sync + std::fmt::Debug {
105    /// Handle partition detected event
106    fn on_partition_detected(&self, event: &PartitionEvent);
107
108    /// Handle partition healed event
109    fn on_partition_healed(&self, event: &PartitionEvent);
110}
111
112/// Network partition detector
113///
114/// Monitors beacon visibility to detect isolation from higher hierarchy levels.
115#[derive(Debug)]
116pub struct PartitionDetector {
117    config: PartitionConfig,
118    current_level: HierarchyLevel,
119    handler: Option<Arc<dyn PartitionHandler>>,
120
121    // Detection state
122    partitioned: bool,
123    detection_attempts: u32,
124    last_detection_attempt: Option<Instant>,
125    partition_start: Option<Instant>,
126}
127
128impl PartitionDetector {
129    /// Create a new partition detector
130    pub fn new(current_level: HierarchyLevel, config: PartitionConfig) -> Self {
131        Self {
132            config,
133            current_level,
134            handler: None,
135            partitioned: false,
136            detection_attempts: 0,
137            last_detection_attempt: None,
138            partition_start: None,
139        }
140    }
141
142    /// Set the partition event handler
143    pub fn with_handler(mut self, handler: Arc<dyn PartitionHandler>) -> Self {
144        self.handler = Some(handler);
145        self
146    }
147
148    /// Check for partition based on current beacon visibility
149    ///
150    /// # Arguments
151    /// * `beacons` - Currently visible beacons from BeaconObserver
152    ///
153    /// # Returns
154    /// Optional PartitionEvent if state changed
155    pub fn check_partition(
156        &mut self,
157        beacons: &HashMap<String, GeographicBeacon>,
158    ) -> Option<PartitionEvent> {
159        let higher_level_count = self.count_higher_level_beacons(beacons);
160        let has_connectivity = higher_level_count >= self.config.min_higher_level_beacons;
161
162        if has_connectivity {
163            // We have connectivity - check if we were previously partitioned
164            if self.partitioned {
165                return self.handle_partition_healed(higher_level_count);
166            } else {
167                // Reset detection state since we have connectivity
168                self.reset_detection_state();
169                return None;
170            }
171        }
172
173        // No connectivity - check if we should attempt detection
174        if !self.should_attempt_detection() {
175            return None;
176        }
177
178        self.detection_attempts += 1;
179        self.last_detection_attempt = Some(Instant::now());
180
181        // Have we reached minimum attempts threshold?
182        if self.detection_attempts >= self.config.min_detection_attempts {
183            return self.handle_partition_detected();
184        }
185
186        None
187    }
188
189    /// Count beacons at higher hierarchy levels than current node
190    fn count_higher_level_beacons(&self, beacons: &HashMap<String, GeographicBeacon>) -> usize {
191        beacons
192            .values()
193            .filter(|beacon| beacon.operational && beacon.hierarchy_level > self.current_level)
194            .count()
195    }
196
197    /// Check if we should attempt detection now based on backoff timing
198    fn should_attempt_detection(&self) -> bool {
199        match self.last_detection_attempt {
200            None => true, // First attempt
201            Some(last_attempt) => {
202                let backoff = self.config.calculate_backoff(self.detection_attempts);
203                last_attempt.elapsed() >= backoff
204            }
205        }
206    }
207
208    /// Handle partition detected
209    fn handle_partition_detected(&mut self) -> Option<PartitionEvent> {
210        if self.partitioned {
211            return None; // Already partitioned
212        }
213
214        let partition_start = self.last_detection_attempt.unwrap_or_else(Instant::now);
215
216        let detection_duration = partition_start.elapsed();
217
218        let event = PartitionEvent::PartitionDetected {
219            attempts: self.detection_attempts,
220            detection_duration,
221        };
222
223        self.partitioned = true;
224        self.partition_start = Some(partition_start);
225
226        if let Some(ref handler) = self.handler {
227            handler.on_partition_detected(&event);
228        }
229
230        Some(event)
231    }
232
233    /// Handle partition healed
234    fn handle_partition_healed(&mut self, visible_beacons: usize) -> Option<PartitionEvent> {
235        if !self.partitioned {
236            return None; // Wasn't partitioned
237        }
238
239        let partition_duration = self
240            .partition_start
241            .map(|start| start.elapsed())
242            .unwrap_or(Duration::ZERO);
243
244        let event = PartitionEvent::PartitionHealed {
245            visible_beacons,
246            partition_duration,
247        };
248
249        self.reset_detection_state();
250
251        if let Some(ref handler) = self.handler {
252            handler.on_partition_healed(&event);
253        }
254
255        Some(event)
256    }
257
258    /// Reset detection state
259    fn reset_detection_state(&mut self) {
260        self.partitioned = false;
261        self.detection_attempts = 0;
262        self.last_detection_attempt = None;
263        self.partition_start = None;
264    }
265
266    /// Check if currently in partitioned state
267    pub fn is_partitioned(&self) -> bool {
268        self.partitioned
269    }
270
271    /// Get current detection attempt count
272    pub fn detection_attempts(&self) -> u32 {
273        self.detection_attempts
274    }
275}
276
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a beacon at a fixed position with the given level and operational flag.
    fn create_beacon(node_id: &str, level: HierarchyLevel, operational: bool) -> GeographicBeacon {
        let mut beacon = GeographicBeacon::new(
            node_id.to_string(),
            crate::beacon::GeoPosition::new(37.7749, -122.4194),
            level,
        );
        beacon.operational = operational;
        beacon
    }

    /// Build a beacon map keyed by node id from `(id, level, operational)` entries.
    fn beacon_map(entries: Vec<(&str, HierarchyLevel, bool)>) -> HashMap<String, GeographicBeacon> {
        entries
            .into_iter()
            .map(|(id, level, operational)| (id.to_string(), create_beacon(id, level, operational)))
            .collect()
    }

    /// Detector for a Platform-level node using the given configuration.
    fn platform_detector(config: PartitionConfig) -> PartitionDetector {
        PartitionDetector::new(HierarchyLevel::Platform, config)
    }

    #[test]
    fn test_partition_config_default() {
        let PartitionConfig {
            min_detection_attempts,
            initial_backoff,
            max_backoff,
            backoff_multiplier,
            min_higher_level_beacons,
        } = PartitionConfig::default();

        assert_eq!(min_detection_attempts, 3);
        assert_eq!(initial_backoff, Duration::from_secs(2));
        assert_eq!(max_backoff, Duration::from_secs(30));
        assert_eq!(backoff_multiplier, 2.0);
        assert_eq!(min_higher_level_beacons, 1);
    }

    #[test]
    fn test_backoff_calculation() {
        let config = PartitionConfig::default();

        // (attempt, expected seconds): 2s * 2^attempt, capped at max_backoff (30s)
        let cases: [(u32, u64); 4] = [(0, 2), (1, 4), (2, 8), (10, 30)];

        for &(attempt, expected_secs) in cases.iter() {
            assert_eq!(
                config.calculate_backoff(attempt),
                Duration::from_secs(expected_secs)
            );
        }
    }

    #[test]
    fn test_count_higher_level_beacons() {
        let detector = platform_detector(PartitionConfig::default());

        let beacons = beacon_map(vec![
            ("squad1", HierarchyLevel::Squad, true),
            ("platoon1", HierarchyLevel::Platoon, true),
            ("platform2", HierarchyLevel::Platform, true),
        ]);

        // The same-level Platform beacon is not counted; Squad and Platoon are
        assert_eq!(detector.count_higher_level_beacons(&beacons), 2);
    }

    #[test]
    fn test_count_excludes_non_operational_beacons() {
        let detector = platform_detector(PartitionConfig::default());

        let beacons = beacon_map(vec![
            ("squad1", HierarchyLevel::Squad, true),
            ("squad2", HierarchyLevel::Squad, false),
        ]);

        // The non-operational beacon must be ignored
        assert_eq!(detector.count_higher_level_beacons(&beacons), 1);
    }

    #[test]
    fn test_no_partition_with_connectivity() {
        let mut detector = platform_detector(PartitionConfig::default());
        let beacons = beacon_map(vec![("squad1", HierarchyLevel::Squad, true)]);

        // A visible higher-level beacon means no event and no attempts recorded
        assert!(detector.check_partition(&beacons).is_none());
        assert!(!detector.is_partitioned());
        assert_eq!(detector.detection_attempts(), 0);
    }

    #[test]
    fn test_partition_detection_after_min_attempts() {
        let mut detector = platform_detector(PartitionConfig {
            min_detection_attempts: 2,
            initial_backoff: Duration::from_millis(1), // Very short for testing
            ..Default::default()
        });

        let no_beacons = HashMap::new();

        // Attempt one is below the threshold, so nothing is emitted yet
        let first = detector.check_partition(&no_beacons);
        assert!(first.is_none());
        assert_eq!(detector.detection_attempts(), 1);

        // Let the backoff elapse before trying again
        std::thread::sleep(Duration::from_millis(2));

        // Attempt two reaches the threshold
        let second = detector.check_partition(&no_beacons);
        assert!(second.is_some());
        assert!(detector.is_partitioned());

        match second {
            Some(PartitionEvent::PartitionDetected { attempts, .. }) => assert_eq!(attempts, 2),
            _ => panic!("Expected PartitionDetected event"),
        }
    }

    #[test]
    fn test_partition_healed_event() {
        let mut detector = platform_detector(PartitionConfig {
            min_detection_attempts: 1,
            initial_backoff: Duration::from_millis(1),
            ..Default::default()
        });

        // Enter the partitioned state
        let _ = detector.check_partition(&HashMap::new());
        assert!(detector.is_partitioned());

        // A higher-level beacon reappears
        let beacons_with_parent = beacon_map(vec![("squad1", HierarchyLevel::Squad, true)]);
        let healed = detector.check_partition(&beacons_with_parent);
        assert!(healed.is_some());

        match healed {
            Some(PartitionEvent::PartitionHealed {
                visible_beacons, ..
            }) => assert_eq!(visible_beacons, 1),
            _ => panic!("Expected PartitionHealed event"),
        }

        assert!(!detector.is_partitioned());
        assert_eq!(detector.detection_attempts(), 0);
    }

    #[test]
    fn test_backoff_prevents_rapid_detection_attempts() {
        let mut detector = platform_detector(PartitionConfig {
            min_detection_attempts: 3,
            initial_backoff: Duration::from_secs(10), // Long backoff
            ..Default::default()
        });

        let no_beacons = HashMap::new();

        // The first call counts; the immediate second call is inside the
        // backoff window and must not count.
        for _ in 0..2 {
            let _ = detector.check_partition(&no_beacons);
            assert_eq!(detector.detection_attempts(), 1);
        }

        // Still exactly one attempt recorded
        assert_eq!(detector.detection_attempts(), 1);
    }

    #[test]
    fn test_company_level_node_has_no_higher_levels() {
        let detector = PartitionDetector::new(HierarchyLevel::Company, PartitionConfig::default());

        let beacons = beacon_map(vec![
            ("platoon1", HierarchyLevel::Platoon, true),
            ("squad1", HierarchyLevel::Squad, true),
        ]);

        // Neither Platoon nor Squad ranks above Company
        assert_eq!(detector.count_higher_level_beacons(&beacons), 0);
    }

    #[test]
    fn test_min_higher_level_beacons_threshold() {
        let mut detector = platform_detector(PartitionConfig {
            min_higher_level_beacons: 2, // Require at least 2 higher-level beacons
            min_detection_attempts: 1,
            initial_backoff: Duration::from_millis(1),
            ..Default::default()
        });

        // One higher-level beacon is visible, which is below the threshold of two
        let beacons = beacon_map(vec![("squad1", HierarchyLevel::Squad, true)]);

        // Being under the threshold counts as no connectivity, so an event is emitted
        assert!(detector.check_partition(&beacons).is_some());
    }
}