saorsa_core/placement/
errors.rs

1// Copyright (c) 2025 Saorsa Labs Limited
2//
3// This file is part of the Saorsa P2P network.
4//
5// Licensed under the AGPL-3.0 license:
6// <https://www.gnu.org/licenses/agpl-3.0.html>
7
8//! Error types for the placement system
9//!
10//! Comprehensive error handling for placement operations with detailed
11//! error categories, severity levels, and recovery guidance.
12
13use std::time::Duration;
14
15use serde::{Deserialize, Serialize};
16use thiserror::Error;
17
18use crate::adaptive::NodeId;
19
/// Result type for placement operations.
///
/// Shorthand for `Result<T, PlacementError>`, so fallible placement code only
/// has to name its success type.
pub type PlacementResult<T> = Result<T, PlacementError>;
22
/// Comprehensive error types for placement operations.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute (derived through `thiserror`). The enum also derives `Clone`,
/// `PartialEq`, `Serialize` and `Deserialize`, so every payload is plain owned
/// data (integers, floats, `String`s and `NodeId`s) rather than a wrapped
/// source error.
#[derive(Debug, Error, Clone, PartialEq, Serialize, Deserialize)]
pub enum PlacementError {
    /// Insufficient nodes available for placement.
    ///
    /// `required` is the number of nodes asked for and `available` the number
    /// that could be offered, as echoed in the message.
    #[error("Insufficient nodes: required {required}, available {available}")]
    InsufficientNodes { required: usize, available: usize },

    /// Invalid replication factor; the payload is the rejected value.
    #[error("Invalid replication factor: {0}")]
    InvalidReplicationFactor(u8),

    /// Invalid configuration parameter.
    ///
    /// `field` names the offending parameter and `reason` says why it was
    /// rejected.
    #[error("Invalid configuration - {field}: {reason}")]
    InvalidConfiguration { field: String, reason: String },

    /// Invalid metrics value
    #[error("Invalid metrics - {field}: {value} ({reason})")]
    InvalidMetrics {
        /// Name of the metric that was rejected.
        field: String,
        /// The rejected value.
        value: f64,
        /// Why the value was rejected.
        reason: String,
    },

    /// Invalid weight value for node
    #[error("Invalid weight for node {node_id:?}: {weight} ({reason})")]
    InvalidWeight {
        /// Node the weight belongs to (rendered with `Debug` in the message).
        node_id: NodeId,
        /// The rejected weight.
        weight: f64,
        /// Why the weight was rejected.
        reason: String,
    },

    /// Node metadata not found; the payload is the node that was looked up.
    #[error("Node metadata not found: {0:?}")]
    NodeMetadataNotFound(NodeId),

    /// Placement timeout exceeded
    #[error("Placement operation timed out")]
    PlacementTimeout,

    /// Diversity constraint violation
    #[error("Diversity violation - {constraint}: {details}")]
    DiversityViolation {
        /// Name of the diversity constraint that was violated.
        constraint: String,
        /// Nodes attached to the violation (presumably the ones involved in
        /// it); this field is not part of the `Display` message.
        nodes: Vec<NodeId>,
        /// Free-form description of the violation.
        details: String,
    },

    /// Byzantine fault tolerance violation.
    ///
    /// `required` and `available` are node counts, as echoed in the message.
    #[error("Byzantine tolerance violation: required {required} nodes, available {available}")]
    ByzantineToleranceViolation { required: usize, available: usize },

    /// Capacity constraint violation for the node `node_id`; `details` is a
    /// free-form description.
    #[error("Capacity constraint violated for node {node_id:?}: {details}")]
    CapacityViolation { node_id: NodeId, details: String },

    /// Performance constraint violation
    #[error(
        "Performance constraint violated for node {node_id:?}: {metric} = {value} (min: {minimum})"
    )]
    PerformanceViolation {
        /// Node that failed the constraint.
        node_id: NodeId,
        /// Name of the metric that was checked.
        metric: String,
        /// Observed value of the metric.
        value: f64,
        /// Minimum the metric was expected to reach.
        minimum: f64,
    },

    /// Geographic constraint violation; `details` is a free-form description.
    #[error("Geographic constraint violated: {details}")]
    GeographicViolation { details: String },

    /// Network topology error
    #[error("Network topology error: {0}")]
    NetworkTopology(String),

    /// Trust system error
    #[error("Trust system error: {0}")]
    TrustSystem(String),

    /// Performance monitoring error
    #[error("Performance monitoring error: {0}")]
    PerformanceMonitoring(String),

    /// Strategy execution error
    #[error("Strategy execution error: {0}")]
    StrategyExecution(String),

    /// Node selection algorithm error
    #[error("Node selection failed: {0}")]
    NodeSelection(String),

    /// Sampling algorithm error
    #[error("Sampling algorithm error: {0}")]
    SamplingError(String),

    /// Validation error
    #[error("Validation failed: {0}")]
    ValidationFailed(String),

    /// Reliability too low.
    ///
    /// `estimated` is the computed reliability and `minimum` the threshold it
    /// fell below. `user_message` multiplies both by 100 and prints them as
    /// percentages, so they are presumably fractions in `0.0..=1.0`.
    #[error("Estimated reliability {estimated} below minimum {minimum}")]
    ReliabilityTooLow { estimated: f64, minimum: f64 },

    /// Resource exhaustion
    #[error("Resource exhausted: {0}")]
    ResourceExhausted(String),

    /// Concurrent modification
    #[error("Concurrent modification detected: {0}")]
    ConcurrentModification(String),

    /// Internal consistency error
    #[error("Internal consistency error: {0}")]
    InternalConsistency(String),

    /// Serialization error
    #[error("Serialization error: {0}")]
    Serialization(String),

    /// DHT operation error
    #[error("DHT operation failed: {0}")]
    DhtOperation(String),

    /// Audit system error
    #[error("Audit system error: {0}")]
    AuditSystem(String),

    /// Repair system error
    #[error("Repair system error: {0}")]
    RepairSystem(String),

    /// Orchestration error
    #[error("Orchestration error: {0}")]
    Orchestration(String),

    /// External dependency error
    #[error("External dependency error: {0}")]
    ExternalDependency(String),

    /// Unknown error; the payload is a free-form description.
    #[error("Unknown error: {0}")]
    Unknown(String),
}
165
166impl PlacementError {
167    /// Get error severity level (1-5, where 5 is most severe)
168    pub fn severity(&self) -> u8 {
169        match self {
170            PlacementError::InsufficientNodes { .. } => 5,
171            PlacementError::InvalidReplicationFactor(_) => 4,
172            PlacementError::InvalidConfiguration { .. } => 4,
173            PlacementError::PlacementTimeout => 3,
174            PlacementError::ByzantineToleranceViolation { .. } => 5,
175            PlacementError::DiversityViolation { .. } => 2,
176            PlacementError::CapacityViolation { .. } => 3,
177            PlacementError::PerformanceViolation { .. } => 2,
178            PlacementError::GeographicViolation { .. } => 2,
179            PlacementError::NetworkTopology(_) => 3,
180            PlacementError::TrustSystem(_) => 3,
181            PlacementError::PerformanceMonitoring(_) => 2,
182            PlacementError::StrategyExecution(_) => 4,
183            PlacementError::NodeSelection(_) => 4,
184            PlacementError::SamplingError(_) => 3,
185            PlacementError::ValidationFailed(_) => 3,
186            PlacementError::ReliabilityTooLow { .. } => 4,
187            PlacementError::ResourceExhausted(_) => 3,
188            PlacementError::ConcurrentModification(_) => 2,
189            PlacementError::InternalConsistency(_) => 5,
190            PlacementError::Serialization(_) => 2,
191            PlacementError::DhtOperation(_) => 3,
192            PlacementError::AuditSystem(_) => 2,
193            PlacementError::RepairSystem(_) => 2,
194            PlacementError::Orchestration(_) => 4,
195            PlacementError::ExternalDependency(_) => 3,
196            PlacementError::InvalidMetrics { .. } => 2,
197            PlacementError::InvalidWeight { .. } => 2,
198            PlacementError::NodeMetadataNotFound(_) => 3,
199            PlacementError::Unknown(_) => 1,
200        }
201    }
202
203    /// Check if error is retryable
204    pub fn is_retryable(&self) -> bool {
205        match self {
206            PlacementError::InsufficientNodes { .. } => false,
207            PlacementError::InvalidReplicationFactor(_) => false,
208            PlacementError::InvalidConfiguration { .. } => false,
209            PlacementError::PlacementTimeout => true,
210            PlacementError::ByzantineToleranceViolation { .. } => false,
211            PlacementError::DiversityViolation { .. } => true,
212            PlacementError::CapacityViolation { .. } => true,
213            PlacementError::PerformanceViolation { .. } => true,
214            PlacementError::GeographicViolation { .. } => true,
215            PlacementError::NetworkTopology(_) => true,
216            PlacementError::TrustSystem(_) => true,
217            PlacementError::PerformanceMonitoring(_) => true,
218            PlacementError::StrategyExecution(_) => true,
219            PlacementError::NodeSelection(_) => true,
220            PlacementError::SamplingError(_) => true,
221            PlacementError::ValidationFailed(_) => false,
222            PlacementError::ReliabilityTooLow { .. } => true,
223            PlacementError::ResourceExhausted(_) => true,
224            PlacementError::ConcurrentModification(_) => true,
225            PlacementError::InternalConsistency(_) => false,
226            PlacementError::Serialization(_) => false,
227            PlacementError::DhtOperation(_) => true,
228            PlacementError::AuditSystem(_) => true,
229            PlacementError::RepairSystem(_) => true,
230            PlacementError::Orchestration(_) => true,
231            PlacementError::ExternalDependency(_) => true,
232            PlacementError::InvalidMetrics { .. } => false,
233            PlacementError::InvalidWeight { .. } => false,
234            PlacementError::NodeMetadataNotFound(_) => true,
235            PlacementError::Unknown(_) => false,
236        }
237    }
238
239    /// Get suggested retry delay
240    pub fn retry_delay(&self) -> Option<Duration> {
241        if !self.is_retryable() {
242            return None;
243        }
244
245        Some(match self {
246            PlacementError::PlacementTimeout => Duration::from_secs(5),
247            PlacementError::NetworkTopology(_) => Duration::from_secs(2),
248            PlacementError::TrustSystem(_) => Duration::from_secs(1),
249            PlacementError::PerformanceMonitoring(_) => Duration::from_secs(1),
250            PlacementError::ResourceExhausted(_) => Duration::from_secs(10),
251            PlacementError::ConcurrentModification(_) => Duration::from_millis(100),
252            PlacementError::DhtOperation(_) => Duration::from_secs(3),
253            PlacementError::ExternalDependency(_) => Duration::from_secs(5),
254            _ => Duration::from_secs(1),
255        })
256    }
257
258    /// Get error category
259    pub fn category(&self) -> ErrorCategory {
260        match self {
261            PlacementError::InsufficientNodes { .. } | PlacementError::NodeMetadataNotFound(_) => {
262                ErrorCategory::NodeAvailability
263            }
264
265            PlacementError::InvalidReplicationFactor(_)
266            | PlacementError::InvalidConfiguration { .. }
267            | PlacementError::InvalidMetrics { .. }
268            | PlacementError::InvalidWeight { .. } => ErrorCategory::Configuration,
269
270            PlacementError::DiversityViolation { .. }
271            | PlacementError::GeographicViolation { .. }
272            | PlacementError::ByzantineToleranceViolation { .. } => ErrorCategory::Constraints,
273
274            PlacementError::CapacityViolation { .. }
275            | PlacementError::PerformanceViolation { .. }
276            | PlacementError::ReliabilityTooLow { .. } => ErrorCategory::Performance,
277
278            PlacementError::NetworkTopology(_) | PlacementError::DhtOperation(_) => {
279                ErrorCategory::Network
280            }
281
282            PlacementError::TrustSystem(_) => ErrorCategory::Trust,
283
284            PlacementError::StrategyExecution(_)
285            | PlacementError::NodeSelection(_)
286            | PlacementError::SamplingError(_) => ErrorCategory::Algorithm,
287
288            PlacementError::PlacementTimeout | PlacementError::ResourceExhausted(_) => {
289                ErrorCategory::Resource
290            }
291
292            PlacementError::ValidationFailed(_) | PlacementError::InternalConsistency(_) => {
293                ErrorCategory::Validation
294            }
295
296            PlacementError::Serialization(_) => ErrorCategory::Serialization,
297
298            PlacementError::AuditSystem(_)
299            | PlacementError::RepairSystem(_)
300            | PlacementError::Orchestration(_) => ErrorCategory::System,
301
302            PlacementError::ConcurrentModification(_) => ErrorCategory::Concurrency,
303
304            PlacementError::PerformanceMonitoring(_) | PlacementError::ExternalDependency(_) => {
305                ErrorCategory::External
306            }
307
308            PlacementError::Unknown(_) => ErrorCategory::Unknown,
309        }
310    }
311
312    /// Get recovery suggestion
313    pub fn recovery_suggestion(&self) -> &'static str {
314        match self {
315            PlacementError::InsufficientNodes { .. } => {
316                "Add more nodes to the network or reduce replication factor"
317            }
318            PlacementError::InvalidReplicationFactor(_) => {
319                "Use a valid replication factor within configured bounds"
320            }
321            PlacementError::InvalidConfiguration { .. } => {
322                "Fix configuration parameters and restart"
323            }
324            PlacementError::PlacementTimeout => {
325                "Increase placement timeout or optimize network performance"
326            }
327            PlacementError::ByzantineToleranceViolation { .. } => {
328                "Add more nodes or reduce Byzantine fault tolerance requirements"
329            }
330            PlacementError::DiversityViolation { .. } => {
331                "Relax diversity constraints or add nodes in different regions"
332            }
333            PlacementError::CapacityViolation { .. } => {
334                "Add nodes with more capacity or reduce storage requirements"
335            }
336            PlacementError::PerformanceViolation { .. } => {
337                "Improve node performance or relax performance constraints"
338            }
339            PlacementError::ReliabilityTooLow { .. } => {
340                "Improve node reliability or increase replication factor"
341            }
342            PlacementError::ResourceExhausted(_) => {
343                "Wait for resources to become available or add more capacity"
344            }
345            PlacementError::ConcurrentModification(_) => "Retry the operation with updated state",
346            _ => "Check logs for details and consider retrying",
347        }
348    }
349
350    /// Convert to user-friendly message
351    pub fn user_message(&self) -> String {
352        match self {
353            PlacementError::InsufficientNodes {
354                required,
355                available,
356            } => {
357                format!(
358                    "Not enough nodes available for placement. Need {} but only {} available.",
359                    required, available
360                )
361            }
362            PlacementError::PlacementTimeout => {
363                "Placement took too long to complete. The network may be busy.".to_string()
364            }
365            PlacementError::DiversityViolation { constraint, .. } => {
366                format!("Placement violates {} diversity requirement.", constraint)
367            }
368            PlacementError::ReliabilityTooLow { estimated, minimum } => {
369                format!(
370                    "Estimated reliability {:.1}% is below minimum {:.1}%.",
371                    estimated * 100.0,
372                    minimum * 100.0
373                )
374            }
375            _ => "Placement operation failed. Please try again.".to_string(),
376        }
377    }
378}
379
/// Error categories for grouping and handling.
///
/// `PlacementError::category` maps every error variant onto exactly one of
/// these; the per-variant notes below list which errors land where.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ErrorCategory {
    /// Node availability issues (`InsufficientNodes`, `NodeMetadataNotFound`)
    NodeAvailability,
    /// Configuration problems (`InvalidReplicationFactor`,
    /// `InvalidConfiguration`, `InvalidMetrics`, `InvalidWeight`)
    Configuration,
    /// Constraint violations (`DiversityViolation`, `GeographicViolation`,
    /// `ByzantineToleranceViolation`)
    Constraints,
    /// Performance issues (`CapacityViolation`, `PerformanceViolation`,
    /// `ReliabilityTooLow`)
    Performance,
    /// Network problems (`NetworkTopology`, `DhtOperation`)
    Network,
    /// Trust system issues (`TrustSystem`)
    Trust,
    /// Algorithm failures (`StrategyExecution`, `NodeSelection`,
    /// `SamplingError`)
    Algorithm,
    /// Resource problems (`PlacementTimeout`, `ResourceExhausted`)
    Resource,
    /// Validation failures (`ValidationFailed`, `InternalConsistency`)
    Validation,
    /// Serialization issues (`Serialization`)
    Serialization,
    /// System component failures (`AuditSystem`, `RepairSystem`,
    /// `Orchestration`)
    System,
    /// Concurrency issues (`ConcurrentModification`)
    Concurrency,
    /// External dependency failures (`PerformanceMonitoring`,
    /// `ExternalDependency`)
    External,
    /// Unknown/unclassified errors (`Unknown`)
    Unknown,
}
412
413impl ErrorCategory {
414    /// Get category priority (higher = more important)
415    pub fn priority(&self) -> u8 {
416        match self {
417            ErrorCategory::Configuration => 10,
418            ErrorCategory::NodeAvailability => 9,
419            ErrorCategory::Constraints => 8,
420            ErrorCategory::Performance => 7,
421            ErrorCategory::Algorithm => 6,
422            ErrorCategory::Network => 5,
423            ErrorCategory::Resource => 4,
424            ErrorCategory::Trust => 3,
425            ErrorCategory::Validation => 2,
426            ErrorCategory::System => 2,
427            ErrorCategory::Serialization => 1,
428            ErrorCategory::Concurrency => 1,
429            ErrorCategory::External => 1,
430            ErrorCategory::Unknown => 0,
431        }
432    }
433
434    /// Get category name
435    pub fn name(&self) -> &'static str {
436        match self {
437            ErrorCategory::NodeAvailability => "Node Availability",
438            ErrorCategory::Configuration => "Configuration",
439            ErrorCategory::Constraints => "Constraints",
440            ErrorCategory::Performance => "Performance",
441            ErrorCategory::Network => "Network",
442            ErrorCategory::Trust => "Trust",
443            ErrorCategory::Algorithm => "Algorithm",
444            ErrorCategory::Resource => "Resource",
445            ErrorCategory::Validation => "Validation",
446            ErrorCategory::Serialization => "Serialization",
447            ErrorCategory::System => "System",
448            ErrorCategory::Concurrency => "Concurrency",
449            ErrorCategory::External => "External",
450            ErrorCategory::Unknown => "Unknown",
451        }
452    }
453}
454
/// Error context for debugging and monitoring.
///
/// A plain data record describing where an error happened. It is built with
/// `ErrorContext::new` and extended with `ErrorContext::with_context`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ErrorContext {
    /// Operation that failed
    pub operation: String,
    /// Component that generated the error
    pub component: String,
    /// Additional context data as free-form key/value strings
    pub context: std::collections::HashMap<String, String>,
    /// Timestamp when error occurred; `ErrorContext::new` sets it to whole
    /// seconds since the Unix epoch at construction time
    pub timestamp: u64,
}
467
468impl ErrorContext {
469    /// Create new error context
470    pub fn new(operation: impl Into<String>, component: impl Into<String>) -> Self {
471        Self {
472            operation: operation.into(),
473            component: component.into(),
474            context: std::collections::HashMap::new(),
475            timestamp: std::time::SystemTime::now()
476                .duration_since(std::time::UNIX_EPOCH)
477                .unwrap_or_default()
478                .as_secs(),
479        }
480    }
481
482    /// Add context data
483    pub fn with_context(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
484        self.context.insert(key.into(), value.into());
485        self
486    }
487}
488
/// Extension trait for adding context to placement results.
///
/// Both methods consume the result and return a `PlacementResult<T>`. The
/// implementation for `PlacementResult<T>` in this module currently returns
/// the result unchanged and discards the supplied context.
pub trait PlacementResultExt<T> {
    /// Add error context from a prepared [`ErrorContext`] value.
    fn with_context(self, context: ErrorContext) -> PlacementResult<T>;

    /// Add simple context given only an operation name and a component name.
    fn context(self, operation: &str, component: &str) -> PlacementResult<T>;
}
497
498impl<T> PlacementResultExt<T> for PlacementResult<T> {
499    fn with_context(self, _context: ErrorContext) -> PlacementResult<T> {
500        // For now, just pass through the result
501        // In the future, we could wrap errors with context
502        self
503    }
504
505    fn context(self, _operation: &str, _component: &str) -> PlacementResult<T> {
506        // For now, just pass through the result
507        // In the future, we could wrap errors with context
508        self
509    }
510}
511
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: five nodes requested, only three on offer.
    fn not_enough_nodes() -> PlacementError {
        PlacementError::InsufficientNodes {
            required: 5,
            available: 3,
        }
    }

    /// Fixture: a "geographic" diversity violation with no nodes attached.
    fn geographic_diversity_violation() -> PlacementError {
        PlacementError::DiversityViolation {
            constraint: "geographic".to_string(),
            nodes: vec![],
            details: "too close".to_string(),
        }
    }

    #[test]
    fn test_error_severity() {
        assert_eq!(not_enough_nodes().severity(), 5);
        assert_eq!(geographic_diversity_violation().severity(), 2);
    }

    #[test]
    fn test_error_retryability() {
        // A timeout can be retried and carries its own five-second delay.
        let timeout = PlacementError::PlacementTimeout;
        assert!(timeout.is_retryable());
        assert_eq!(timeout.retry_delay(), Some(Duration::from_secs(5)));

        // A bad replication factor is not retryable, so there is no delay.
        let bad_factor = PlacementError::InvalidReplicationFactor(0);
        assert!(!bad_factor.is_retryable());
        assert_eq!(bad_factor.retry_delay(), None);
    }

    #[test]
    fn test_error_categories() {
        assert_eq!(
            not_enough_nodes().category(),
            ErrorCategory::NodeAvailability
        );
        assert_eq!(
            geographic_diversity_violation().category(),
            ErrorCategory::Constraints
        );
    }

    #[test]
    fn test_error_messages() {
        let text = not_enough_nodes().user_message();
        for fragment in ["Not enough nodes", "5", "3"] {
            assert!(text.contains(fragment));
        }
    }

    #[test]
    fn test_category_priority() {
        let config = ErrorCategory::Configuration.priority();
        let performance = ErrorCategory::Performance.priority();
        assert!(config > performance);

        let availability = ErrorCategory::NodeAvailability.priority();
        let network = ErrorCategory::Network.priority();
        assert!(availability > network);
    }

    #[test]
    fn test_error_context() {
        let ctx = ErrorContext::new("select_nodes", "placement_engine")
            .with_context("replication_factor", "8")
            .with_context("available_nodes", "5");

        assert_eq!(ctx.operation, "select_nodes");
        assert_eq!(ctx.component, "placement_engine");

        let lookup = |key: &str| ctx.context.get(key).cloned();
        assert_eq!(lookup("replication_factor"), Some("8".to_string()));
        assert_eq!(lookup("available_nodes"), Some("5".to_string()));
    }
}