1use anyhow::Result;
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use std::path::PathBuf;
11use std::sync::Arc;
12use tokio::sync::RwLock;
13use tracing::{debug, info};
14use uuid::Uuid;
15
16#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct DisasterRecoveryConfig {
19 pub enabled: bool,
21 pub backup: BackupConfig,
23 pub recovery: RecoveryConfig,
25 pub replication: ReplicationConfig,
27 pub business_continuity: BusinessContinuityConfig,
29}
30
31impl Default for DisasterRecoveryConfig {
32 fn default() -> Self {
33 Self {
34 enabled: true,
35 backup: BackupConfig::default(),
36 recovery: RecoveryConfig::default(),
37 replication: ReplicationConfig::default(),
38 business_continuity: BusinessContinuityConfig::default(),
39 }
40 }
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct BackupConfig {
46 pub enabled: bool,
48 pub schedule: BackupSchedule,
50 pub storage: BackupStorage,
52 pub retention: BackupRetentionPolicy,
54 pub encryption: BackupEncryption,
56 pub compression: BackupCompression,
58 pub verification: BackupVerification,
60}
61
62impl Default for BackupConfig {
63 fn default() -> Self {
64 Self {
65 enabled: true,
66 schedule: BackupSchedule::default(),
67 storage: BackupStorage::default(),
68 retention: BackupRetentionPolicy::default(),
69 encryption: BackupEncryption::default(),
70 compression: BackupCompression::default(),
71 verification: BackupVerification::default(),
72 }
73 }
74}
75
76#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct BackupSchedule {
79 pub full_backup: BackupFrequency,
81 pub incremental_backup: BackupFrequency,
83 pub differential_backup: Option<BackupFrequency>,
85 pub backup_window: Option<BackupWindow>,
87}
88
89impl Default for BackupSchedule {
90 fn default() -> Self {
91 Self {
92 full_backup: BackupFrequency::Weekly,
93 incremental_backup: BackupFrequency::Hourly,
94 differential_backup: Some(BackupFrequency::Daily),
95 backup_window: None,
96 }
97 }
98}
99
100#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
102pub enum BackupFrequency {
103 RealTime, EveryMinute,
105 Every5Minutes,
106 Every15Minutes,
107 Every30Minutes,
108 Hourly,
109 Every4Hours,
110 Every8Hours,
111 Daily,
112 Weekly,
113 Monthly,
114 Custom(u64), }
116
117impl std::fmt::Display for BackupFrequency {
118 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119 match self {
120 BackupFrequency::RealTime => write!(f, "Real-time"),
121 BackupFrequency::EveryMinute => write!(f, "Every minute"),
122 BackupFrequency::Every5Minutes => write!(f, "Every 5 minutes"),
123 BackupFrequency::Every15Minutes => write!(f, "Every 15 minutes"),
124 BackupFrequency::Every30Minutes => write!(f, "Every 30 minutes"),
125 BackupFrequency::Hourly => write!(f, "Hourly"),
126 BackupFrequency::Every4Hours => write!(f, "Every 4 hours"),
127 BackupFrequency::Every8Hours => write!(f, "Every 8 hours"),
128 BackupFrequency::Daily => write!(f, "Daily"),
129 BackupFrequency::Weekly => write!(f, "Weekly"),
130 BackupFrequency::Monthly => write!(f, "Monthly"),
131 BackupFrequency::Custom(secs) => write!(f, "Every {} seconds", secs),
132 }
133 }
134}
135
136#[derive(Debug, Clone, Serialize, Deserialize)]
138pub struct BackupWindow {
139 pub start_hour: u8,
141 pub end_hour: u8,
143 pub days_of_week: Vec<u8>,
145}
146
147#[derive(Debug, Clone, Serialize, Deserialize)]
149pub struct BackupStorage {
150 pub primary: StorageLocation,
152 pub secondary: Option<StorageLocation>,
154 pub offsite: Option<StorageLocation>,
156}
157
158impl Default for BackupStorage {
159 fn default() -> Self {
160 Self {
161 primary: StorageLocation::Local {
162 path: PathBuf::from("/var/backups/oxirs"),
163 },
164 secondary: None,
165 offsite: None,
166 }
167 }
168}
169
170#[derive(Debug, Clone, Serialize, Deserialize)]
172pub enum StorageLocation {
173 Local { path: PathBuf },
175 S3 {
177 bucket: String,
178 region: String,
179 prefix: String,
180 access_key_id: Option<String>,
181 secret_access_key: Option<String>,
182 },
183 Azure {
185 account_name: String,
186 container: String,
187 prefix: String,
188 access_key: Option<String>,
189 },
190 GCS {
192 bucket: String,
193 prefix: String,
194 credentials_path: Option<PathBuf>,
195 },
196 Network { url: String, mount_point: PathBuf },
198}
199
200#[derive(Debug, Clone, Serialize, Deserialize)]
202pub struct BackupRetentionPolicy {
203 pub keep_all_within_days: u32,
205 pub keep_daily_for_days: u32,
207 pub keep_weekly_for_weeks: u32,
209 pub keep_monthly_for_months: u32,
211 pub keep_yearly_forever: bool,
213}
214
215impl Default for BackupRetentionPolicy {
216 fn default() -> Self {
217 Self {
218 keep_all_within_days: 7, keep_daily_for_days: 30, keep_weekly_for_weeks: 12, keep_monthly_for_months: 12, keep_yearly_forever: true,
223 }
224 }
225}
226
227#[derive(Debug, Clone, Serialize, Deserialize)]
229pub struct BackupEncryption {
230 pub enabled: bool,
232 pub algorithm: EncryptionAlgorithm,
234 pub kdf: KeyDerivationFunction,
236}
237
238impl Default for BackupEncryption {
239 fn default() -> Self {
240 Self {
241 enabled: true,
242 algorithm: EncryptionAlgorithm::AES256GCM,
243 kdf: KeyDerivationFunction::Argon2,
244 }
245 }
246}
247
248#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
250pub enum EncryptionAlgorithm {
251 AES256GCM,
252 AES256CBC,
253 ChaCha20Poly1305,
254}
255
256#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
258pub enum KeyDerivationFunction {
259 PBKDF2,
260 Argon2,
261 Scrypt,
262}
263
264#[derive(Debug, Clone, Serialize, Deserialize)]
266pub struct BackupCompression {
267 pub enabled: bool,
269 pub algorithm: CompressionAlgorithm,
271 pub level: u8,
273}
274
275impl Default for BackupCompression {
276 fn default() -> Self {
277 Self {
278 enabled: true,
279 algorithm: CompressionAlgorithm::Zstd,
280 level: 6,
281 }
282 }
283}
284
285#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
287pub enum CompressionAlgorithm {
288 Gzip,
289 Bzip2,
290 Zstd,
291 Lz4,
292 Xz,
293}
294
295#[derive(Debug, Clone, Serialize, Deserialize)]
297pub struct BackupVerification {
298 pub enabled: bool,
300 pub frequency: BackupFrequency,
302 pub checksum_algorithm: ChecksumAlgorithm,
304 pub test_restore: bool,
306}
307
308impl Default for BackupVerification {
309 fn default() -> Self {
310 Self {
311 enabled: true,
312 frequency: BackupFrequency::Daily,
313 checksum_algorithm: ChecksumAlgorithm::SHA256,
314 test_restore: false,
315 }
316 }
317}
318
319#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
321pub enum ChecksumAlgorithm {
322 MD5,
323 SHA256,
324 SHA512,
325 BLAKE3,
326}
327
328#[derive(Debug, Clone, Serialize, Deserialize)]
330pub struct RecoveryConfig {
331 pub rto_minutes: u32,
333 pub rpo_minutes: u32,
335 pub automated_recovery: bool,
337 pub priorities: HashMap<String, RecoveryPriority>,
339}
340
341impl Default for RecoveryConfig {
342 fn default() -> Self {
343 let mut priorities = HashMap::new();
344 priorities.insert("critical".to_string(), RecoveryPriority::P1);
345 priorities.insert("high".to_string(), RecoveryPriority::P2);
346 priorities.insert("normal".to_string(), RecoveryPriority::P3);
347
348 Self {
349 rto_minutes: 60, rpo_minutes: 15, automated_recovery: true,
352 priorities,
353 }
354 }
355}
356
357#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
359pub enum RecoveryPriority {
360 P1, P2, P3, P4, }
365
366#[derive(Debug, Clone, Serialize, Deserialize)]
368pub struct ReplicationConfig {
369 pub enabled: bool,
371 pub mode: ReplicationMode,
373 pub targets: Vec<ReplicationTarget>,
375 pub failover: FailoverConfig,
377}
378
379impl Default for ReplicationConfig {
380 fn default() -> Self {
381 Self {
382 enabled: true,
383 mode: ReplicationMode::Asynchronous,
384 targets: vec![],
385 failover: FailoverConfig::default(),
386 }
387 }
388}
389
390#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
392pub enum ReplicationMode {
393 Synchronous,
395 Asynchronous,
397 SemiSynchronous,
399}
400
401#[derive(Debug, Clone, Serialize, Deserialize)]
403pub struct ReplicationTarget {
404 pub id: String,
406 pub endpoint: String,
408 pub region: String,
410 pub priority: u32,
412}
413
414#[derive(Debug, Clone, Serialize, Deserialize)]
416pub struct FailoverConfig {
417 pub enabled: bool,
419 pub timeout_secs: u64,
421 pub health_check_interval_secs: u64,
423 pub min_replicas: u32,
425}
426
427impl Default for FailoverConfig {
428 fn default() -> Self {
429 Self {
430 enabled: true,
431 timeout_secs: 30,
432 health_check_interval_secs: 10,
433 min_replicas: 1,
434 }
435 }
436}
437
438#[derive(Debug, Clone, Serialize, Deserialize)]
440pub struct BusinessContinuityConfig {
441 pub enabled: bool,
443 pub scenarios: Vec<DisasterScenario>,
445 pub runbooks: Vec<RecoveryRunbook>,
447}
448
449impl Default for BusinessContinuityConfig {
450 fn default() -> Self {
451 Self {
452 enabled: true,
453 scenarios: vec![],
454 runbooks: vec![],
455 }
456 }
457}
458
459#[derive(Debug, Clone, Serialize, Deserialize)]
461pub struct DisasterScenario {
462 pub id: String,
464 pub name: String,
466 pub description: String,
468 pub impact: ImpactLevel,
470 pub procedures: Vec<String>,
472}
473
474#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
476pub enum ImpactLevel {
477 Low,
478 Medium,
479 High,
480 Critical,
481}
482
483#[derive(Debug, Clone, Serialize, Deserialize)]
485pub struct RecoveryRunbook {
486 pub id: String,
488 pub name: String,
490 pub steps: Vec<RunbookStep>,
492 pub automated: bool,
494}
495
496#[derive(Debug, Clone, Serialize, Deserialize)]
498pub struct RunbookStep {
499 pub step_number: u32,
501 pub description: String,
503 pub command: Option<String>,
505 pub expected_duration_secs: u64,
507 pub requires_approval: bool,
509}
510
511pub struct DisasterRecoveryManager {
513 config: DisasterRecoveryConfig,
514 backup_jobs: Arc<RwLock<Vec<BackupJob>>>,
515 recovery_operations: Arc<RwLock<Vec<RecoveryOperation>>>,
516 metrics: Arc<RwLock<DRMetrics>>,
517}
518
519#[derive(Debug, Clone, Serialize, Deserialize)]
521pub struct BackupJob {
522 pub job_id: String,
524 pub job_type: BackupType,
526 pub status: BackupStatus,
528 pub started_at: DateTime<Utc>,
530 pub completed_at: Option<DateTime<Utc>>,
532 pub size_bytes: u64,
534 pub checksum: Option<String>,
536 pub location: String,
538}
539
540#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
542pub enum BackupType {
543 Full,
544 Incremental,
545 Differential,
546}
547
548#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
550pub enum BackupStatus {
551 Pending,
552 Running,
553 Completed,
554 Failed,
555 Verifying,
556 Verified,
557}
558
559#[derive(Debug, Clone, Serialize, Deserialize)]
561pub struct RecoveryOperation {
562 pub operation_id: String,
564 pub recovery_type: RecoveryType,
566 pub status: RecoveryStatus,
568 pub started_at: DateTime<Utc>,
570 pub completed_at: Option<DateTime<Utc>>,
572 pub backup_job_id: String,
574 pub recovery_point: DateTime<Utc>,
576}
577
578#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
580pub enum RecoveryType {
581 FullRestore,
582 PartialRestore,
583 PointInTimeRestore,
584 TestRestore,
585}
586
587#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
589pub enum RecoveryStatus {
590 Pending,
591 Running,
592 Completed,
593 Failed,
594 Cancelled,
595}
596
597#[derive(Debug, Clone, Default, Serialize, Deserialize)]
599pub struct DRMetrics {
600 pub backups_completed: u64,
602 pub backups_failed: u64,
604 pub recoveries_completed: u64,
606 pub recoveries_failed: u64,
608 pub last_successful_backup: Option<DateTime<Utc>>,
610 pub last_verified_backup: Option<DateTime<Utc>>,
612 pub current_rto_minutes: f64,
614 pub current_rpo_minutes: f64,
616 pub total_backup_size_bytes: u64,
618}
619
620impl DisasterRecoveryManager {
621 pub fn new(config: DisasterRecoveryConfig) -> Self {
623 Self {
624 config,
625 backup_jobs: Arc::new(RwLock::new(Vec::new())),
626 recovery_operations: Arc::new(RwLock::new(Vec::new())),
627 metrics: Arc::new(RwLock::new(DRMetrics::default())),
628 }
629 }
630
631 pub async fn initialize(&self) -> Result<()> {
633 if !self.config.enabled {
634 info!("Disaster recovery is disabled");
635 return Ok(());
636 }
637
638 info!("Initializing disaster recovery system");
639
640 self.initialize_backup_storage().await?;
642
643 if self.config.backup.enabled {
645 self.start_backup_scheduler().await?;
646 }
647
648 if self.config.replication.enabled {
650 self.start_replication().await?;
651 }
652
653 info!("Disaster recovery system initialized successfully");
654 Ok(())
655 }
656
657 async fn initialize_backup_storage(&self) -> Result<()> {
659 debug!("Initializing backup storage");
660
661 match &self.config.backup.storage.primary {
662 StorageLocation::Local { path } => {
663 tokio::fs::create_dir_all(path).await?;
664 info!("Local backup storage initialized: {:?}", path);
665 }
666 StorageLocation::S3 { bucket, .. } => {
667 debug!("S3 backup storage: {}", bucket);
668 }
669 StorageLocation::Azure { container, .. } => {
670 debug!("Azure backup storage: {}", container);
671 }
672 StorageLocation::GCS { bucket, .. } => {
673 debug!("GCS backup storage: {}", bucket);
674 }
675 StorageLocation::Network { url, .. } => {
676 debug!("Network backup storage: {}", url);
677 }
678 }
679
680 Ok(())
681 }
682
683 async fn start_backup_scheduler(&self) -> Result<()> {
685 debug!("Starting backup scheduler");
686 Ok(())
689 }
690
691 async fn start_replication(&self) -> Result<()> {
693 debug!(
694 "Starting replication to {} targets",
695 self.config.replication.targets.len()
696 );
697 Ok(())
699 }
700
701 pub async fn create_backup(&self, backup_type: BackupType) -> Result<BackupJob> {
703 info!("Creating {:?} backup", backup_type);
704
705 let job = BackupJob {
706 job_id: Uuid::new_v4().to_string(),
707 job_type: backup_type,
708 status: BackupStatus::Running,
709 started_at: Utc::now(),
710 completed_at: None,
711 size_bytes: 0,
712 checksum: None,
713 location: String::new(),
714 };
715
716 {
717 let mut jobs = self.backup_jobs.write().await;
718 jobs.push(job.clone());
719 }
720
721 debug!("Backup job {} started", job.job_id);
724
725 Ok(job)
726 }
727
728 pub async fn restore_from_backup(
730 &self,
731 backup_job_id: &str,
732 recovery_type: RecoveryType,
733 ) -> Result<RecoveryOperation> {
734 info!("Starting {:?} from backup {}", recovery_type, backup_job_id);
735
736 let operation = RecoveryOperation {
737 operation_id: Uuid::new_v4().to_string(),
738 recovery_type,
739 status: RecoveryStatus::Running,
740 started_at: Utc::now(),
741 completed_at: None,
742 backup_job_id: backup_job_id.to_string(),
743 recovery_point: Utc::now(),
744 };
745
746 {
747 let mut operations = self.recovery_operations.write().await;
748 operations.push(operation.clone());
749 }
750
751 debug!("Recovery operation {} started", operation.operation_id);
752
753 Ok(operation)
754 }
755
756 pub async fn get_metrics(&self) -> DRMetrics {
758 self.metrics.read().await.clone()
759 }
760
761 pub async fn verify_backups(&self) -> Result<Vec<BackupVerificationResult>> {
763 info!("Starting backup verification");
764
765 let jobs = self.backup_jobs.read().await;
766 let mut results = Vec::new();
767
768 for job in jobs.iter() {
769 results.push(BackupVerificationResult {
770 backup_job_id: job.job_id.clone(),
771 verified: true,
772 checksum_match: true,
773 errors: vec![],
774 });
775 }
776
777 info!("Verified {} backups", results.len());
778 Ok(results)
779 }
780
781 pub async fn execute_runbook(&self, runbook_id: &str) -> Result<RunbookExecution> {
783 info!("Executing recovery runbook: {}", runbook_id);
784
785 let execution = RunbookExecution {
786 execution_id: Uuid::new_v4().to_string(),
787 runbook_id: runbook_id.to_string(),
788 status: RunbookExecutionStatus::Running,
789 started_at: Utc::now(),
790 completed_at: None,
791 steps_completed: 0,
792 steps_total: 0,
793 };
794
795 Ok(execution)
796 }
797}
798
799#[derive(Debug, Clone, Serialize, Deserialize)]
801pub struct BackupVerificationResult {
802 pub backup_job_id: String,
803 pub verified: bool,
804 pub checksum_match: bool,
805 pub errors: Vec<String>,
806}
807
808#[derive(Debug, Clone, Serialize, Deserialize)]
810pub struct RunbookExecution {
811 pub execution_id: String,
812 pub runbook_id: String,
813 pub status: RunbookExecutionStatus,
814 pub started_at: DateTime<Utc>,
815 pub completed_at: Option<DateTime<Utc>>,
816 pub steps_completed: u32,
817 pub steps_total: u32,
818}
819
820#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
822pub enum RunbookExecutionStatus {
823 Pending,
824 Running,
825 WaitingForApproval,
826 Completed,
827 Failed,
828 Cancelled,
829}
830
831#[cfg(test)]
832mod tests {
833 use super::*;
834
835 #[tokio::test]
836 async fn test_dr_config_default() {
837 let config = DisasterRecoveryConfig::default();
838 assert!(config.enabled);
839 assert!(config.backup.enabled);
840 }
841
842 #[tokio::test]
843 async fn test_backup_frequency_display() {
844 assert_eq!(BackupFrequency::Hourly.to_string(), "Hourly");
845 assert_eq!(BackupFrequency::Daily.to_string(), "Daily");
846 assert_eq!(BackupFrequency::Weekly.to_string(), "Weekly");
847 }
848
849 #[tokio::test]
850 async fn test_dr_manager_creation() {
851 let config = DisasterRecoveryConfig::default();
852 let manager = DisasterRecoveryManager::new(config);
853 let metrics = manager.get_metrics().await;
854 assert_eq!(metrics.backups_completed, 0);
855 }
856
857 #[tokio::test]
858 async fn test_backup_job_creation() {
859 let config = DisasterRecoveryConfig::default();
860 let manager = DisasterRecoveryManager::new(config);
861 let job = manager.create_backup(BackupType::Full).await.unwrap();
862 assert_eq!(job.job_type, BackupType::Full);
863 assert_eq!(job.status, BackupStatus::Running);
864 }
865}