// canic_backup/persistence/mod.rs

1use crate::{
2    artifacts::{ArtifactChecksum, ArtifactChecksumError},
3    journal::{ArtifactState, DownloadJournal},
4    manifest::{FleetBackupManifest, ManifestValidationError},
5};
6use serde::{Deserialize, Serialize, de::DeserializeOwned};
7use std::{
8    collections::BTreeSet,
9    fs::{self, File},
10    io,
11    path::{Path, PathBuf},
12};
13use thiserror::Error as ThisError;
14
15const MANIFEST_FILE_NAME: &str = "fleet-backup-manifest.json";
16const JOURNAL_FILE_NAME: &str = "download-journal.json";
17
18///
19/// BackupLayout
20///
21
22#[derive(Clone, Debug)]
23pub struct BackupLayout {
24    root: PathBuf,
25}
26
27impl BackupLayout {
28    /// Create a filesystem layout rooted at one backup directory.
29    #[must_use]
30    pub const fn new(root: PathBuf) -> Self {
31        Self { root }
32    }
33
34    /// Return the root backup directory path.
35    #[must_use]
36    pub fn root(&self) -> &Path {
37        &self.root
38    }
39
40    /// Return the canonical manifest path for this backup layout.
41    #[must_use]
42    pub fn manifest_path(&self) -> PathBuf {
43        self.root.join(MANIFEST_FILE_NAME)
44    }
45
46    /// Return the canonical mutable journal path for this backup layout.
47    #[must_use]
48    pub fn journal_path(&self) -> PathBuf {
49        self.root.join(JOURNAL_FILE_NAME)
50    }
51
52    /// Write a validated manifest with atomic replace semantics.
53    pub fn write_manifest(&self, manifest: &FleetBackupManifest) -> Result<(), PersistenceError> {
54        manifest.validate()?;
55        write_json_atomic(&self.manifest_path(), manifest)
56    }
57
58    /// Read and validate a manifest from this backup layout.
59    pub fn read_manifest(&self) -> Result<FleetBackupManifest, PersistenceError> {
60        let manifest = read_json(&self.manifest_path())?;
61        FleetBackupManifest::validate(&manifest)?;
62        Ok(manifest)
63    }
64
65    /// Write a validated download journal with atomic replace semantics.
66    pub fn write_journal(&self, journal: &DownloadJournal) -> Result<(), PersistenceError> {
67        journal.validate()?;
68        write_json_atomic(&self.journal_path(), journal)
69    }
70
71    /// Read and validate a download journal from this backup layout.
72    pub fn read_journal(&self) -> Result<DownloadJournal, PersistenceError> {
73        let journal = read_json(&self.journal_path())?;
74        DownloadJournal::validate(&journal)?;
75        Ok(journal)
76    }
77
78    /// Validate the manifest, journal, and durable artifact checksums.
79    pub fn verify_integrity(&self) -> Result<BackupIntegrityReport, PersistenceError> {
80        let manifest = self.read_manifest()?;
81        let journal = self.read_journal()?;
82        verify_layout_integrity(self, &manifest, &journal)
83    }
84}
85
86///
87/// BackupIntegrityReport
88///
89
90#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
91pub struct BackupIntegrityReport {
92    pub backup_id: String,
93    pub verified: bool,
94    pub manifest_members: usize,
95    pub journal_artifacts: usize,
96    pub durable_artifacts: usize,
97    pub artifacts: Vec<ArtifactIntegrityReport>,
98}
99
100///
101/// ArtifactIntegrityReport
102///
103
104#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
105pub struct ArtifactIntegrityReport {
106    pub canister_id: String,
107    pub snapshot_id: String,
108    pub artifact_path: String,
109    pub checksum: String,
110}
111
112///
113/// PersistenceError
114///
115
116#[derive(Debug, ThisError)]
117pub enum PersistenceError {
118    #[error(transparent)]
119    Io(#[from] io::Error),
120
121    #[error(transparent)]
122    Json(#[from] serde_json::Error),
123
124    #[error(transparent)]
125    InvalidManifest(#[from] ManifestValidationError),
126
127    #[error(transparent)]
128    InvalidJournal(#[from] crate::journal::JournalValidationError),
129
130    #[error(transparent)]
131    Checksum(#[from] ArtifactChecksumError),
132
133    #[error("manifest backup id {manifest} does not match journal backup id {journal}")]
134    BackupIdMismatch { manifest: String, journal: String },
135
136    #[error("journal artifact {canister_id} snapshot {snapshot_id} is not durable")]
137    NonDurableArtifact {
138        canister_id: String,
139        snapshot_id: String,
140    },
141
142    #[error("manifest member {canister_id} snapshot {snapshot_id} has no journal artifact")]
143    MissingJournalArtifact {
144        canister_id: String,
145        snapshot_id: String,
146    },
147
148    #[error("journal artifact {canister_id} snapshot {snapshot_id} is not declared in manifest")]
149    UnexpectedJournalArtifact {
150        canister_id: String,
151        snapshot_id: String,
152    },
153
154    #[error(
155        "manifest checksum for {canister_id} snapshot {snapshot_id} does not match journal checksum"
156    )]
157    ManifestJournalChecksumMismatch {
158        canister_id: String,
159        snapshot_id: String,
160        manifest: String,
161        journal: String,
162    },
163
164    #[error("artifact path does not exist: {0}")]
165    MissingArtifact(String),
166}
167
168// Verify cross-file backup layout consistency and artifact checksums.
169fn verify_layout_integrity(
170    layout: &BackupLayout,
171    manifest: &FleetBackupManifest,
172    journal: &DownloadJournal,
173) -> Result<BackupIntegrityReport, PersistenceError> {
174    if manifest.backup_id != journal.backup_id {
175        return Err(PersistenceError::BackupIdMismatch {
176            manifest: manifest.backup_id.clone(),
177            journal: journal.backup_id.clone(),
178        });
179    }
180
181    let expected_artifacts = manifest
182        .fleet
183        .members
184        .iter()
185        .map(|member| {
186            (
187                member.canister_id.as_str(),
188                member.source_snapshot.snapshot_id.as_str(),
189            )
190        })
191        .collect::<BTreeSet<_>>();
192    for entry in &journal.artifacts {
193        if !expected_artifacts.contains(&(entry.canister_id.as_str(), entry.snapshot_id.as_str())) {
194            return Err(PersistenceError::UnexpectedJournalArtifact {
195                canister_id: entry.canister_id.clone(),
196                snapshot_id: entry.snapshot_id.clone(),
197            });
198        }
199    }
200
201    let mut artifacts = Vec::with_capacity(journal.artifacts.len());
202    for member in &manifest.fleet.members {
203        let Some(entry) = journal.artifacts.iter().find(|entry| {
204            entry.canister_id == member.canister_id
205                && entry.snapshot_id == member.source_snapshot.snapshot_id
206        }) else {
207            return Err(PersistenceError::MissingJournalArtifact {
208                canister_id: member.canister_id.clone(),
209                snapshot_id: member.source_snapshot.snapshot_id.clone(),
210            });
211        };
212
213        if entry.state != ArtifactState::Durable {
214            return Err(PersistenceError::NonDurableArtifact {
215                canister_id: entry.canister_id.clone(),
216                snapshot_id: entry.snapshot_id.clone(),
217            });
218        }
219
220        let Some(expected_hash) = entry.checksum.as_deref() else {
221            unreachable!("validated durable journals must include checksums");
222        };
223        if let Some(manifest_hash) = member.source_snapshot.checksum.as_deref()
224            && manifest_hash != expected_hash
225        {
226            return Err(PersistenceError::ManifestJournalChecksumMismatch {
227                canister_id: entry.canister_id.clone(),
228                snapshot_id: entry.snapshot_id.clone(),
229                manifest: manifest_hash.to_string(),
230                journal: expected_hash.to_string(),
231            });
232        }
233        let artifact_path = resolve_artifact_path(layout.root(), &entry.artifact_path);
234        if !artifact_path.exists() {
235            return Err(PersistenceError::MissingArtifact(
236                artifact_path.display().to_string(),
237            ));
238        }
239
240        ArtifactChecksum::from_path(&artifact_path)?.verify(expected_hash)?;
241        artifacts.push(ArtifactIntegrityReport {
242            canister_id: entry.canister_id.clone(),
243            snapshot_id: entry.snapshot_id.clone(),
244            artifact_path: artifact_path.display().to_string(),
245            checksum: expected_hash.to_string(),
246        });
247    }
248
249    Ok(BackupIntegrityReport {
250        backup_id: manifest.backup_id.clone(),
251        verified: true,
252        manifest_members: manifest.fleet.members.len(),
253        journal_artifacts: journal.artifacts.len(),
254        durable_artifacts: artifacts.len(),
255        artifacts,
256    })
257}
258
259// Resolve artifact paths from either absolute, cwd-relative, or layout-relative values.
260fn resolve_artifact_path(root: &Path, artifact_path: &str) -> PathBuf {
261    let path = PathBuf::from(artifact_path);
262    if path.is_absolute() || path.exists() {
263        path
264    } else {
265        root.join(path)
266    }
267}
268
269// Write JSON to a temporary sibling path and then atomically replace the target.
270fn write_json_atomic<T>(path: &Path, value: &T) -> Result<(), PersistenceError>
271where
272    T: Serialize,
273{
274    if let Some(parent) = path.parent() {
275        fs::create_dir_all(parent)?;
276    }
277
278    let tmp_path = temp_path_for(path);
279    let mut file = File::create(&tmp_path)?;
280    serde_json::to_writer_pretty(&mut file, value)?;
281    file.sync_all()?;
282    drop(file);
283
284    fs::rename(&tmp_path, path)?;
285
286    if let Some(parent) = path.parent() {
287        File::open(parent)?.sync_all()?;
288    }
289
290    Ok(())
291}
292
293// Read one JSON document from disk.
294fn read_json<T>(path: &Path) -> Result<T, PersistenceError>
295where
296    T: DeserializeOwned,
297{
298    let file = File::open(path)?;
299    Ok(serde_json::from_reader(file)?)
300}
301
302// Build the sibling temporary path used for atomic writes.
303fn temp_path_for(path: &Path) -> PathBuf {
304    let mut file_name = path
305        .file_name()
306        .and_then(|name| name.to_str())
307        .unwrap_or("canic-backup")
308        .to_string();
309    file_name.push_str(".tmp");
310    path.with_file_name(file_name)
311}
312
313#[cfg(test)]
314mod tests {
315    use super::*;
316    use crate::{
317        journal::{ArtifactJournalEntry, ArtifactState},
318        manifest::{
319            BackupUnit, BackupUnitKind, ConsistencyMode, ConsistencySection, FleetMember,
320            FleetSection, IdentityMode, SourceMetadata, SourceSnapshot, ToolMetadata,
321            VerificationCheck, VerificationPlan,
322        },
323    };
324    use std::{
325        fs,
326        time::{SystemTime, UNIX_EPOCH},
327    };
328
329    const ROOT: &str = "aaaaa-aa";
330    const CHILD: &str = "renrk-eyaaa-aaaaa-aaada-cai";
331    const HASH: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
332
333    // Ensure manifest writes create parent dirs and round-trip through validation.
334    #[test]
335    fn manifest_round_trips_through_layout() {
336        let root = temp_dir("canic-backup-manifest-layout");
337        let layout = BackupLayout::new(root.clone());
338        let manifest = valid_manifest();
339
340        layout
341            .write_manifest(&manifest)
342            .expect("write manifest atomically");
343        let read = layout.read_manifest().expect("read manifest");
344
345        fs::remove_dir_all(root).expect("remove temp layout");
346        assert_eq!(read.backup_id, manifest.backup_id);
347    }
348
349    // Ensure journal writes create parent dirs and round-trip through validation.
350    #[test]
351    fn journal_round_trips_through_layout() {
352        let root = temp_dir("canic-backup-journal-layout");
353        let layout = BackupLayout::new(root.clone());
354        let journal = valid_journal();
355
356        layout
357            .write_journal(&journal)
358            .expect("write journal atomically");
359        let read = layout.read_journal().expect("read journal");
360
361        fs::remove_dir_all(root).expect("remove temp layout");
362        assert_eq!(read.backup_id, journal.backup_id);
363    }
364
365    // Ensure invalid manifests are rejected before writing.
366    #[test]
367    fn invalid_manifest_is_not_written() {
368        let root = temp_dir("canic-backup-invalid-manifest");
369        let layout = BackupLayout::new(root.clone());
370        let mut manifest = valid_manifest();
371        manifest.fleet.discovery_topology_hash = "bad".to_string();
372
373        let err = layout
374            .write_manifest(&manifest)
375            .expect_err("invalid manifest should fail");
376
377        let manifest_path = layout.manifest_path();
378        fs::remove_dir_all(root).ok();
379        assert!(matches!(err, PersistenceError::InvalidManifest(_)));
380        assert!(!manifest_path.exists());
381    }
382
383    // Ensure layout integrity verifies manifest, journal, and artifact checksums.
384    #[test]
385    fn integrity_verifies_durable_artifacts() {
386        let root = temp_dir("canic-backup-integrity");
387        let layout = BackupLayout::new(root.clone());
388        let checksum = write_artifact(&root, b"root artifact");
389        let journal = journal_with_checksum(checksum.hash.clone());
390
391        layout
392            .write_manifest(&valid_manifest())
393            .expect("write manifest");
394        layout.write_journal(&journal).expect("write journal");
395
396        let report = layout.verify_integrity().expect("verify integrity");
397
398        fs::remove_dir_all(root).expect("remove temp layout");
399        assert_eq!(report.backup_id, "fbk_test_001");
400        assert!(report.verified);
401        assert_eq!(report.manifest_members, 1);
402        assert_eq!(report.durable_artifacts, 1);
403        assert_eq!(report.artifacts[0].checksum, checksum.hash);
404    }
405
406    // Ensure mismatched manifest and journal backup IDs are rejected.
407    #[test]
408    fn integrity_rejects_backup_id_mismatch() {
409        let root = temp_dir("canic-backup-integrity-id");
410        let layout = BackupLayout::new(root.clone());
411        let checksum = write_artifact(&root, b"root artifact");
412        let mut journal = journal_with_checksum(checksum.hash);
413        journal.backup_id = "other-backup".to_string();
414
415        layout
416            .write_manifest(&valid_manifest())
417            .expect("write manifest");
418        layout.write_journal(&journal).expect("write journal");
419
420        let err = layout
421            .verify_integrity()
422            .expect_err("backup id mismatch should fail");
423
424        fs::remove_dir_all(root).expect("remove temp layout");
425        assert!(matches!(err, PersistenceError::BackupIdMismatch { .. }));
426    }
427
428    // Ensure incomplete journals cannot pass backup integrity verification.
429    #[test]
430    fn integrity_rejects_non_durable_artifacts() {
431        let root = temp_dir("canic-backup-integrity-state");
432        let layout = BackupLayout::new(root.clone());
433        let mut journal = valid_journal();
434        journal.artifacts[0].state = ArtifactState::Created;
435        journal.artifacts[0].checksum = None;
436
437        layout
438            .write_manifest(&valid_manifest())
439            .expect("write manifest");
440        layout.write_journal(&journal).expect("write journal");
441
442        let err = layout
443            .verify_integrity()
444            .expect_err("non-durable artifact should fail");
445
446        fs::remove_dir_all(root).expect("remove temp layout");
447        assert!(matches!(err, PersistenceError::NonDurableArtifact { .. }));
448    }
449
450    // Ensure journals cannot include artifacts outside the manifest boundary.
451    #[test]
452    fn integrity_rejects_unexpected_journal_artifacts() {
453        let root = temp_dir("canic-backup-integrity-extra");
454        let layout = BackupLayout::new(root.clone());
455        let checksum = write_artifact(&root, b"root artifact");
456        let mut journal = journal_with_checksum(checksum.hash);
457        let mut extra = journal.artifacts[0].clone();
458        extra.snapshot_id = "extra-snapshot".to_string();
459        journal.artifacts.push(extra);
460
461        layout
462            .write_manifest(&valid_manifest())
463            .expect("write manifest");
464        layout.write_journal(&journal).expect("write journal");
465
466        let err = layout
467            .verify_integrity()
468            .expect_err("unexpected journal artifact should fail");
469
470        fs::remove_dir_all(root).expect("remove temp layout");
471        assert!(matches!(
472            err,
473            PersistenceError::UnexpectedJournalArtifact { .. }
474        ));
475    }
476
477    // Ensure manifest snapshot checksums cannot drift from the durable journal.
478    #[test]
479    fn integrity_rejects_manifest_journal_checksum_mismatch() {
480        let root = temp_dir("canic-backup-integrity-manifest-checksum");
481        let layout = BackupLayout::new(root.clone());
482        let checksum = write_artifact(&root, b"root artifact");
483        let mut manifest = valid_manifest();
484        manifest.fleet.members[0].source_snapshot.checksum =
485            Some("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff".to_string());
486
487        layout.write_manifest(&manifest).expect("write manifest");
488        layout
489            .write_journal(&journal_with_checksum(checksum.hash))
490            .expect("write journal");
491
492        let err = layout
493            .verify_integrity()
494            .expect_err("manifest checksum mismatch should fail");
495
496        fs::remove_dir_all(root).expect("remove temp layout");
497        assert!(matches!(
498            err,
499            PersistenceError::ManifestJournalChecksumMismatch { .. }
500        ));
501    }
502
503    // Build one valid manifest for persistence tests.
504    fn valid_manifest() -> FleetBackupManifest {
505        FleetBackupManifest {
506            manifest_version: 1,
507            backup_id: "fbk_test_001".to_string(),
508            created_at: "2026-04-10T12:00:00Z".to_string(),
509            tool: ToolMetadata {
510                name: "canic".to_string(),
511                version: "v1".to_string(),
512            },
513            source: SourceMetadata {
514                environment: "local".to_string(),
515                root_canister: ROOT.to_string(),
516            },
517            consistency: ConsistencySection {
518                mode: ConsistencyMode::CrashConsistent,
519                backup_units: vec![BackupUnit {
520                    unit_id: "whole-fleet".to_string(),
521                    kind: BackupUnitKind::WholeFleet,
522                    roles: vec!["root".to_string()],
523                    consistency_reason: None,
524                    dependency_closure: Vec::new(),
525                    topology_validation: "subtree-closed".to_string(),
526                    quiescence_strategy: None,
527                }],
528            },
529            fleet: FleetSection {
530                topology_hash_algorithm: "sha256".to_string(),
531                topology_hash_input: "sorted(pid,parent_pid,role,module_hash)".to_string(),
532                discovery_topology_hash: HASH.to_string(),
533                pre_snapshot_topology_hash: HASH.to_string(),
534                topology_hash: HASH.to_string(),
535                members: vec![FleetMember {
536                    role: "root".to_string(),
537                    canister_id: ROOT.to_string(),
538                    parent_canister_id: None,
539                    subnet_canister_id: Some(CHILD.to_string()),
540                    controller_hint: Some(ROOT.to_string()),
541                    identity_mode: IdentityMode::Fixed,
542                    restore_group: 1,
543                    verification_class: "basic".to_string(),
544                    verification_checks: vec![VerificationCheck {
545                        kind: "call".to_string(),
546                        method: Some("canic_ready".to_string()),
547                        roles: Vec::new(),
548                    }],
549                    source_snapshot: SourceSnapshot {
550                        snapshot_id: "snap-root".to_string(),
551                        module_hash: Some(HASH.to_string()),
552                        wasm_hash: Some(HASH.to_string()),
553                        code_version: Some("v0.30.0".to_string()),
554                        artifact_path: "artifacts/root".to_string(),
555                        checksum_algorithm: "sha256".to_string(),
556                        checksum: None,
557                    },
558                }],
559            },
560            verification: VerificationPlan {
561                fleet_checks: Vec::new(),
562                member_checks: Vec::new(),
563            },
564        }
565    }
566
567    // Build one valid durable journal for persistence tests.
568    fn valid_journal() -> DownloadJournal {
569        journal_with_checksum(HASH.to_string())
570    }
571
572    // Build one durable journal with a caller-provided checksum.
573    fn journal_with_checksum(checksum: String) -> DownloadJournal {
574        DownloadJournal {
575            journal_version: 1,
576            backup_id: "fbk_test_001".to_string(),
577            artifacts: vec![ArtifactJournalEntry {
578                canister_id: ROOT.to_string(),
579                snapshot_id: "snap-root".to_string(),
580                state: ArtifactState::Durable,
581                temp_path: None,
582                artifact_path: "artifacts/root".to_string(),
583                checksum_algorithm: "sha256".to_string(),
584                checksum: Some(checksum),
585                updated_at: "2026-04-10T12:00:00Z".to_string(),
586            }],
587        }
588    }
589
590    // Write one artifact at the layout-relative path used by test journals.
591    fn write_artifact(root: &Path, bytes: &[u8]) -> ArtifactChecksum {
592        let path = root.join("artifacts/root");
593        fs::create_dir_all(path.parent().expect("artifact has parent")).expect("create artifacts");
594        fs::write(&path, bytes).expect("write artifact");
595        ArtifactChecksum::from_bytes(bytes)
596    }
597
598    // Build a unique temporary layout directory.
599    fn temp_dir(prefix: &str) -> PathBuf {
600        let nanos = SystemTime::now()
601            .duration_since(UNIX_EPOCH)
602            .expect("system time after epoch")
603            .as_nanos();
604        std::env::temp_dir().join(format!("{prefix}-{}-{nanos}", std::process::id()))
605    }
606}