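//! `canic_backup/persistence/mod.rs`
//!
//! Filesystem persistence for one backup directory: atomic manifest/journal
//! JSON writes, validated reads, and cross-file integrity verification.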

use crate::{
    artifacts::{ArtifactChecksum, ArtifactChecksumError},
    journal::{ArtifactState, DownloadJournal},
    manifest::{FleetBackupManifest, ManifestValidationError},
};
use serde::{Deserialize, Serialize, de::DeserializeOwned};
use std::{
    collections::{BTreeMap, BTreeSet},
    ffi::OsString,
    fs::{self, File},
    io,
    path::{Path, PathBuf},
};
use thiserror::Error as ThisError;
14
// File name of the fleet backup manifest, joined onto the layout root.
const MANIFEST_FILE_NAME: &str = "fleet-backup-manifest.json";
// File name of the mutable download journal, joined onto the layout root.
const JOURNAL_FILE_NAME: &str = "download-journal.json";
17
///
/// BackupLayout
///
/// Filesystem layout of one backup, identified by its root directory.
/// The manifest and journal paths are derived by joining fixed file names
/// onto that root.
///

#[derive(Clone, Debug)]
pub struct BackupLayout {
    // Root directory that the manifest and journal file names are joined onto.
    root: PathBuf,
}
26
27impl BackupLayout {
28    /// Create a filesystem layout rooted at one backup directory.
29    #[must_use]
30    pub const fn new(root: PathBuf) -> Self {
31        Self { root }
32    }
33
34    /// Return the root backup directory path.
35    #[must_use]
36    pub fn root(&self) -> &Path {
37        &self.root
38    }
39
40    /// Return the canonical manifest path for this backup layout.
41    #[must_use]
42    pub fn manifest_path(&self) -> PathBuf {
43        self.root.join(MANIFEST_FILE_NAME)
44    }
45
46    /// Return the canonical mutable journal path for this backup layout.
47    #[must_use]
48    pub fn journal_path(&self) -> PathBuf {
49        self.root.join(JOURNAL_FILE_NAME)
50    }
51
52    /// Write a validated manifest with atomic replace semantics.
53    pub fn write_manifest(&self, manifest: &FleetBackupManifest) -> Result<(), PersistenceError> {
54        manifest.validate()?;
55        write_json_atomic(&self.manifest_path(), manifest)
56    }
57
58    /// Read and validate a manifest from this backup layout.
59    pub fn read_manifest(&self) -> Result<FleetBackupManifest, PersistenceError> {
60        let manifest = read_json(&self.manifest_path())?;
61        FleetBackupManifest::validate(&manifest)?;
62        Ok(manifest)
63    }
64
65    /// Write a validated download journal with atomic replace semantics.
66    pub fn write_journal(&self, journal: &DownloadJournal) -> Result<(), PersistenceError> {
67        journal.validate()?;
68        write_json_atomic(&self.journal_path(), journal)
69    }
70
71    /// Read and validate a download journal from this backup layout.
72    pub fn read_journal(&self) -> Result<DownloadJournal, PersistenceError> {
73        let journal = read_json(&self.journal_path())?;
74        DownloadJournal::validate(&journal)?;
75        Ok(journal)
76    }
77
78    /// Validate the manifest, journal, and durable artifact checksums.
79    pub fn verify_integrity(&self) -> Result<BackupIntegrityReport, PersistenceError> {
80        let manifest = self.read_manifest()?;
81        let journal = self.read_journal()?;
82        verify_layout_integrity(self, &manifest, &journal)
83    }
84}
85
86///
87/// BackupIntegrityReport
88///
89
90#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
91pub struct BackupIntegrityReport {
92    pub backup_id: String,
93    pub verified: bool,
94    pub manifest_members: usize,
95    pub journal_artifacts: usize,
96    pub durable_artifacts: usize,
97    pub artifacts: Vec<ArtifactIntegrityReport>,
98}
99
100///
101/// ArtifactIntegrityReport
102///
103
104#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
105pub struct ArtifactIntegrityReport {
106    pub canister_id: String,
107    pub snapshot_id: String,
108    pub artifact_path: String,
109    pub checksum: String,
110}
111
112///
113/// PersistenceError
114///
115
116#[derive(Debug, ThisError)]
117pub enum PersistenceError {
118    #[error(transparent)]
119    Io(#[from] io::Error),
120
121    #[error(transparent)]
122    Json(#[from] serde_json::Error),
123
124    #[error(transparent)]
125    InvalidManifest(#[from] ManifestValidationError),
126
127    #[error(transparent)]
128    InvalidJournal(#[from] crate::journal::JournalValidationError),
129
130    #[error(transparent)]
131    Checksum(#[from] ArtifactChecksumError),
132
133    #[error("manifest backup id {manifest} does not match journal backup id {journal}")]
134    BackupIdMismatch { manifest: String, journal: String },
135
136    #[error("journal artifact {canister_id} snapshot {snapshot_id} is not durable")]
137    NonDurableArtifact {
138        canister_id: String,
139        snapshot_id: String,
140    },
141
142    #[error("manifest member {canister_id} snapshot {snapshot_id} has no journal artifact")]
143    MissingJournalArtifact {
144        canister_id: String,
145        snapshot_id: String,
146    },
147
148    #[error("journal artifact {canister_id} snapshot {snapshot_id} is not declared in manifest")]
149    UnexpectedJournalArtifact {
150        canister_id: String,
151        snapshot_id: String,
152    },
153
154    #[error("artifact path does not exist: {0}")]
155    MissingArtifact(String),
156}
157
158// Verify cross-file backup layout consistency and artifact checksums.
159fn verify_layout_integrity(
160    layout: &BackupLayout,
161    manifest: &FleetBackupManifest,
162    journal: &DownloadJournal,
163) -> Result<BackupIntegrityReport, PersistenceError> {
164    if manifest.backup_id != journal.backup_id {
165        return Err(PersistenceError::BackupIdMismatch {
166            manifest: manifest.backup_id.clone(),
167            journal: journal.backup_id.clone(),
168        });
169    }
170
171    let expected_artifacts = manifest
172        .fleet
173        .members
174        .iter()
175        .map(|member| {
176            (
177                member.canister_id.as_str(),
178                member.source_snapshot.snapshot_id.as_str(),
179            )
180        })
181        .collect::<BTreeSet<_>>();
182    for entry in &journal.artifacts {
183        if !expected_artifacts.contains(&(entry.canister_id.as_str(), entry.snapshot_id.as_str())) {
184            return Err(PersistenceError::UnexpectedJournalArtifact {
185                canister_id: entry.canister_id.clone(),
186                snapshot_id: entry.snapshot_id.clone(),
187            });
188        }
189    }
190
191    let mut artifacts = Vec::with_capacity(journal.artifacts.len());
192    for member in &manifest.fleet.members {
193        let Some(entry) = journal.artifacts.iter().find(|entry| {
194            entry.canister_id == member.canister_id
195                && entry.snapshot_id == member.source_snapshot.snapshot_id
196        }) else {
197            return Err(PersistenceError::MissingJournalArtifact {
198                canister_id: member.canister_id.clone(),
199                snapshot_id: member.source_snapshot.snapshot_id.clone(),
200            });
201        };
202
203        if entry.state != ArtifactState::Durable {
204            return Err(PersistenceError::NonDurableArtifact {
205                canister_id: entry.canister_id.clone(),
206                snapshot_id: entry.snapshot_id.clone(),
207            });
208        }
209
210        let Some(expected_hash) = entry.checksum.as_deref() else {
211            unreachable!("validated durable journals must include checksums");
212        };
213        let artifact_path = resolve_artifact_path(layout.root(), &entry.artifact_path);
214        if !artifact_path.exists() {
215            return Err(PersistenceError::MissingArtifact(
216                artifact_path.display().to_string(),
217            ));
218        }
219
220        ArtifactChecksum::from_path(&artifact_path)?.verify(expected_hash)?;
221        artifacts.push(ArtifactIntegrityReport {
222            canister_id: entry.canister_id.clone(),
223            snapshot_id: entry.snapshot_id.clone(),
224            artifact_path: artifact_path.display().to_string(),
225            checksum: expected_hash.to_string(),
226        });
227    }
228
229    Ok(BackupIntegrityReport {
230        backup_id: manifest.backup_id.clone(),
231        verified: true,
232        manifest_members: manifest.fleet.members.len(),
233        journal_artifacts: journal.artifacts.len(),
234        durable_artifacts: artifacts.len(),
235        artifacts,
236    })
237}
238
// Resolve artifact paths from either absolute, cwd-relative, or layout-relative values.
//
// - An absolute path is returned unchanged.
// - A relative path that exists as given (i.e. relative to the process
//   working directory) is returned unchanged.
// - Anything else is joined onto `root`, whether or not the result exists.
//
// NOTE(review): the second rule makes the result depend on the process
// working directory; a same-named file under the cwd takes precedence over
// the one inside the layout. Confirm that precedence is intended.
fn resolve_artifact_path(root: &Path, artifact_path: &str) -> PathBuf {
    let path = PathBuf::from(artifact_path);
    if path.is_absolute() || path.exists() {
        path
    } else {
        root.join(path)
    }
}
248
249// Write JSON to a temporary sibling path and then atomically replace the target.
250fn write_json_atomic<T>(path: &Path, value: &T) -> Result<(), PersistenceError>
251where
252    T: Serialize,
253{
254    if let Some(parent) = path.parent() {
255        fs::create_dir_all(parent)?;
256    }
257
258    let tmp_path = temp_path_for(path);
259    let mut file = File::create(&tmp_path)?;
260    serde_json::to_writer_pretty(&mut file, value)?;
261    file.sync_all()?;
262    drop(file);
263
264    fs::rename(&tmp_path, path)?;
265
266    if let Some(parent) = path.parent() {
267        File::open(parent)?.sync_all()?;
268    }
269
270    Ok(())
271}
272
273// Read one JSON document from disk.
274fn read_json<T>(path: &Path) -> Result<T, PersistenceError>
275where
276    T: DeserializeOwned,
277{
278    let file = File::open(path)?;
279    Ok(serde_json::from_reader(file)?)
280}
281
// Build the sibling temporary path used for atomic writes.
//
// The result is `<file name>.tmp` in the same directory as `path`. The suffix
// is appended on the raw `OsString`, so file names that are not valid UTF-8
// keep their own name instead of all collapsing onto one shared fallback.
// Paths with no final component (e.g. `/` or `..`) use `canic-backup.tmp`.
fn temp_path_for(path: &Path) -> PathBuf {
    let mut file_name = path
        .file_name()
        .map_or_else(|| OsString::from("canic-backup"), |name| name.to_os_string());
    file_name.push(".tmp");
    path.with_file_name(file_name)
}
292
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        journal::{ArtifactJournalEntry, ArtifactState},
        manifest::{
            BackupUnit, BackupUnitKind, ConsistencyMode, ConsistencySection, FleetMember,
            FleetSection, IdentityMode, SourceMetadata, SourceSnapshot, ToolMetadata,
            VerificationCheck, VerificationPlan,
        },
    };
    use std::{
        fs,
        time::{SystemTime, UNIX_EPOCH},
    };

    // Canister ids and a well-formed 64-hex-digit hash shared by the fixtures.
    const ROOT: &str = "aaaaa-aa";
    const CHILD: &str = "renrk-eyaaa-aaaaa-aaada-cai";
    const HASH: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";

    // Ensure manifest writes create parent dirs and round-trip through validation.
    #[test]
    fn manifest_round_trips_through_layout() {
        let root = temp_dir("canic-backup-manifest-layout");
        let layout = BackupLayout::new(root.clone());
        let manifest = valid_manifest();

        layout
            .write_manifest(&manifest)
            .expect("write manifest atomically");
        let read = layout.read_manifest().expect("read manifest");

        fs::remove_dir_all(root).expect("remove temp layout");
        assert_eq!(read.backup_id, manifest.backup_id);
    }

    // Ensure journal writes create parent dirs and round-trip through validation.
    #[test]
    fn journal_round_trips_through_layout() {
        let root = temp_dir("canic-backup-journal-layout");
        let layout = BackupLayout::new(root.clone());
        let journal = valid_journal();

        layout
            .write_journal(&journal)
            .expect("write journal atomically");
        let read = layout.read_journal().expect("read journal");

        fs::remove_dir_all(root).expect("remove temp layout");
        assert_eq!(read.backup_id, journal.backup_id);
    }

    // Ensure invalid manifests are rejected before writing.
    #[test]
    fn invalid_manifest_is_not_written() {
        let root = temp_dir("canic-backup-invalid-manifest");
        let layout = BackupLayout::new(root.clone());
        let mut manifest = valid_manifest();
        manifest.fleet.discovery_topology_hash = "bad".to_string();

        let err = layout
            .write_manifest(&manifest)
            .expect_err("invalid manifest should fail");

        // Capture existence BEFORE cleanup: once the root directory is
        // removed the path can never exist and the check would be vacuous.
        let manifest_written = layout.manifest_path().exists();
        fs::remove_dir_all(root).ok();
        assert!(matches!(err, PersistenceError::InvalidManifest(_)));
        assert!(!manifest_written);
    }

    // Ensure layout integrity verifies manifest, journal, and artifact checksums.
    #[test]
    fn integrity_verifies_durable_artifacts() {
        let root = temp_dir("canic-backup-integrity");
        let layout = BackupLayout::new(root.clone());
        let checksum = write_artifact(&root, b"root artifact");
        let journal = journal_with_checksum(checksum.hash.clone());

        layout
            .write_manifest(&valid_manifest())
            .expect("write manifest");
        layout.write_journal(&journal).expect("write journal");

        let report = layout.verify_integrity().expect("verify integrity");

        fs::remove_dir_all(root).expect("remove temp layout");
        assert_eq!(report.backup_id, "fbk_test_001");
        assert!(report.verified);
        assert_eq!(report.manifest_members, 1);
        assert_eq!(report.durable_artifacts, 1);
        assert_eq!(report.artifacts[0].checksum, checksum.hash);
    }

    // Ensure mismatched manifest and journal backup IDs are rejected.
    #[test]
    fn integrity_rejects_backup_id_mismatch() {
        let root = temp_dir("canic-backup-integrity-id");
        let layout = BackupLayout::new(root.clone());
        let checksum = write_artifact(&root, b"root artifact");
        let mut journal = journal_with_checksum(checksum.hash);
        journal.backup_id = "other-backup".to_string();

        layout
            .write_manifest(&valid_manifest())
            .expect("write manifest");
        layout.write_journal(&journal).expect("write journal");

        let err = layout
            .verify_integrity()
            .expect_err("backup id mismatch should fail");

        fs::remove_dir_all(root).expect("remove temp layout");
        assert!(matches!(err, PersistenceError::BackupIdMismatch { .. }));
    }

    // Ensure incomplete journals cannot pass backup integrity verification.
    #[test]
    fn integrity_rejects_non_durable_artifacts() {
        let root = temp_dir("canic-backup-integrity-state");
        let layout = BackupLayout::new(root.clone());
        let mut journal = valid_journal();
        journal.artifacts[0].state = ArtifactState::Created;
        journal.artifacts[0].checksum = None;

        layout
            .write_manifest(&valid_manifest())
            .expect("write manifest");
        layout.write_journal(&journal).expect("write journal");

        let err = layout
            .verify_integrity()
            .expect_err("non-durable artifact should fail");

        fs::remove_dir_all(root).expect("remove temp layout");
        assert!(matches!(err, PersistenceError::NonDurableArtifact { .. }));
    }

    // Ensure journals cannot include artifacts outside the manifest boundary.
    #[test]
    fn integrity_rejects_unexpected_journal_artifacts() {
        let root = temp_dir("canic-backup-integrity-extra");
        let layout = BackupLayout::new(root.clone());
        let checksum = write_artifact(&root, b"root artifact");
        let mut journal = journal_with_checksum(checksum.hash);
        let mut extra = journal.artifacts[0].clone();
        extra.snapshot_id = "extra-snapshot".to_string();
        journal.artifacts.push(extra);

        layout
            .write_manifest(&valid_manifest())
            .expect("write manifest");
        layout.write_journal(&journal).expect("write journal");

        let err = layout
            .verify_integrity()
            .expect_err("unexpected journal artifact should fail");

        fs::remove_dir_all(root).expect("remove temp layout");
        assert!(matches!(
            err,
            PersistenceError::UnexpectedJournalArtifact { .. }
        ));
    }

    // Build one valid manifest for persistence tests.
    fn valid_manifest() -> FleetBackupManifest {
        FleetBackupManifest {
            manifest_version: 1,
            backup_id: "fbk_test_001".to_string(),
            created_at: "2026-04-10T12:00:00Z".to_string(),
            tool: ToolMetadata {
                name: "canic".to_string(),
                version: "v1".to_string(),
            },
            source: SourceMetadata {
                environment: "local".to_string(),
                root_canister: ROOT.to_string(),
            },
            consistency: ConsistencySection {
                mode: ConsistencyMode::CrashConsistent,
                backup_units: vec![BackupUnit {
                    unit_id: "whole-fleet".to_string(),
                    kind: BackupUnitKind::WholeFleet,
                    roles: vec!["root".to_string()],
                    consistency_reason: None,
                    dependency_closure: Vec::new(),
                    topology_validation: "subtree-closed".to_string(),
                    quiescence_strategy: None,
                }],
            },
            fleet: FleetSection {
                topology_hash_algorithm: "sha256".to_string(),
                topology_hash_input: "sorted(pid,parent_pid,role,module_hash)".to_string(),
                discovery_topology_hash: HASH.to_string(),
                pre_snapshot_topology_hash: HASH.to_string(),
                topology_hash: HASH.to_string(),
                members: vec![FleetMember {
                    role: "root".to_string(),
                    canister_id: ROOT.to_string(),
                    parent_canister_id: None,
                    subnet_canister_id: Some(CHILD.to_string()),
                    controller_hint: Some(ROOT.to_string()),
                    identity_mode: IdentityMode::Fixed,
                    restore_group: 1,
                    verification_class: "basic".to_string(),
                    verification_checks: vec![VerificationCheck {
                        kind: "call".to_string(),
                        method: Some("canic_ready".to_string()),
                        roles: Vec::new(),
                    }],
                    source_snapshot: SourceSnapshot {
                        snapshot_id: "snap-root".to_string(),
                        module_hash: Some(HASH.to_string()),
                        wasm_hash: Some(HASH.to_string()),
                        code_version: Some("v0.30.0".to_string()),
                        artifact_path: "artifacts/root".to_string(),
                        checksum_algorithm: "sha256".to_string(),
                    },
                }],
            },
            verification: VerificationPlan {
                fleet_checks: Vec::new(),
                member_checks: Vec::new(),
            },
        }
    }

    // Build one valid durable journal for persistence tests.
    fn valid_journal() -> DownloadJournal {
        journal_with_checksum(HASH.to_string())
    }

    // Build one durable journal with a caller-provided checksum.
    fn journal_with_checksum(checksum: String) -> DownloadJournal {
        DownloadJournal {
            journal_version: 1,
            backup_id: "fbk_test_001".to_string(),
            artifacts: vec![ArtifactJournalEntry {
                canister_id: ROOT.to_string(),
                snapshot_id: "snap-root".to_string(),
                state: ArtifactState::Durable,
                temp_path: None,
                artifact_path: "artifacts/root".to_string(),
                checksum_algorithm: "sha256".to_string(),
                checksum: Some(checksum),
                updated_at: "2026-04-10T12:00:00Z".to_string(),
            }],
        }
    }

    // Write one artifact at the layout-relative path used by test journals.
    fn write_artifact(root: &Path, bytes: &[u8]) -> ArtifactChecksum {
        let path = root.join("artifacts/root");
        fs::create_dir_all(path.parent().expect("artifact has parent")).expect("create artifacts");
        fs::write(&path, bytes).expect("write artifact");
        ArtifactChecksum::from_bytes(bytes)
    }

    // Build a unique temporary layout directory.
    //
    // Uniqueness comes from the per-test prefix, the process id, and the
    // current time in nanoseconds; the directory itself is not created here.
    fn temp_dir(prefix: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system time after epoch")
            .as_nanos();
        std::env::temp_dir().join(format!("{prefix}-{}-{nanos}", std::process::id()))
    }
}