1use crate::{
2 artifacts::{ArtifactChecksum, ArtifactChecksumError},
3 discovery::{DiscoveryError, SnapshotTarget, parse_registry_entries, targets_from_registry},
4 journal::{
5 ArtifactJournalEntry, ArtifactState, DownloadJournal, DownloadOperationMetrics,
6 JournalValidationError,
7 },
8 manifest::{
9 BackupUnit, BackupUnitKind, ConsistencyMode, ConsistencySection, FleetBackupManifest,
10 FleetMember, FleetSection, IdentityMode, ManifestValidationError, SourceMetadata,
11 SourceSnapshot, ToolMetadata, VerificationCheck, VerificationPlan,
12 },
13 persistence::{BackupLayout, PersistenceError},
14 topology::{TopologyHash, TopologyHasher, TopologyRecord},
15};
16use candid::Principal;
17use std::{
18 collections::BTreeSet,
19 error::Error as StdError,
20 fs,
21 path::{Path, PathBuf},
22};
23use thiserror::Error as ThisError;
24
/// Boxed, thread-safe error type returned by every fallible [`SnapshotDriver`] method.
///
/// [`SnapshotDownloadError::Driver`] carries a value of this type as its source.
pub type SnapshotDriverError = Box<dyn StdError + Send + Sync + 'static>;
26
/// One captured (or, in dry-run mode, planned) snapshot and where it is stored.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SnapshotArtifact {
    /// Canister the snapshot belongs to.
    pub canister_id: String,
    /// Snapshot identifier reported by the driver; a placeholder string in dry-run results.
    pub snapshot_id: String,
    /// Artifact location: the output root joined with the sanitized canister id.
    pub path: PathBuf,
    /// Checksum hash of the artifact; a placeholder string in dry-run results.
    pub checksum: String,
}
38
/// How the canister lifecycle is handled around taking a snapshot.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SnapshotLifecycleMode {
    /// Neither stop the canister before the snapshot nor start it afterwards.
    SnapshotOnly,
    /// Stop the canister before the snapshot; do not start it afterwards.
    StopBeforeSnapshot,
    /// Do not stop the canister beforehand, but start it after the snapshot.
    ResumeAfterSnapshot,
    /// Stop the canister before the snapshot and start it again afterwards.
    StopAndResume,
}

impl SnapshotLifecycleMode {
    /// Builds the mode that corresponds to the two independent boolean flags.
    #[must_use]
    pub const fn from_flags(stop_before_snapshot: bool, resume_after_snapshot: bool) -> Self {
        if stop_before_snapshot {
            if resume_after_snapshot {
                Self::StopAndResume
            } else {
                Self::StopBeforeSnapshot
            }
        } else if resume_after_snapshot {
            Self::ResumeAfterSnapshot
        } else {
            Self::SnapshotOnly
        }
    }

    /// Returns `true` when the canister must be stopped before the snapshot.
    #[must_use]
    pub const fn stop_before_snapshot(self) -> bool {
        match self {
            Self::StopBeforeSnapshot | Self::StopAndResume => true,
            Self::SnapshotOnly | Self::ResumeAfterSnapshot => false,
        }
    }

    /// Returns `true` when the canister must be started after the snapshot.
    #[must_use]
    pub const fn resume_after_snapshot(self) -> bool {
        match self {
            Self::ResumeAfterSnapshot | Self::StopAndResume => true,
            Self::SnapshotOnly | Self::StopBeforeSnapshot => false,
        }
    }
}
75
/// Settings for one [`download_snapshots`] run.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SnapshotDownloadConfig {
    /// Selected canister. It is the only target unless `include_children` is set.
    pub canister: String,
    /// Output directory; used as the backup layout root and as the parent of every artifact.
    pub out: PathBuf,
    /// Canister whose registry is queried when `include_children` is set (required in that
    /// case). Also recorded as the manifest's root canister, falling back to `canister`.
    pub root: Option<String>,
    /// Resolve targets from the registry instead of snapshotting only `canister`.
    pub include_children: bool,
    /// Forwarded to `targets_from_registry`.
    // NOTE(review): presumably selects the whole subtree rather than direct children — confirm.
    pub recursive: bool,
    /// Only report planned commands and placeholder artifacts; nothing is captured or written.
    pub dry_run: bool,
    /// Whether canisters are stopped before and/or started after each snapshot.
    pub lifecycle: SnapshotLifecycleMode,
    /// Backup identifier recorded in the download journal and the manifest.
    pub backup_id: String,
    /// Creation timestamp string copied into the manifest.
    pub created_at: String,
    /// Tool name copied into the manifest's tool metadata.
    pub tool_name: String,
    /// Tool version copied into the manifest's tool metadata.
    pub tool_version: String,
    /// Environment name copied into the manifest's source metadata.
    pub environment: String,
}
95
/// Outcome of [`download_snapshots`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SnapshotDownloadResult {
    /// One artifact per resolved target (placeholder values in dry-run mode).
    pub artifacts: Vec<SnapshotArtifact>,
    /// Commands reported by the driver in dry-run mode; empty otherwise.
    pub planned_commands: Vec<String>,
}
105
/// Errors returned by [`download_snapshots`] and [`resolve_snapshot_targets`].
#[derive(Debug, ThisError)]
pub enum SnapshotDownloadError {
    /// `include_children` was requested but no `root` was configured.
    #[error("missing --root when using --include-children")]
    MissingRegistrySource,

    /// A [`SnapshotDriver`] call failed; the driver's error is kept as the source.
    #[error("snapshot driver failed: {0}")]
    Driver(#[source] SnapshotDriverError),

    /// Filesystem failure, including an artifact path that already exists.
    #[error(transparent)]
    Io(#[from] std::io::Error),

    /// Computing the artifact checksum failed.
    #[error(transparent)]
    Checksum(#[from] ArtifactChecksumError),

    /// Writing through the backup layout (journal or manifest) failed.
    #[error(transparent)]
    Persistence(#[from] PersistenceError),

    /// A journal entry rejected a state transition.
    #[error(transparent)]
    Journal(#[from] JournalValidationError),

    /// Parsing the registry or selecting targets from it failed.
    #[error(transparent)]
    Discovery(#[from] DiscoveryError),

    /// Topology hashing, the topology stability check, or manifest construction failed.
    #[error(transparent)]
    Manifest(#[from] SnapshotManifestError),
}
136
/// Backend that performs the registry query and the canister/snapshot operations.
///
/// The `&mut self` methods are the ones invoked during a real capture; the `*_command`
/// methods only produce the strings listed as planned commands in dry-run mode.
pub trait SnapshotDriver {
    /// Returns the registry of `root` as JSON text; the result is parsed with
    /// `parse_registry_entries`.
    fn registry_json(&mut self, root: &str) -> Result<String, SnapshotDriverError>;

    /// Creates a snapshot of `canister_id` and returns the new snapshot's id.
    fn create_snapshot(&mut self, canister_id: &str) -> Result<String, SnapshotDriverError>;

    /// Stops `canister_id`; called before the snapshot when the lifecycle mode asks for it.
    fn stop_canister(&mut self, canister_id: &str) -> Result<(), SnapshotDriverError>;

    /// Starts `canister_id`; called after the snapshot when the lifecycle mode asks for it.
    fn start_canister(&mut self, canister_id: &str) -> Result<(), SnapshotDriverError>;

    /// Downloads snapshot `snapshot_id` of `canister_id` into `artifact_path`.
    ///
    /// The capture flow passes a freshly created temporary directory here.
    fn download_snapshot(
        &mut self,
        canister_id: &str,
        snapshot_id: &str,
        artifact_path: &Path,
    ) -> Result<(), SnapshotDriverError>;

    /// Command string reported in dry-run mode for creating a snapshot.
    // NOTE(review): presumably mirrors what `create_snapshot` executes — confirm per driver.
    fn create_snapshot_command(&self, canister_id: &str) -> String;

    /// Command string reported in dry-run mode for stopping the canister.
    fn stop_canister_command(&self, canister_id: &str) -> String;

    /// Command string reported in dry-run mode for starting the canister.
    fn start_canister_command(&self, canister_id: &str) -> String;

    /// Command string reported in dry-run mode for downloading a snapshot.
    ///
    /// Dry-run passes the literal placeholder `<snapshot-id>` as `snapshot_id`.
    fn download_snapshot_command(
        &self,
        canister_id: &str,
        snapshot_id: &str,
        artifact_path: &Path,
    ) -> String;
}
179
180struct SnapshotArtifactPaths {
185 relative_path: PathBuf,
186 artifact_path: PathBuf,
187 temp_path: PathBuf,
188}
189
190impl SnapshotArtifactPaths {
191 fn new(root: &Path, canister_id: &str) -> Self {
193 let relative_path = PathBuf::from(safe_path_segment(canister_id));
194 let artifact_path = root.join(&relative_path);
195 let temp_path = root.join(format!("{}.tmp", safe_path_segment(canister_id)));
196
197 Self {
198 relative_path,
199 artifact_path,
200 temp_path,
201 }
202 }
203}
204
/// Everything [`build_snapshot_manifest`] needs to assemble a manifest.
pub struct SnapshotManifestInput<'a> {
    /// Backup identifier written to the manifest.
    pub backup_id: String,
    /// Creation timestamp string written to the manifest.
    pub created_at: String,
    /// Tool name for the manifest's tool metadata.
    pub tool_name: String,
    /// Tool version for the manifest's tool metadata.
    pub tool_version: String,
    /// Environment name for the manifest's source metadata.
    pub environment: String,
    /// Root canister for the source metadata. When `include_children` is set it is also
    /// passed on as each fleet member's `subnet_canister_id`.
    pub root_canister: String,
    /// The explicitly selected canister; it gets the `root` fallback role, fixed identity
    /// mode and restore group 1.
    pub selected_canister: String,
    /// Whether the selection came from the registry (subtree) or is a single canister.
    pub include_children: bool,
    /// Resolved snapshot targets; one fleet member is produced per target.
    pub targets: &'a [SnapshotTarget],
    /// Captured artifacts; each target must have one with a matching canister id.
    pub artifacts: &'a [SnapshotArtifact],
    /// Topology hash computed when targets were first resolved.
    pub discovery_topology_hash: TopologyHash,
    /// Topology hash computed again just before snapshots were taken.
    pub pre_snapshot_topology_hash: TopologyHash,
}
223
/// Errors raised while hashing topology or building the snapshot manifest.
#[derive(Debug, ThisError)]
pub enum SnapshotManifestError {
    /// A canister id (or parent canister id) could not be parsed as a principal.
    #[error("field {field} must be a valid principal: {value}")]
    InvalidPrincipal { field: &'static str, value: String },

    /// The topology hash taken before snapshotting differs from the discovery hash.
    #[error(
        "topology changed before snapshot start: discovery={discovery}, pre_snapshot={pre_snapshot}"
    )]
    TopologyChanged {
        discovery: String,
        pre_snapshot: String,
    },

    /// No artifact with the target's canister id was supplied; carries that canister id.
    #[error("missing snapshot artifact for canister {0}")]
    MissingArtifact(String),

    /// The assembled manifest failed its own validation.
    #[error(transparent)]
    InvalidManifest(#[from] ManifestValidationError),
}
247
248pub fn download_snapshots(
250 config: &SnapshotDownloadConfig,
251 driver: &mut impl SnapshotDriver,
252) -> Result<SnapshotDownloadResult, SnapshotDownloadError> {
253 let targets = resolve_snapshot_targets(config, driver)?;
254 let discovery_topology_hash = topology_hash_for_targets(&config.canister, &targets)?;
255 let pre_snapshot_topology_hash =
256 accepted_pre_snapshot_topology_hash(config, driver, &discovery_topology_hash)?;
257 let layout = BackupLayout::new(config.out.clone());
258 let mut artifacts = Vec::with_capacity(targets.len());
259 let mut planned_commands = Vec::new();
260 let mut journal = DownloadJournal {
261 journal_version: 1,
262 backup_id: config.backup_id.clone(),
263 discovery_topology_hash: Some(discovery_topology_hash.hash.clone()),
264 pre_snapshot_topology_hash: Some(pre_snapshot_topology_hash.hash.clone()),
265 operation_metrics: DownloadOperationMetrics {
266 target_count: targets.len(),
267 ..DownloadOperationMetrics::default()
268 },
269 artifacts: Vec::new(),
270 };
271
272 for target in &targets {
273 let paths = SnapshotArtifactPaths::new(&config.out, &target.canister_id);
274
275 if config.dry_run {
276 let (artifact, commands) =
277 dry_run_artifact(config, driver, target, paths.artifact_path);
278 artifacts.push(artifact);
279 planned_commands.extend(commands);
280 continue;
281 }
282
283 artifacts.push(capture_snapshot_artifact(
284 config,
285 driver,
286 &layout,
287 &mut journal,
288 target,
289 paths,
290 )?);
291 }
292
293 if !config.dry_run {
294 let manifest = build_snapshot_manifest(SnapshotManifestInput {
295 backup_id: config.backup_id.clone(),
296 created_at: config.created_at.clone(),
297 tool_name: config.tool_name.clone(),
298 tool_version: config.tool_version.clone(),
299 environment: config.environment.clone(),
300 root_canister: config
301 .root
302 .clone()
303 .unwrap_or_else(|| config.canister.clone()),
304 selected_canister: config.canister.clone(),
305 include_children: config.include_children,
306 targets: &targets,
307 artifacts: &artifacts,
308 discovery_topology_hash,
309 pre_snapshot_topology_hash,
310 })?;
311 layout.write_manifest(&manifest)?;
312 }
313
314 Ok(SnapshotDownloadResult {
315 artifacts,
316 planned_commands,
317 })
318}
319
320pub fn resolve_snapshot_targets(
322 config: &SnapshotDownloadConfig,
323 driver: &mut impl SnapshotDriver,
324) -> Result<Vec<SnapshotTarget>, SnapshotDownloadError> {
325 if !config.include_children {
326 return Ok(vec![SnapshotTarget {
327 canister_id: config.canister.clone(),
328 role: None,
329 parent_canister_id: None,
330 }]);
331 }
332
333 let registry_json = if let Some(root) = &config.root {
334 driver
335 .registry_json(root)
336 .map_err(SnapshotDownloadError::Driver)?
337 } else {
338 return Err(SnapshotDownloadError::MissingRegistrySource);
339 };
340 let registry = parse_registry_entries(®istry_json)?;
341 targets_from_registry(®istry, &config.canister, config.recursive)
342 .map_err(SnapshotDownloadError::from)
343}
344
345pub fn build_snapshot_manifest(
347 input: SnapshotManifestInput<'_>,
348) -> Result<FleetBackupManifest, SnapshotManifestError> {
349 let roles = input
350 .targets
351 .iter()
352 .enumerate()
353 .map(|(index, target)| target_role(&input.selected_canister, index, target))
354 .collect::<BTreeSet<_>>()
355 .into_iter()
356 .collect::<Vec<_>>();
357
358 let manifest = FleetBackupManifest {
359 manifest_version: 1,
360 backup_id: input.backup_id,
361 created_at: input.created_at,
362 tool: ToolMetadata {
363 name: input.tool_name,
364 version: input.tool_version,
365 },
366 source: SourceMetadata {
367 environment: input.environment,
368 root_canister: input.root_canister.clone(),
369 },
370 consistency: ConsistencySection {
371 mode: ConsistencyMode::CrashConsistent,
372 backup_units: vec![BackupUnit {
373 unit_id: "snapshot-selection".to_string(),
374 kind: if input.include_children {
375 BackupUnitKind::SubtreeRooted
376 } else {
377 BackupUnitKind::Flat
378 },
379 roles,
380 consistency_reason: if input.include_children {
381 None
382 } else {
383 Some("explicit single-canister snapshot selection".to_string())
384 },
385 dependency_closure: Vec::new(),
386 topology_validation: if input.include_children {
387 "registry-subtree-selection".to_string()
388 } else {
389 "explicit-selection".to_string()
390 },
391 quiescence_strategy: None,
392 }],
393 },
394 fleet: FleetSection {
395 topology_hash_algorithm: input.discovery_topology_hash.algorithm,
396 topology_hash_input: input.discovery_topology_hash.input,
397 discovery_topology_hash: input.discovery_topology_hash.hash.clone(),
398 pre_snapshot_topology_hash: input.pre_snapshot_topology_hash.hash,
399 topology_hash: input.discovery_topology_hash.hash,
400 members: input
401 .targets
402 .iter()
403 .enumerate()
404 .map(|(index, target)| {
405 fleet_member(
406 &input.selected_canister,
407 Some(input.root_canister.as_str()).filter(|_| input.include_children),
408 index,
409 target,
410 input.artifacts,
411 )
412 })
413 .collect::<Result<Vec<_>, _>>()?,
414 },
415 verification: VerificationPlan::default(),
416 };
417
418 manifest.validate()?;
419 Ok(manifest)
420}
421
422pub fn topology_hash_for_targets(
424 selected_canister: &str,
425 targets: &[SnapshotTarget],
426) -> Result<TopologyHash, SnapshotManifestError> {
427 let topology_records = targets
428 .iter()
429 .enumerate()
430 .map(|(index, target)| topology_record(selected_canister, index, target))
431 .collect::<Result<Vec<_>, _>>()?;
432 Ok(TopologyHasher::hash(&topology_records))
433}
434
435pub fn ensure_topology_stable(
437 discovery: &TopologyHash,
438 pre_snapshot: &TopologyHash,
439) -> Result<(), SnapshotManifestError> {
440 if discovery.hash == pre_snapshot.hash {
441 return Ok(());
442 }
443
444 Err(SnapshotManifestError::TopologyChanged {
445 discovery: discovery.hash.clone(),
446 pre_snapshot: pre_snapshot.hash.clone(),
447 })
448}
449
450fn accepted_pre_snapshot_topology_hash(
452 config: &SnapshotDownloadConfig,
453 driver: &mut impl SnapshotDriver,
454 discovery_topology_hash: &TopologyHash,
455) -> Result<TopologyHash, SnapshotDownloadError> {
456 if config.dry_run {
457 return Ok(discovery_topology_hash.clone());
458 }
459
460 let pre_snapshot_targets = resolve_snapshot_targets(config, driver)?;
461 let pre_snapshot_topology_hash =
462 topology_hash_for_targets(&config.canister, &pre_snapshot_targets)?;
463 ensure_topology_stable(discovery_topology_hash, &pre_snapshot_topology_hash)?;
464 Ok(pre_snapshot_topology_hash)
465}
466
467fn dry_run_artifact(
469 config: &SnapshotDownloadConfig,
470 driver: &impl SnapshotDriver,
471 target: &SnapshotTarget,
472 artifact_path: PathBuf,
473) -> (SnapshotArtifact, Vec<String>) {
474 let mut commands = Vec::new();
475 if config.lifecycle.stop_before_snapshot() {
476 commands.push(driver.stop_canister_command(&target.canister_id));
477 }
478 commands.push(driver.create_snapshot_command(&target.canister_id));
479 commands.push(driver.download_snapshot_command(
480 &target.canister_id,
481 "<snapshot-id>",
482 &artifact_path,
483 ));
484 if config.lifecycle.resume_after_snapshot() {
485 commands.push(driver.start_canister_command(&target.canister_id));
486 }
487
488 (
489 SnapshotArtifact {
490 canister_id: target.canister_id.clone(),
491 snapshot_id: "<snapshot-id>".to_string(),
492 path: artifact_path,
493 checksum: "<sha256>".to_string(),
494 },
495 commands,
496 )
497}
498
499fn capture_snapshot_artifact(
501 config: &SnapshotDownloadConfig,
502 driver: &mut impl SnapshotDriver,
503 layout: &BackupLayout,
504 journal: &mut DownloadJournal,
505 target: &SnapshotTarget,
506 paths: SnapshotArtifactPaths,
507) -> Result<SnapshotArtifact, SnapshotDownloadError> {
508 if config.lifecycle.stop_before_snapshot() {
509 driver
510 .stop_canister(&target.canister_id)
511 .map_err(SnapshotDownloadError::Driver)?;
512 }
513
514 let result = capture_snapshot_artifact_body(
515 driver,
516 layout,
517 journal,
518 target,
519 &paths.relative_path,
520 paths.artifact_path,
521 paths.temp_path,
522 );
523
524 if config.lifecycle.resume_after_snapshot() {
525 match result {
526 Ok(artifact) => {
527 driver
528 .start_canister(&target.canister_id)
529 .map_err(SnapshotDownloadError::Driver)?;
530 Ok(artifact)
531 }
532 Err(error) => {
533 let _ = driver.start_canister(&target.canister_id);
534 Err(error)
535 }
536 }
537 } else {
538 result
539 }
540}
541
/// Creates, downloads, checksums and finalizes the snapshot of a single target.
///
/// The journal entry for the artifact moves through `Created` -> `Downloaded` ->
/// `ChecksumVerified` -> `Durable`, and the journal is written through `layout` after
/// every step so that the on-disk journal reflects how far the capture got. Any failure
/// returns immediately via `?`, leaving the journal at the last state that was written.
///
/// * `artifact_relative_path` - path recorded in the journal entry.
/// * `artifact_path` - final location; must not exist yet.
/// * `temp_path` - staging directory the driver downloads into; recreated from scratch.
fn capture_snapshot_artifact_body(
    driver: &mut impl SnapshotDriver,
    layout: &BackupLayout,
    journal: &mut DownloadJournal,
    target: &SnapshotTarget,
    artifact_relative_path: &Path,
    artifact_path: PathBuf,
    temp_path: PathBuf,
) -> Result<SnapshotArtifact, SnapshotDownloadError> {
    // Step 1: create the snapshot and record a `Created` journal entry.
    journal.operation_metrics.snapshot_create_started += 1;
    let snapshot_id = driver
        .create_snapshot(&target.canister_id)
        .map_err(SnapshotDownloadError::Driver)?;
    journal.operation_metrics.snapshot_create_completed += 1;
    // NOTE(review): `updated_at` is recorded as the literal "unknown" at every step.
    let mut entry = ArtifactJournalEntry {
        canister_id: target.canister_id.clone(),
        snapshot_id: snapshot_id.clone(),
        state: ArtifactState::Created,
        temp_path: None,
        artifact_path: artifact_relative_path.display().to_string(),
        checksum_algorithm: "sha256".to_string(),
        checksum: None,
        updated_at: "unknown".to_string(),
    };
    journal.artifacts.push(entry.clone());
    layout.write_journal(journal)?;

    // Step 2: download into a clean staging directory.
    if temp_path.exists() {
        // Discard whatever already sits at the staging path.
        fs::remove_dir_all(&temp_path)?;
    }
    fs::create_dir_all(&temp_path)?;
    journal.operation_metrics.snapshot_download_started += 1;
    layout.write_journal(journal)?;
    driver
        .download_snapshot(&target.canister_id, &snapshot_id, &temp_path)
        .map_err(SnapshotDownloadError::Driver)?;
    journal.operation_metrics.snapshot_download_completed += 1;
    entry.advance_to(ArtifactState::Downloaded, "unknown".to_string())?;
    entry.temp_path = Some(temp_path.display().to_string());
    update_journal_entry(journal, &entry);
    layout.write_journal(journal)?;

    // Step 3: compute the checksum of the staged download.
    journal.operation_metrics.checksum_verify_started += 1;
    layout.write_journal(journal)?;
    let checksum = ArtifactChecksum::from_path(&temp_path)?;
    journal.operation_metrics.checksum_verify_completed += 1;
    entry.checksum = Some(checksum.hash.clone());
    entry.advance_to(ArtifactState::ChecksumVerified, "unknown".to_string())?;
    update_journal_entry(journal, &entry);
    layout.write_journal(journal)?;

    // Step 4: move the staged download to its final location.
    journal.operation_metrics.artifact_finalize_started += 1;
    layout.write_journal(journal)?;
    if artifact_path.exists() {
        // Never overwrite an existing artifact; surface it as an I/O error instead.
        return Err(std::io::Error::new(
            std::io::ErrorKind::AlreadyExists,
            format!("artifact path already exists: {}", artifact_path.display()),
        )
        .into());
    }
    fs::rename(&temp_path, &artifact_path)?;
    journal.operation_metrics.artifact_finalize_completed += 1;
    // The staging path no longer exists once the rename succeeded.
    entry.temp_path = None;
    entry.advance_to(ArtifactState::Durable, "unknown".to_string())?;
    update_journal_entry(journal, &entry);
    layout.write_journal(journal)?;

    Ok(SnapshotArtifact {
        canister_id: target.canister_id.clone(),
        snapshot_id,
        path: artifact_path,
        checksum: checksum.hash,
    })
}
617
618fn update_journal_entry(journal: &mut DownloadJournal, entry: &ArtifactJournalEntry) {
620 if let Some(existing) = journal.artifacts.iter_mut().find(|existing| {
621 existing.canister_id == entry.canister_id && existing.snapshot_id == entry.snapshot_id
622 }) {
623 *existing = entry.clone();
624 }
625}
626
627fn fleet_member(
629 selected_canister: &str,
630 subnet_canister_id: Option<&str>,
631 index: usize,
632 target: &SnapshotTarget,
633 artifacts: &[SnapshotArtifact],
634) -> Result<FleetMember, SnapshotManifestError> {
635 let Some(artifact) = artifacts
636 .iter()
637 .find(|artifact| artifact.canister_id == target.canister_id)
638 else {
639 return Err(SnapshotManifestError::MissingArtifact(
640 target.canister_id.clone(),
641 ));
642 };
643 let role = target_role(selected_canister, index, target);
644
645 Ok(FleetMember {
646 role: role.clone(),
647 canister_id: target.canister_id.clone(),
648 parent_canister_id: target.parent_canister_id.clone(),
649 subnet_canister_id: subnet_canister_id.map(str::to_string),
650 controller_hint: None,
651 identity_mode: if target.canister_id == selected_canister {
652 IdentityMode::Fixed
653 } else {
654 IdentityMode::Relocatable
655 },
656 restore_group: if target.canister_id == selected_canister {
657 1
658 } else {
659 2
660 },
661 verification_class: "basic".to_string(),
662 verification_checks: vec![VerificationCheck {
663 kind: "status".to_string(),
664 method: None,
665 roles: vec![role],
666 }],
667 source_snapshot: SourceSnapshot {
668 snapshot_id: artifact.snapshot_id.clone(),
669 module_hash: None,
670 wasm_hash: None,
671 code_version: None,
672 artifact_path: safe_path_segment(&target.canister_id),
673 checksum_algorithm: "sha256".to_string(),
674 checksum: Some(artifact.checksum.clone()),
675 },
676 })
677}
678
679fn topology_record(
681 selected_canister: &str,
682 index: usize,
683 target: &SnapshotTarget,
684) -> Result<TopologyRecord, SnapshotManifestError> {
685 Ok(TopologyRecord {
686 pid: parse_principal("fleet.members[].canister_id", &target.canister_id)?,
687 parent_pid: target
688 .parent_canister_id
689 .as_deref()
690 .map(|parent| parse_principal("fleet.members[].parent_canister_id", parent))
691 .transpose()?,
692 role: target_role(selected_canister, index, target),
693 module_hash: None,
694 })
695}
696
697fn target_role(selected_canister: &str, index: usize, target: &SnapshotTarget) -> String {
699 target.role.clone().unwrap_or_else(|| {
700 if target.canister_id == selected_canister {
701 "root".to_string()
702 } else {
703 format!("member-{index}")
704 }
705 })
706}
707
708fn parse_principal(field: &'static str, value: &str) -> Result<Principal, SnapshotManifestError> {
710 Principal::from_text(value).map_err(|_| SnapshotManifestError::InvalidPrincipal {
711 field,
712 value: value.to_string(),
713 })
714}
715
/// Turns `value` into a string that is safe to use as a single path segment.
///
/// ASCII letters, ASCII digits, `-` and `_` are kept; every other character (including
/// path separators, dots and non-ASCII characters) is replaced by one `_`.
fn safe_path_segment(value: &str) -> String {
    // Each input char yields exactly one ASCII-or-original char, so this never regrows.
    let mut segment = String::with_capacity(value.len());
    for ch in value.chars() {
        let allowed = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
        segment.push(if allowed { ch } else { '_' });
    }
    segment
}
726
727#[cfg(test)]
728mod tests;