Skip to main content

canic_cli/snapshot/
mod.rs

1use crate::version_text;
2use candid::Principal;
3use canic_backup::{
4    artifacts::{ArtifactChecksum, ArtifactChecksumError},
5    journal::{
6        ArtifactJournalEntry, ArtifactState, DownloadJournal, DownloadOperationMetrics,
7        JournalValidationError,
8    },
9    manifest::{
10        BackupUnit, BackupUnitKind, ConsistencyMode, ConsistencySection, FleetBackupManifest,
11        FleetMember, FleetSection, IdentityMode, ManifestValidationError, SourceMetadata,
12        SourceSnapshot, ToolMetadata, VerificationCheck, VerificationPlan,
13    },
14    persistence::{BackupLayout, PersistenceError},
15    topology::{TopologyHash, TopologyHasher, TopologyRecord},
16};
17use serde_json::Value;
18use std::{
19    collections::{BTreeMap, BTreeSet, VecDeque},
20    ffi::OsString,
21    fs,
22    path::{Path, PathBuf},
23    process::Command,
24};
25use thiserror::Error as ThisError;
26
///
/// SnapshotCommandError
///
/// Every failure the snapshot subcommand can report. The `#[error]`
/// attribute on each variant is the exact text shown to the user.
///

#[derive(Debug, ThisError)]
pub enum SnapshotCommandError {
    /// Static help/usage text; also returned for `--help`, non-UTF-8
    /// arguments, a missing subcommand, and an unrecognized registry shape.
    #[error("{0}")]
    Usage(&'static str),

    /// A required option was not supplied on the command line.
    #[error("missing required option {0}")]
    MissingOption(&'static str),

    /// An argument (or subcommand) that the parser does not recognize.
    #[error("unknown option {0}")]
    UnknownOption(String),

    /// An option that takes a value was given without one.
    #[error("option {0} requires a value")]
    MissingValue(&'static str),

    /// Both `--root` and `--registry-json` were supplied.
    #[error("cannot combine --root and --registry-json")]
    ConflictingRegistrySources,

    /// The requested canister id has no entry in the parsed registry.
    #[error("registry JSON did not contain the requested canister {0}")]
    CanisterNotInRegistry(String),

    /// A dfx invocation exited unsuccessfully; carries the rendered command
    /// line and the captured stderr.
    #[error("dfx command failed: {command}\n{stderr}")]
    DfxFailed { command: String, stderr: String },

    /// The snapshot id could not be determined; carries the dfx output text.
    #[error("could not parse snapshot id from dfx output: {0}")]
    SnapshotIdUnavailable(String),

    /// A value that must be a principal failed to parse; `field` names the
    /// logical field and `value` is the offending text.
    #[error("field {field} must be a valid principal: {value}")]
    InvalidPrincipal { field: &'static str, value: String },

    /// The topology hash computed at discovery differs from the one
    /// recomputed right before snapshot creation.
    #[error(
        "topology changed before snapshot start: discovery={discovery}, pre_snapshot={pre_snapshot}"
    )]
    TopologyChanged {
        discovery: String,
        pre_snapshot: String,
    },

    /// Filesystem or process-spawn failure.
    #[error(transparent)]
    Io(#[from] std::io::Error),

    /// JSON (de)serialization failure.
    #[error(transparent)]
    Json(#[from] serde_json::Error),

    /// Artifact checksum computation failure.
    #[error(transparent)]
    Checksum(#[from] ArtifactChecksumError),

    /// Backup layout persistence failure (journal or manifest writes).
    #[error(transparent)]
    Persistence(#[from] PersistenceError),

    /// Journal validation failure, e.g. from an artifact state transition.
    #[error(transparent)]
    Journal(#[from] JournalValidationError),

    /// The assembled manifest failed validation.
    #[error(transparent)]
    InvalidManifest(#[from] ManifestValidationError),
}
86
///
/// SnapshotDownloadOptions
///
/// Parsed options for the `download` subcommand.
///

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SnapshotDownloadOptions {
    /// Canister to snapshot (`--canister`, required).
    pub canister: String,
    /// Output directory that receives artifacts (`--out`, required).
    pub out: PathBuf,
    /// Root canister queried for the live registry (`--root`).
    pub root: Option<String>,
    /// Path to a registry JSON file read instead of a live query
    /// (`--registry-json`).
    pub registry_json: Option<PathBuf>,
    /// Also snapshot children of `canister`; set by `--include-children`
    /// and implied by `--recursive`.
    pub include_children: bool,
    /// Follow children transitively rather than direct children only
    /// (`--recursive`).
    pub recursive: bool,
    /// Print the planned dfx commands and return placeholder artifacts
    /// instead of creating snapshots (`--dry-run`).
    pub dry_run: bool,
    /// Stop/start behaviour around each snapshot, derived from
    /// `--stop-before-snapshot` and `--resume-after-snapshot`.
    pub lifecycle: SnapshotLifecycleMode,
    /// Value forwarded to dfx as `--network` when present.
    pub network: Option<String>,
    /// Program name or path used to invoke dfx; defaults to `dfx`.
    pub dfx: String,
}
104
105impl SnapshotDownloadOptions {
106    /// Parse snapshot download options from CLI arguments.
107    pub fn parse<I>(args: I) -> Result<Self, SnapshotCommandError>
108    where
109        I: IntoIterator<Item = OsString>,
110    {
111        let mut canister = None;
112        let mut out = None;
113        let mut root = None;
114        let mut registry_json = None;
115        let mut include_children = false;
116        let mut recursive = false;
117        let mut dry_run = false;
118        let mut stop_before_snapshot = false;
119        let mut resume_after_snapshot = false;
120        let mut network = None;
121        let mut dfx = "dfx".to_string();
122
123        let mut args = args.into_iter();
124        while let Some(arg) = args.next() {
125            let arg = arg
126                .into_string()
127                .map_err(|_| SnapshotCommandError::Usage(usage()))?;
128            match arg.as_str() {
129                "--canister" => canister = Some(next_value(&mut args, "--canister")?),
130                "--out" => out = Some(PathBuf::from(next_value(&mut args, "--out")?)),
131                "--root" => root = Some(next_value(&mut args, "--root")?),
132                "--registry-json" => {
133                    registry_json = Some(PathBuf::from(next_value(&mut args, "--registry-json")?));
134                }
135                "--include-children" => include_children = true,
136                "--recursive" => {
137                    recursive = true;
138                    include_children = true;
139                }
140                "--dry-run" => dry_run = true,
141                "--stop-before-snapshot" => stop_before_snapshot = true,
142                "--resume-after-snapshot" => resume_after_snapshot = true,
143                "--network" => network = Some(next_value(&mut args, "--network")?),
144                "--dfx" => dfx = next_value(&mut args, "--dfx")?,
145                "--help" | "-h" => return Err(SnapshotCommandError::Usage(usage())),
146                _ => return Err(SnapshotCommandError::UnknownOption(arg)),
147            }
148        }
149
150        if root.is_some() && registry_json.is_some() {
151            return Err(SnapshotCommandError::ConflictingRegistrySources);
152        }
153
154        Ok(Self {
155            canister: canister.ok_or(SnapshotCommandError::MissingOption("--canister"))?,
156            out: out.ok_or(SnapshotCommandError::MissingOption("--out"))?,
157            root,
158            registry_json,
159            include_children,
160            recursive,
161            dry_run,
162            lifecycle: SnapshotLifecycleMode::from_flags(
163                stop_before_snapshot,
164                resume_after_snapshot,
165            ),
166            network,
167            dfx,
168        })
169    }
170}
171
///
/// SnapshotLifecycleMode
///
/// Which canister stop/start commands wrap each snapshot operation.
///

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SnapshotLifecycleMode {
    /// Neither stop nor start the canister.
    SnapshotOnly,
    /// Stop the canister first; do not start it afterwards.
    StopBeforeSnapshot,
    /// Start the canister afterwards without stopping it first.
    ResumeAfterSnapshot,
    /// Stop the canister first and start it again afterwards.
    StopAndResume,
}

impl SnapshotLifecycleMode {
    /// Map the two independent CLI switches onto a lifecycle mode.
    #[must_use]
    pub const fn from_flags(stop_before_snapshot: bool, resume_after_snapshot: bool) -> Self {
        if stop_before_snapshot {
            if resume_after_snapshot {
                Self::StopAndResume
            } else {
                Self::StopBeforeSnapshot
            }
        } else if resume_after_snapshot {
            Self::ResumeAfterSnapshot
        } else {
            Self::SnapshotOnly
        }
    }

    /// Whether this mode stops the canister before the snapshot is created.
    #[must_use]
    pub const fn stop_before_snapshot(self) -> bool {
        match self {
            Self::StopBeforeSnapshot | Self::StopAndResume => true,
            Self::SnapshotOnly | Self::ResumeAfterSnapshot => false,
        }
    }

    /// Whether this mode starts the canister once the snapshot is captured.
    #[must_use]
    pub const fn resume_after_snapshot(self) -> bool {
        match self {
            Self::ResumeAfterSnapshot | Self::StopAndResume => true,
            Self::SnapshotOnly | Self::StopBeforeSnapshot => false,
        }
    }
}
208
///
/// SnapshotTarget
///
/// One canister selected for snapshotting.
///

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SnapshotTarget {
    /// Id of the canister to snapshot.
    pub canister_id: String,
    /// Role copied from the registry entry; `None` for an explicit
    /// single-canister selection that never consulted the registry.
    pub role: Option<String>,
    /// Parent id copied from the registry entry, when one was recorded.
    pub parent_canister_id: Option<String>,
}
219
220/// Run a snapshot subcommand.
221pub fn run<I>(args: I) -> Result<(), SnapshotCommandError>
222where
223    I: IntoIterator<Item = OsString>,
224{
225    let mut args = args.into_iter();
226    let Some(command) = args.next().and_then(|arg| arg.into_string().ok()) else {
227        return Err(SnapshotCommandError::Usage(usage()));
228    };
229
230    match command.as_str() {
231        "download" => {
232            let options = SnapshotDownloadOptions::parse(args)?;
233            let result = download_snapshots(&options)?;
234            for artifact in result.artifacts {
235                println!(
236                    "{} {} {}",
237                    artifact.canister_id,
238                    artifact.snapshot_id,
239                    artifact.path.display()
240                );
241            }
242            Ok(())
243        }
244        "help" | "--help" | "-h" => {
245            println!("{}", usage());
246            Ok(())
247        }
248        "version" | "--version" | "-V" => {
249            println!("{}", version_text());
250            Ok(())
251        }
252        _ => Err(SnapshotCommandError::UnknownOption(command)),
253    }
254}
255
///
/// SnapshotDownloadResult
///
/// Outcome of one `download_snapshots` run.
///

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SnapshotDownloadResult {
    /// One artifact per resolved target, in target order. Dry runs carry
    /// placeholder snapshot ids and checksums.
    pub artifacts: Vec<SnapshotArtifact>,
}
264
///
/// SnapshotArtifact
///
/// One downloaded (or, for dry runs, planned) snapshot artifact.
///

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SnapshotArtifact {
    /// Canister the snapshot was taken from.
    pub canister_id: String,
    /// Snapshot id reported by dfx, or `<snapshot-id>` for a dry run.
    pub snapshot_id: String,
    /// Final artifact directory under the output directory.
    pub path: PathBuf,
    /// Checksum hash of the artifact, or `<sha256>` for a dry run.
    pub checksum: String,
}
276
277/// Create and download snapshots for the selected canister set.
278pub fn download_snapshots(
279    options: &SnapshotDownloadOptions,
280) -> Result<SnapshotDownloadResult, SnapshotCommandError> {
281    let targets = resolve_targets(options)?;
282    let discovery_topology_hash = topology_hash_for_targets(options, &targets)?;
283    let pre_snapshot_topology_hash =
284        accepted_pre_snapshot_topology_hash(options, &discovery_topology_hash)?;
285    let mut artifacts = Vec::with_capacity(targets.len());
286    let mut journal = DownloadJournal {
287        journal_version: 1,
288        backup_id: backup_id(options),
289        discovery_topology_hash: Some(discovery_topology_hash.hash.clone()),
290        pre_snapshot_topology_hash: Some(pre_snapshot_topology_hash.hash.clone()),
291        operation_metrics: DownloadOperationMetrics {
292            target_count: targets.len(),
293            ..DownloadOperationMetrics::default()
294        },
295        artifacts: Vec::new(),
296    };
297    let layout = BackupLayout::new(options.out.clone());
298
299    for target in &targets {
300        let artifact_relative_path = PathBuf::from(safe_path_segment(&target.canister_id));
301        let artifact_path = options.out.join(&artifact_relative_path);
302        let temp_path = options
303            .out
304            .join(format!("{}.tmp", safe_path_segment(&target.canister_id)));
305
306        if options.dry_run {
307            artifacts.push(dry_run_artifact(options, target, artifact_path));
308            continue;
309        }
310
311        artifacts.push(capture_snapshot_artifact(
312            options,
313            &layout,
314            &mut journal,
315            target,
316            &artifact_relative_path,
317            artifact_path,
318            temp_path,
319        )?);
320    }
321
322    if !options.dry_run {
323        let manifest = build_manifest(
324            options,
325            &targets,
326            &artifacts,
327            discovery_topology_hash,
328            pre_snapshot_topology_hash,
329        )?;
330        layout.write_manifest(&manifest)?;
331    }
332
333    Ok(SnapshotDownloadResult { artifacts })
334}
335
336// Resolve and verify the pre-snapshot topology hash before any mutation.
337fn accepted_pre_snapshot_topology_hash(
338    options: &SnapshotDownloadOptions,
339    discovery_topology_hash: &TopologyHash,
340) -> Result<TopologyHash, SnapshotCommandError> {
341    if options.dry_run {
342        return Ok(discovery_topology_hash.clone());
343    }
344
345    let pre_snapshot_targets = resolve_targets(options)?;
346    let pre_snapshot_topology_hash = topology_hash_for_targets(options, &pre_snapshot_targets)?;
347    ensure_topology_stable(discovery_topology_hash, &pre_snapshot_topology_hash)?;
348    Ok(pre_snapshot_topology_hash)
349}
350
351// Print the planned commands and return a placeholder artifact for dry runs.
352fn dry_run_artifact(
353    options: &SnapshotDownloadOptions,
354    target: &SnapshotTarget,
355    artifact_path: PathBuf,
356) -> SnapshotArtifact {
357    if options.lifecycle.stop_before_snapshot() {
358        println!(
359            "{}",
360            stop_canister_command_display(options, &target.canister_id)
361        );
362    }
363    println!(
364        "{}",
365        create_snapshot_command_display(options, &target.canister_id)
366    );
367    println!(
368        "{}",
369        download_snapshot_command_display(options, &target.canister_id, "<snapshot-id>")
370    );
371    if options.lifecycle.resume_after_snapshot() {
372        println!(
373            "{}",
374            start_canister_command_display(options, &target.canister_id)
375        );
376    }
377
378    SnapshotArtifact {
379        canister_id: target.canister_id.clone(),
380        snapshot_id: "<snapshot-id>".to_string(),
381        path: artifact_path,
382        checksum: "<sha256>".to_string(),
383    }
384}
385
// Create, download, checksum, and finalize one durable snapshot artifact.
//
// The whole sequence runs inside `with_optional_stop`, so the configured
// stop/start lifecycle commands wrap it. The journal is persisted after every
// metric bump and every artifact state change (Created -> Downloaded ->
// ChecksumVerified -> Durable), so the on-disk journal reflects the last step
// reached if a later step fails.
//
// `artifact_relative_path` is what gets recorded in the journal;
// `artifact_path` is the final directory and `temp_path` is the download
// staging directory that is renamed into place at the end.
fn capture_snapshot_artifact(
    options: &SnapshotDownloadOptions,
    layout: &BackupLayout,
    journal: &mut DownloadJournal,
    target: &SnapshotTarget,
    artifact_relative_path: &Path,
    artifact_path: PathBuf,
    temp_path: PathBuf,
) -> Result<SnapshotArtifact, SnapshotCommandError> {
    with_optional_stop(options, &target.canister_id, || {
        // Step 1: create the snapshot and journal it in the Created state.
        journal.operation_metrics.snapshot_create_started += 1;
        let snapshot_id = create_snapshot(options, &target.canister_id)?;
        journal.operation_metrics.snapshot_create_completed += 1;
        let mut entry = ArtifactJournalEntry {
            canister_id: target.canister_id.clone(),
            snapshot_id: snapshot_id.clone(),
            state: ArtifactState::Created,
            temp_path: None,
            artifact_path: artifact_relative_path.display().to_string(),
            checksum_algorithm: "sha256".to_string(),
            checksum: None,
            updated_at: timestamp_placeholder(),
        };
        journal.artifacts.push(entry.clone());
        layout.write_journal(journal)?;

        // Step 2: download into a fresh staging directory; anything left at
        // the staging path from before is removed first.
        if temp_path.exists() {
            fs::remove_dir_all(&temp_path)?;
        }
        fs::create_dir_all(&temp_path)?;
        journal.operation_metrics.snapshot_download_started += 1;
        layout.write_journal(journal)?;
        download_snapshot(options, &target.canister_id, &snapshot_id, &temp_path)?;
        journal.operation_metrics.snapshot_download_completed += 1;
        entry.advance_to(ArtifactState::Downloaded, timestamp_placeholder())?;
        entry.temp_path = Some(temp_path.display().to_string());
        update_journal_entry(journal, &entry);
        layout.write_journal(journal)?;

        // Step 3: checksum the staged download and record the hash.
        journal.operation_metrics.checksum_verify_started += 1;
        layout.write_journal(journal)?;
        let checksum = ArtifactChecksum::from_path(&temp_path)?;
        journal.operation_metrics.checksum_verify_completed += 1;
        entry.checksum = Some(checksum.hash.clone());
        entry.advance_to(ArtifactState::ChecksumVerified, timestamp_placeholder())?;
        update_journal_entry(journal, &entry);
        layout.write_journal(journal)?;

        // Step 4: move the staged directory to its final location. An
        // existing artifact is never overwritten; the staged copy is left in
        // place and the call fails with `AlreadyExists`.
        journal.operation_metrics.artifact_finalize_started += 1;
        layout.write_journal(journal)?;
        if artifact_path.exists() {
            return Err(std::io::Error::new(
                std::io::ErrorKind::AlreadyExists,
                format!("artifact path already exists: {}", artifact_path.display()),
            )
            .into());
        }
        fs::rename(&temp_path, &artifact_path)?;
        journal.operation_metrics.artifact_finalize_completed += 1;
        // The staging directory no longer exists once the rename succeeded.
        entry.temp_path = None;
        entry.advance_to(ArtifactState::Durable, timestamp_placeholder())?;
        update_journal_entry(journal, &entry);
        layout.write_journal(journal)?;

        Ok(SnapshotArtifact {
            canister_id: target.canister_id.clone(),
            snapshot_id,
            path: artifact_path,
            checksum: checksum.hash,
        })
    })
}
459
460// Replace one artifact row in the mutable journal.
461fn update_journal_entry(journal: &mut DownloadJournal, entry: &ArtifactJournalEntry) {
462    if let Some(existing) = journal.artifacts.iter_mut().find(|existing| {
463        existing.canister_id == entry.canister_id && existing.snapshot_id == entry.snapshot_id
464    }) {
465        *existing = entry.clone();
466    }
467}
468
469/// Resolve the selected canister plus optional direct/recursive children.
470pub fn resolve_targets(
471    options: &SnapshotDownloadOptions,
472) -> Result<Vec<SnapshotTarget>, SnapshotCommandError> {
473    if !options.include_children {
474        return Ok(vec![SnapshotTarget {
475            canister_id: options.canister.clone(),
476            role: None,
477            parent_canister_id: None,
478        }]);
479    }
480
481    let registry = load_registry_entries(options)?;
482    targets_from_registry(&registry, &options.canister, options.recursive)
483}
484
485// Load registry entries from a file or live root query.
486fn load_registry_entries(
487    options: &SnapshotDownloadOptions,
488) -> Result<Vec<RegistryEntry>, SnapshotCommandError> {
489    let registry_json = if let Some(path) = &options.registry_json {
490        fs::read_to_string(path)?
491    } else if let Some(root) = &options.root {
492        call_subnet_registry(options, root)?
493    } else {
494        return Err(SnapshotCommandError::MissingOption(
495            "--root or --registry-json when using --include-children",
496        ));
497    };
498
499    parse_registry_entries(&registry_json)
500}
501
502// Run `dfx canister call <root> canic_subnet_registry --output json`.
503fn call_subnet_registry(
504    options: &SnapshotDownloadOptions,
505    root: &str,
506) -> Result<String, SnapshotCommandError> {
507    let mut command = Command::new(&options.dfx);
508    command.arg("canister");
509    add_canister_network_args(&mut command, options);
510    command.args(["call", root, "canic_subnet_registry", "--output", "json"]);
511    run_output(&mut command)
512}
513
514// Create one canister snapshot and parse the snapshot id from dfx output.
515fn create_snapshot(
516    options: &SnapshotDownloadOptions,
517    canister_id: &str,
518) -> Result<String, SnapshotCommandError> {
519    let before = list_snapshot_ids(options, canister_id)?;
520    let mut command = Command::new(&options.dfx);
521    command.arg("canister");
522    add_canister_network_args(&mut command, options);
523    command.args(["snapshot", "create", canister_id]);
524    let output = run_output_with_stderr(&mut command)?;
525    if let Some(snapshot_id) = parse_snapshot_id(&output) {
526        return Ok(snapshot_id);
527    }
528
529    let before = before.into_iter().collect::<BTreeSet<_>>();
530    let mut new_ids = list_snapshot_ids(options, canister_id)?
531        .into_iter()
532        .filter(|snapshot_id| !before.contains(snapshot_id))
533        .collect::<Vec<_>>();
534    if new_ids.len() == 1 {
535        Ok(new_ids.remove(0))
536    } else {
537        Err(SnapshotCommandError::SnapshotIdUnavailable(output))
538    }
539}
540
541// List the existing snapshot ids for one canister.
542fn list_snapshot_ids(
543    options: &SnapshotDownloadOptions,
544    canister_id: &str,
545) -> Result<Vec<String>, SnapshotCommandError> {
546    let mut command = Command::new(&options.dfx);
547    command.arg("canister");
548    add_canister_network_args(&mut command, options);
549    command.args(["snapshot", "list", canister_id]);
550    let output = run_output(&mut command)?;
551    Ok(parse_snapshot_list_ids(&output))
552}
553
554// Stop a canister before taking a snapshot when explicitly requested.
555fn stop_canister(
556    options: &SnapshotDownloadOptions,
557    canister_id: &str,
558) -> Result<(), SnapshotCommandError> {
559    let mut command = Command::new(&options.dfx);
560    command.arg("canister");
561    add_canister_network_args(&mut command, options);
562    command.args(["stop", canister_id]);
563    run_status(&mut command)
564}
565
566// Start a canister after snapshot capture when explicitly requested.
567fn start_canister(
568    options: &SnapshotDownloadOptions,
569    canister_id: &str,
570) -> Result<(), SnapshotCommandError> {
571    let mut command = Command::new(&options.dfx);
572    command.arg("canister");
573    add_canister_network_args(&mut command, options);
574    command.args(["start", canister_id]);
575    run_status(&mut command)
576}
577
578// Run one snapshot operation with optional stop/start lifecycle commands.
579fn with_optional_stop<T>(
580    options: &SnapshotDownloadOptions,
581    canister_id: &str,
582    operation: impl FnOnce() -> Result<T, SnapshotCommandError>,
583) -> Result<T, SnapshotCommandError> {
584    if options.lifecycle.stop_before_snapshot() {
585        stop_canister(options, canister_id)?;
586    }
587
588    let result = operation();
589
590    if options.lifecycle.resume_after_snapshot() {
591        match result {
592            Ok(value) => {
593                start_canister(options, canister_id)?;
594                Ok(value)
595            }
596            Err(error) => {
597                let _ = start_canister(options, canister_id);
598                Err(error)
599            }
600        }
601    } else {
602        result
603    }
604}
605
606// Download one canister snapshot into the target artifact directory.
607fn download_snapshot(
608    options: &SnapshotDownloadOptions,
609    canister_id: &str,
610    snapshot_id: &str,
611    artifact_path: &Path,
612) -> Result<(), SnapshotCommandError> {
613    let mut command = Command::new(&options.dfx);
614    command.arg("canister");
615    add_canister_network_args(&mut command, options);
616    command.args(["snapshot", "download", canister_id, snapshot_id, "--dir"]);
617    command.arg(artifact_path);
618    run_status(&mut command)
619}
620
621// Add optional `dfx canister` network arguments.
622fn add_canister_network_args(command: &mut Command, options: &SnapshotDownloadOptions) {
623    if let Some(network) = &options.network {
624        command.args(["--network", network]);
625    }
626}
627
628// Execute a command and capture stdout.
629fn run_output(command: &mut Command) -> Result<String, SnapshotCommandError> {
630    let display = command_display(command);
631    let output = command.output()?;
632    if output.status.success() {
633        Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
634    } else {
635        Err(SnapshotCommandError::DfxFailed {
636            command: display,
637            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
638        })
639    }
640}
641
642// Execute a command and capture stdout plus stderr on success.
643fn run_output_with_stderr(command: &mut Command) -> Result<String, SnapshotCommandError> {
644    let display = command_display(command);
645    let output = command.output()?;
646    if output.status.success() {
647        let mut text = String::from_utf8_lossy(&output.stdout).to_string();
648        text.push_str(&String::from_utf8_lossy(&output.stderr));
649        Ok(text.trim().to_string())
650    } else {
651        Err(SnapshotCommandError::DfxFailed {
652            command: display,
653            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
654        })
655    }
656}
657
658// Execute a command and require a successful status.
659fn run_status(command: &mut Command) -> Result<(), SnapshotCommandError> {
660    let display = command_display(command);
661    let output = command.output()?;
662    if output.status.success() {
663        Ok(())
664    } else {
665        Err(SnapshotCommandError::DfxFailed {
666            command: display,
667            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
668        })
669    }
670}
671
// Produce a human-readable command line: the program followed by every
// argument, space-separated and lossily decoded. Arguments are not quoted, so
// the result is for diagnostics rather than for re-execution.
fn command_display(command: &Command) -> String {
    std::iter::once(command.get_program())
        .chain(command.get_args())
        .map(|part| part.to_string_lossy().into_owned())
        .collect::<Vec<_>>()
        .join(" ")
}
682
683// Render one dry-run create command.
684fn create_snapshot_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
685    let mut command = Command::new(&options.dfx);
686    command.arg("canister");
687    add_canister_network_args(&mut command, options);
688    command.args(["snapshot", "create", canister_id]);
689    command_display(&command)
690}
691
692// Render one dry-run download command.
693fn download_snapshot_command_display(
694    options: &SnapshotDownloadOptions,
695    canister_id: &str,
696    snapshot_id: &str,
697) -> String {
698    let mut command = Command::new(&options.dfx);
699    command.arg("canister");
700    add_canister_network_args(&mut command, options);
701    command.args(["snapshot", "download", canister_id, snapshot_id, "--dir"]);
702    command.arg(options.out.join(safe_path_segment(canister_id)));
703    command_display(&command)
704}
705
706// Render one dry-run stop command.
707fn stop_canister_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
708    let mut command = Command::new(&options.dfx);
709    command.arg("canister");
710    add_canister_network_args(&mut command, options);
711    command.args(["stop", canister_id]);
712    command_display(&command)
713}
714
715// Render one dry-run start command.
716fn start_canister_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
717    let mut command = Command::new(&options.dfx);
718    command.arg("canister");
719    add_canister_network_args(&mut command, options);
720    command.args(["start", canister_id]);
721    command_display(&command)
722}
723
///
/// RegistryEntry
///
/// One canister row parsed from the subnet registry JSON.
///

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RegistryEntry {
    /// Canister id, taken from the entry's `pid` string.
    pub pid: String,
    /// Role, taken from the entry's `role` string when present.
    pub role: Option<String>,
    /// Parent canister id, taken from `record.parent_pid` when present.
    pub parent_pid: Option<String>,
}
734
735/// Parse the `dfx --output json` subnet registry shape.
736pub fn parse_registry_entries(
737    registry_json: &str,
738) -> Result<Vec<RegistryEntry>, SnapshotCommandError> {
739    let data = serde_json::from_str::<Value>(registry_json)?;
740    let entries = data
741        .get("Ok")
742        .and_then(Value::as_array)
743        .or_else(|| data.as_array())
744        .ok_or(SnapshotCommandError::Usage(
745            "registry JSON must be an array or {\"Ok\": [...]}",
746        ))?;
747
748    Ok(entries.iter().filter_map(parse_registry_entry).collect())
749}
750
751// Parse one registry entry from dfx JSON.
752fn parse_registry_entry(value: &Value) -> Option<RegistryEntry> {
753    let pid = value.get("pid").and_then(Value::as_str)?.to_string();
754    let role = value
755        .get("role")
756        .and_then(Value::as_str)
757        .map(str::to_string);
758    let parent_pid = value
759        .get("record")
760        .and_then(|record| record.get("parent_pid"))
761        .and_then(parse_optional_principal);
762
763    Some(RegistryEntry {
764        pid,
765        role,
766        parent_pid,
767    })
768}
769
770// Parse optional principal JSON emitted as null, string, or optional vector form.
771fn parse_optional_principal(value: &Value) -> Option<String> {
772    if value.is_null() {
773        return None;
774    }
775    if let Some(text) = value.as_str() {
776        return Some(text.to_string());
777    }
778    value
779        .as_array()
780        .and_then(|items| items.first())
781        .and_then(Value::as_str)
782        .map(str::to_string)
783}
784
785/// Resolve selected target and children from registry entries.
786pub fn targets_from_registry(
787    registry: &[RegistryEntry],
788    canister_id: &str,
789    recursive: bool,
790) -> Result<Vec<SnapshotTarget>, SnapshotCommandError> {
791    let by_pid = registry
792        .iter()
793        .map(|entry| (entry.pid.as_str(), entry))
794        .collect::<BTreeMap<_, _>>();
795
796    let root = by_pid
797        .get(canister_id)
798        .ok_or_else(|| SnapshotCommandError::CanisterNotInRegistry(canister_id.to_string()))?;
799
800    let mut targets = Vec::new();
801    let mut seen = BTreeSet::new();
802    targets.push(SnapshotTarget {
803        canister_id: root.pid.clone(),
804        role: root.role.clone(),
805        parent_canister_id: root.parent_pid.clone(),
806    });
807    seen.insert(root.pid.clone());
808
809    let mut queue = VecDeque::from([root.pid.clone()]);
810    while let Some(parent) = queue.pop_front() {
811        for child in registry
812            .iter()
813            .filter(|entry| entry.parent_pid.as_deref() == Some(parent.as_str()))
814        {
815            if seen.insert(child.pid.clone()) {
816                targets.push(SnapshotTarget {
817                    canister_id: child.pid.clone(),
818                    role: child.role.clone(),
819                    parent_canister_id: child.parent_pid.clone(),
820                });
821                if recursive {
822                    queue.push_back(child.pid.clone());
823                }
824            }
825        }
826    }
827
828    Ok(targets)
829}
830
831// Build a validated manifest for one successful snapshot download run.
832fn build_manifest(
833    options: &SnapshotDownloadOptions,
834    targets: &[SnapshotTarget],
835    artifacts: &[SnapshotArtifact],
836    discovery_topology_hash: TopologyHash,
837    pre_snapshot_topology_hash: TopologyHash,
838) -> Result<FleetBackupManifest, SnapshotCommandError> {
839    let roles = targets
840        .iter()
841        .enumerate()
842        .map(|(index, target)| target_role(options, index, target))
843        .collect::<BTreeSet<_>>()
844        .into_iter()
845        .collect::<Vec<_>>();
846
847    let manifest = FleetBackupManifest {
848        manifest_version: 1,
849        backup_id: backup_id(options),
850        created_at: timestamp_placeholder(),
851        tool: ToolMetadata {
852            name: "canic-cli".to_string(),
853            version: env!("CARGO_PKG_VERSION").to_string(),
854        },
855        source: SourceMetadata {
856            environment: options
857                .network
858                .clone()
859                .unwrap_or_else(|| "local".to_string()),
860            root_canister: options
861                .root
862                .clone()
863                .unwrap_or_else(|| options.canister.clone()),
864        },
865        consistency: ConsistencySection {
866            mode: ConsistencyMode::CrashConsistent,
867            backup_units: vec![BackupUnit {
868                unit_id: "snapshot-selection".to_string(),
869                kind: if options.include_children {
870                    BackupUnitKind::SubtreeRooted
871                } else {
872                    BackupUnitKind::Flat
873                },
874                roles,
875                consistency_reason: if options.include_children {
876                    None
877                } else {
878                    Some("explicit single-canister snapshot selection".to_string())
879                },
880                dependency_closure: Vec::new(),
881                topology_validation: if options.include_children {
882                    "registry-subtree-selection".to_string()
883                } else {
884                    "explicit-selection".to_string()
885                },
886                quiescence_strategy: None,
887            }],
888        },
889        fleet: FleetSection {
890            topology_hash_algorithm: discovery_topology_hash.algorithm,
891            topology_hash_input: discovery_topology_hash.input,
892            discovery_topology_hash: discovery_topology_hash.hash.clone(),
893            pre_snapshot_topology_hash: pre_snapshot_topology_hash.hash,
894            topology_hash: discovery_topology_hash.hash,
895            members: targets
896                .iter()
897                .enumerate()
898                .map(|(index, target)| fleet_member(options, index, target, artifacts))
899                .collect::<Result<Vec<_>, _>>()?,
900        },
901        verification: VerificationPlan::default(),
902    };
903
904    manifest.validate()?;
905    Ok(manifest)
906}
907
908// Compute the canonical topology hash for one resolved target set.
909fn topology_hash_for_targets(
910    options: &SnapshotDownloadOptions,
911    targets: &[SnapshotTarget],
912) -> Result<TopologyHash, SnapshotCommandError> {
913    let topology_records = targets
914        .iter()
915        .enumerate()
916        .map(|(index, target)| topology_record(options, index, target))
917        .collect::<Result<Vec<_>, _>>()?;
918    Ok(TopologyHasher::hash(&topology_records))
919}
920
921// Fail closed if topology changes after discovery but before snapshot creation.
922fn ensure_topology_stable(
923    discovery: &TopologyHash,
924    pre_snapshot: &TopologyHash,
925) -> Result<(), SnapshotCommandError> {
926    if discovery.hash == pre_snapshot.hash {
927        return Ok(());
928    }
929
930    Err(SnapshotCommandError::TopologyChanged {
931        discovery: discovery.hash.clone(),
932        pre_snapshot: pre_snapshot.hash.clone(),
933    })
934}
935
936// Build one canonical topology record for manifest hashing.
937fn topology_record(
938    options: &SnapshotDownloadOptions,
939    index: usize,
940    target: &SnapshotTarget,
941) -> Result<TopologyRecord, SnapshotCommandError> {
942    Ok(TopologyRecord {
943        pid: parse_principal("fleet.members[].canister_id", &target.canister_id)?,
944        parent_pid: target
945            .parent_canister_id
946            .as_deref()
947            .map(|parent| parse_principal("fleet.members[].parent_canister_id", parent))
948            .transpose()?,
949        role: target_role(options, index, target),
950        module_hash: None,
951    })
952}
953
954// Build one manifest member from a captured durable artifact.
955fn fleet_member(
956    options: &SnapshotDownloadOptions,
957    index: usize,
958    target: &SnapshotTarget,
959    artifacts: &[SnapshotArtifact],
960) -> Result<FleetMember, SnapshotCommandError> {
961    let Some(artifact) = artifacts
962        .iter()
963        .find(|artifact| artifact.canister_id == target.canister_id)
964    else {
965        return Err(SnapshotCommandError::SnapshotIdUnavailable(format!(
966            "missing artifact for {}",
967            target.canister_id
968        )));
969    };
970    let role = target_role(options, index, target);
971
972    Ok(FleetMember {
973        role: role.clone(),
974        canister_id: target.canister_id.clone(),
975        parent_canister_id: target.parent_canister_id.clone(),
976        subnet_canister_id: options.root.clone(),
977        controller_hint: None,
978        identity_mode: if target.canister_id == options.canister {
979            IdentityMode::Fixed
980        } else {
981            IdentityMode::Relocatable
982        },
983        restore_group: if target.canister_id == options.canister {
984            1
985        } else {
986            2
987        },
988        verification_class: "basic".to_string(),
989        verification_checks: vec![VerificationCheck {
990            kind: "status".to_string(),
991            method: None,
992            roles: vec![role],
993        }],
994        source_snapshot: SourceSnapshot {
995            snapshot_id: artifact.snapshot_id.clone(),
996            module_hash: None,
997            wasm_hash: None,
998            code_version: None,
999            artifact_path: safe_path_segment(&target.canister_id),
1000            checksum_algorithm: "sha256".to_string(),
1001            checksum: Some(artifact.checksum.clone()),
1002        },
1003    })
1004}
1005
1006// Return the manifest role for one selected snapshot target.
1007fn target_role(options: &SnapshotDownloadOptions, index: usize, target: &SnapshotTarget) -> String {
1008    target.role.clone().unwrap_or_else(|| {
1009        if target.canister_id == options.canister {
1010            "root".to_string()
1011        } else {
1012            format!("member-{index}")
1013        }
1014    })
1015}
1016
1017// Parse one principal used by generated topology manifest metadata.
1018fn parse_principal(field: &'static str, value: &str) -> Result<Principal, SnapshotCommandError> {
1019    Principal::from_text(value).map_err(|_| SnapshotCommandError::InvalidPrincipal {
1020        field,
1021        value: value.to_string(),
1022    })
1023}
1024
// Guess the snapshot id printed by dfx.
//
// The output is tokenised on whitespace, quotes, colons, and commas; the last
// non-empty token made only of ASCII alphanumerics, '-', '_', or '.' is
// returned. `None` means no token qualified.
fn parse_snapshot_id(output: &str) -> Option<String> {
    let is_separator = |c: char| c.is_whitespace() || matches!(c, '"' | '\'' | ':' | ',');
    let is_id_char = |c: char| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.');

    // Walk tokens from the end so the trailing candidate wins.
    output
        .rsplit(is_separator)
        .find(|token| !token.is_empty() && token.chars().all(is_id_char))
        .map(str::to_string)
}
1037
// Extract snapshot ids from `dfx` snapshot list output.
//
// Each line contributes the trimmed text before its first ':'. Lines without
// a colon, or with nothing before it, are ignored.
fn parse_snapshot_list_ids(output: &str) -> Vec<String> {
    let mut snapshot_ids = Vec::new();

    for line in output.lines() {
        let Some((prefix, _details)) = line.split_once(':') else {
            continue;
        };

        let candidate = prefix.trim();
        if !candidate.is_empty() {
            snapshot_ids.push(candidate.to_string());
        }
    }

    snapshot_ids
}
1050
// Reduce a principal's text to characters that are safe in a single
// filesystem path segment.
//
// ASCII alphanumerics, '-', and '_' pass through; every other character is
// replaced by one '_'.
fn safe_path_segment(value: &str) -> String {
    let mut segment = String::with_capacity(value.len());

    for c in value.chars() {
        let keep = c.is_ascii_alphanumeric() || c == '-' || c == '_';
        segment.push(if keep { c } else { '_' });
    }

    segment
}
1064
1065// Build a stable backup id for this command's output directory.
1066fn backup_id(options: &SnapshotDownloadOptions) -> String {
1067    options
1068        .out
1069        .file_name()
1070        .and_then(|name| name.to_str())
1071        .map_or_else(|| "snapshot-download".to_string(), str::to_string)
1072}
1073
// Stand-in creation timestamp; always "unknown" until the CLI owns a clock
// abstraction.
fn timestamp_placeholder() -> String {
    String::from("unknown")
}
1078
1079// Read the next required option value.
1080fn next_value<I>(args: &mut I, option: &'static str) -> Result<String, SnapshotCommandError>
1081where
1082    I: Iterator<Item = OsString>,
1083{
1084    args.next()
1085        .and_then(|value| value.into_string().ok())
1086        .ok_or(SnapshotCommandError::MissingValue(option))
1087}
1088
// One-line usage synopsis for the snapshot download command.
const fn usage() -> &'static str {
    const USAGE: &str = "usage: canic snapshot download --canister <id> --out <dir> [--root <id> | --registry-json <file>] [--include-children] [--recursive] [--dry-run] [--stop-before-snapshot] [--resume-after-snapshot] [--network <name>]";

    USAGE
}
1093
#[cfg(test)]
mod tests {
    use super::*;
    use canic_backup::persistence::BackupLayout;
    use serde_json::json;
    use std::time::{SystemTime, UNIX_EPOCH};

    const ROOT: &str = "aaaaa-aa";
    const CHILD: &str = "renrk-eyaaa-aaaaa-aaada-cai";
    const GRANDCHILD: &str = "rno2w-sqaaa-aaaaa-aaacq-cai";
    const HASH: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";

    // Shell stand-in for dfx: `snapshot create` echoes "snapshot-<arg 4>",
    // `snapshot list` prints nothing, `snapshot download` writes a marker file
    // into the directory passed as argument 7, and anything else fails.
    #[cfg(unix)]
    const FAKE_DFX_SCRIPT: &str = r#"#!/bin/sh
set -eu
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "create" ]; then
  echo "snapshot-$4"
  exit 0
fi
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "list" ]; then
  exit 0
fi
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "download" ]; then
  mkdir -p "$7"
  printf "%s:%s\n" "$4" "$5" > "$7/snapshot.txt"
  exit 0
fi
echo "unexpected args: $*" >&2
exit 1
"#;

    // The wrapped `{"Ok": [...]}` registry shape must parse into all entries.
    #[test]
    fn parses_wrapped_registry_json() {
        let entries = parse_registry_entries(&registry_json()).expect("parse registry");

        assert_eq!(entries.len(), 3);
        assert_eq!(entries[1].parent_pid.as_deref(), Some(ROOT));
    }

    // Non-recursive resolution stops after the root's direct children.
    #[test]
    fn targets_include_direct_children() {
        let entries = parse_registry_entries(&registry_json()).expect("parse registry");

        let targets = targets_from_registry(&entries, ROOT, false).expect("resolve targets");

        let canister_ids: Vec<&str> = targets
            .iter()
            .map(|target| target.canister_id.as_str())
            .collect();
        assert_eq!(canister_ids, vec![ROOT, CHILD]);
    }

    // Recursive resolution also reaches the grandchild.
    #[test]
    fn targets_include_recursive_children() {
        let entries = parse_registry_entries(&registry_json()).expect("parse registry");

        let targets = targets_from_registry(&entries, ROOT, true).expect("resolve targets");

        let canister_ids: Vec<&str> = targets
            .iter()
            .map(|target| target.canister_id.as_str())
            .collect();
        assert_eq!(canister_ids, vec![ROOT, CHILD, GRANDCHILD]);
    }

    // A typical "Created snapshot: <id>" line yields the id.
    #[test]
    fn parses_snapshot_id_from_output() {
        let parsed = parse_snapshot_id("Created snapshot: snap_abc-123\n");

        assert_eq!(parsed.as_deref(), Some("snap_abc-123"));
    }

    // The list output format is parseable when create prints nothing useful.
    #[test]
    fn parses_snapshot_ids_from_list_output() {
        let listing =
            "0000000000000000ffffffffff9000050101: 213.76 MiB, taken at 2026-05-03 12:20:53 UTC\n";

        let snapshot_ids = parse_snapshot_list_ids(listing);

        assert_eq!(snapshot_ids, vec!["0000000000000000ffffffffff9000050101"]);
    }

    // Option parsing handles the full dry-run invocation with both lifecycle flags.
    #[test]
    fn parses_download_options() {
        let args = [
            "--canister",
            ROOT,
            "--out",
            "backups/test",
            "--registry-json",
            "registry.json",
            "--recursive",
            "--dry-run",
            "--stop-before-snapshot",
            "--resume-after-snapshot",
        ]
        .map(OsString::from);

        let options = SnapshotDownloadOptions::parse(args).expect("parse options");

        assert_eq!(options.canister, ROOT);
        assert!(options.include_children);
        assert!(options.recursive);
        assert!(options.dry_run);
        assert_eq!(options.lifecycle, SnapshotLifecycleMode::StopAndResume);
    }

    // Differing discovery and pre-snapshot hashes must be rejected.
    #[test]
    fn topology_stability_rejects_pre_snapshot_drift() {
        let discovery = topology_hash(HASH);
        let drifted =
            topology_hash("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff");

        let err =
            ensure_topology_stable(&discovery, &drifted).expect_err("topology drift should fail");

        assert!(matches!(err, SnapshotCommandError::TopologyChanged { .. }));
    }

    // The real command path, driven by the fake dfx, must leave behind a
    // manifest and a journal whose single artifact is durable.
    #[cfg(unix)]
    #[test]
    fn download_snapshots_writes_manifest_and_durable_journal() {
        let root = temp_dir("canic-cli-download");
        let fake_dfx = install_fake_dfx(&root);

        let out = root.join("backup");
        let options = SnapshotDownloadOptions {
            canister: ROOT.to_string(),
            out: out.clone(),
            root: None,
            registry_json: None,
            include_children: false,
            recursive: false,
            dry_run: false,
            lifecycle: SnapshotLifecycleMode::SnapshotOnly,
            network: None,
            dfx: fake_dfx.display().to_string(),
        };

        let result = download_snapshots(&options).expect("download snapshots");
        let layout = BackupLayout::new(out);
        let journal = layout.read_journal().expect("read journal");
        let manifest = layout.read_manifest().expect("read manifest");

        // Everything needed is in memory now; clean up before asserting.
        fs::remove_dir_all(root).expect("remove temp root");

        assert_eq!(result.artifacts.len(), 1);
        assert_eq!(journal.artifacts.len(), 1);

        let metrics = &journal.operation_metrics;
        assert_eq!(metrics.target_count, 1);
        assert_eq!(metrics.snapshot_create_started, 1);
        assert_eq!(metrics.snapshot_create_completed, 1);
        assert_eq!(metrics.snapshot_download_started, 1);
        assert_eq!(metrics.snapshot_download_completed, 1);
        assert_eq!(metrics.checksum_verify_started, 1);
        assert_eq!(metrics.checksum_verify_completed, 1);
        assert_eq!(metrics.artifact_finalize_started, 1);
        assert_eq!(metrics.artifact_finalize_completed, 1);

        let journal_artifact = &journal.artifacts[0];
        assert_eq!(journal_artifact.state, ArtifactState::Durable);
        assert!(journal_artifact.checksum.is_some());

        assert_eq!(manifest.backup_id, journal.backup_id);
        assert_eq!(manifest.fleet.members.len(), 1);

        let member = &manifest.fleet.members[0];
        assert_eq!(member.canister_id, ROOT);
        assert_eq!(member.source_snapshot.snapshot_id, "snapshot-aaaaa-aa");
        assert_eq!(
            member.source_snapshot.checksum.as_deref(),
            journal_artifact.checksum.as_deref()
        );
    }

    // Create the temp root, write the fake dfx script into it, and make the
    // script executable. Returns the script path.
    #[cfg(unix)]
    fn install_fake_dfx(root: &Path) -> PathBuf {
        use std::os::unix::fs::PermissionsExt;

        let script_path = root.join("fake-dfx.sh");
        fs::create_dir_all(root).expect("create temp root");
        fs::write(&script_path, FAKE_DFX_SCRIPT).expect("write fake dfx");

        let mut permissions = fs::metadata(&script_path)
            .expect("stat fake dfx")
            .permissions();
        permissions.set_mode(0o755);
        fs::set_permissions(&script_path, permissions).expect("chmod fake dfx");

        script_path
    }

    // Three-entry registry fixture: root, its child, and a grandchild whose
    // parent is given in the single-element array form.
    fn registry_json() -> String {
        let entries = vec![
            registry_entry(ROOT, "root", json!(null)),
            registry_entry(CHILD, "app", json!(ROOT)),
            registry_entry(GRANDCHILD, "worker", json!([CHILD])),
        ];

        json!({ "Ok": entries }).to_string()
    }

    // Build one registry entry with the pid and role repeated inside `record`.
    fn registry_entry(pid: &str, role: &str, parent_pid: serde_json::Value) -> serde_json::Value {
        json!({
            "pid": pid,
            "role": role,
            "record": {
                "pid": pid,
                "role": role,
                "parent_pid": parent_pid
            }
        })
    }

    // Wrap a raw hash string in a `TopologyHash` with fixed metadata.
    fn topology_hash(hash: &str) -> TopologyHash {
        TopologyHash {
            algorithm: String::from("sha256"),
            input: String::from("sorted(pid,parent_pid,role,module_hash)"),
            hash: String::from(hash),
        }
    }

    // Name a temp directory from the prefix, the process id, and the current
    // time in nanoseconds. The directory itself is not created here.
    fn temp_dir(prefix: &str) -> PathBuf {
        let since_epoch = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system time after epoch");
        let nanos = since_epoch.as_nanos();
        let pid = std::process::id();

        std::env::temp_dir().join(format!("{prefix}-{pid}-{nanos}"))
    }
}