// canic_cli/snapshot/mod.rs

1use candid::Principal;
2use canic_backup::{
3    artifacts::{ArtifactChecksum, ArtifactChecksumError},
4    journal::{
5        ArtifactJournalEntry, ArtifactState, DownloadJournal, DownloadOperationMetrics,
6        JournalValidationError,
7    },
8    manifest::{
9        BackupUnit, BackupUnitKind, ConsistencyMode, ConsistencySection, FleetBackupManifest,
10        FleetMember, FleetSection, IdentityMode, ManifestValidationError, SourceMetadata,
11        SourceSnapshot, ToolMetadata, VerificationCheck, VerificationPlan,
12    },
13    persistence::{BackupLayout, PersistenceError},
14    topology::{TopologyHash, TopologyHasher, TopologyRecord},
15};
16use serde_json::Value;
17use std::{
18    collections::{BTreeMap, BTreeSet, VecDeque},
19    ffi::OsString,
20    fs,
21    path::{Path, PathBuf},
22    process::Command,
23};
24use thiserror::Error as ThisError;
25
26///
27/// SnapshotCommandError
28///
29
30#[derive(Debug, ThisError)]
31pub enum SnapshotCommandError {
32    #[error("{0}")]
33    Usage(&'static str),
34
35    #[error("missing required option {0}")]
36    MissingOption(&'static str),
37
38    #[error("unknown option {0}")]
39    UnknownOption(String),
40
41    #[error("option {0} requires a value")]
42    MissingValue(&'static str),
43
44    #[error("cannot combine --root and --registry-json")]
45    ConflictingRegistrySources,
46
47    #[error("registry JSON did not contain the requested canister {0}")]
48    CanisterNotInRegistry(String),
49
50    #[error("dfx command failed: {command}\n{stderr}")]
51    DfxFailed { command: String, stderr: String },
52
53    #[error("could not parse snapshot id from dfx output: {0}")]
54    SnapshotIdUnavailable(String),
55
56    #[error("field {field} must be a valid principal: {value}")]
57    InvalidPrincipal { field: &'static str, value: String },
58
59    #[error(
60        "topology changed before snapshot start: discovery={discovery}, pre_snapshot={pre_snapshot}"
61    )]
62    TopologyChanged {
63        discovery: String,
64        pre_snapshot: String,
65    },
66
67    #[error(transparent)]
68    Io(#[from] std::io::Error),
69
70    #[error(transparent)]
71    Json(#[from] serde_json::Error),
72
73    #[error(transparent)]
74    Checksum(#[from] ArtifactChecksumError),
75
76    #[error(transparent)]
77    Persistence(#[from] PersistenceError),
78
79    #[error(transparent)]
80    Journal(#[from] JournalValidationError),
81
82    #[error(transparent)]
83    InvalidManifest(#[from] ManifestValidationError),
84}
85
86///
87/// SnapshotDownloadOptions
88///
89
90#[derive(Clone, Debug, Eq, PartialEq)]
91pub struct SnapshotDownloadOptions {
92    pub canister: String,
93    pub out: PathBuf,
94    pub root: Option<String>,
95    pub registry_json: Option<PathBuf>,
96    pub include_children: bool,
97    pub recursive: bool,
98    pub dry_run: bool,
99    pub lifecycle: SnapshotLifecycleMode,
100    pub network: Option<String>,
101    pub dfx: String,
102}
103
104impl SnapshotDownloadOptions {
105    /// Parse snapshot download options from CLI arguments.
106    pub fn parse<I>(args: I) -> Result<Self, SnapshotCommandError>
107    where
108        I: IntoIterator<Item = OsString>,
109    {
110        let mut canister = None;
111        let mut out = None;
112        let mut root = None;
113        let mut registry_json = None;
114        let mut include_children = false;
115        let mut recursive = false;
116        let mut dry_run = false;
117        let mut stop_before_snapshot = false;
118        let mut resume_after_snapshot = false;
119        let mut network = None;
120        let mut dfx = "dfx".to_string();
121
122        let mut args = args.into_iter();
123        while let Some(arg) = args.next() {
124            let arg = arg
125                .into_string()
126                .map_err(|_| SnapshotCommandError::Usage(usage()))?;
127            match arg.as_str() {
128                "--canister" => canister = Some(next_value(&mut args, "--canister")?),
129                "--out" => out = Some(PathBuf::from(next_value(&mut args, "--out")?)),
130                "--root" => root = Some(next_value(&mut args, "--root")?),
131                "--registry-json" => {
132                    registry_json = Some(PathBuf::from(next_value(&mut args, "--registry-json")?));
133                }
134                "--include-children" => include_children = true,
135                "--recursive" => {
136                    recursive = true;
137                    include_children = true;
138                }
139                "--dry-run" => dry_run = true,
140                "--stop-before-snapshot" => stop_before_snapshot = true,
141                "--resume-after-snapshot" => resume_after_snapshot = true,
142                "--network" => network = Some(next_value(&mut args, "--network")?),
143                "--dfx" => dfx = next_value(&mut args, "--dfx")?,
144                "--help" | "-h" => return Err(SnapshotCommandError::Usage(usage())),
145                _ => return Err(SnapshotCommandError::UnknownOption(arg)),
146            }
147        }
148
149        if root.is_some() && registry_json.is_some() {
150            return Err(SnapshotCommandError::ConflictingRegistrySources);
151        }
152
153        Ok(Self {
154            canister: canister.ok_or(SnapshotCommandError::MissingOption("--canister"))?,
155            out: out.ok_or(SnapshotCommandError::MissingOption("--out"))?,
156            root,
157            registry_json,
158            include_children,
159            recursive,
160            dry_run,
161            lifecycle: SnapshotLifecycleMode::from_flags(
162                stop_before_snapshot,
163                resume_after_snapshot,
164            ),
165            network,
166            dfx,
167        })
168    }
169}
170
///
/// SnapshotLifecycleMode
///
/// Which stop/start commands surround each snapshot.
///

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SnapshotLifecycleMode {
    /// Neither stop nor start the canister.
    SnapshotOnly,
    /// Stop the canister first; do not start it afterwards.
    StopBeforeSnapshot,
    /// Start the canister afterwards without stopping it first.
    ResumeAfterSnapshot,
    /// Stop the canister first and start it afterwards.
    StopAndResume,
}

impl SnapshotLifecycleMode {
    /// Map the two CLI flags onto the matching lifecycle mode.
    #[must_use]
    pub const fn from_flags(stop_before_snapshot: bool, resume_after_snapshot: bool) -> Self {
        if stop_before_snapshot {
            if resume_after_snapshot {
                Self::StopAndResume
            } else {
                Self::StopBeforeSnapshot
            }
        } else if resume_after_snapshot {
            Self::ResumeAfterSnapshot
        } else {
            Self::SnapshotOnly
        }
    }

    /// Whether this mode stops the canister before the snapshot is created.
    #[must_use]
    pub const fn stop_before_snapshot(self) -> bool {
        match self {
            Self::StopBeforeSnapshot | Self::StopAndResume => true,
            Self::SnapshotOnly | Self::ResumeAfterSnapshot => false,
        }
    }

    /// Whether this mode starts the canister after the snapshot is captured.
    #[must_use]
    pub const fn resume_after_snapshot(self) -> bool {
        match self {
            Self::ResumeAfterSnapshot | Self::StopAndResume => true,
            Self::SnapshotOnly | Self::StopBeforeSnapshot => false,
        }
    }
}
207
///
/// SnapshotTarget
///
/// One canister selected for snapshotting.
///

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SnapshotTarget {
    /// Textual id of the canister to snapshot.
    pub canister_id: String,
    /// Role taken from the registry; `None` for an explicit single selection.
    pub role: Option<String>,
    /// Parent id taken from the registry, when known.
    pub parent_canister_id: Option<String>,
}
218
219/// Run a snapshot subcommand.
220pub fn run<I>(args: I) -> Result<(), SnapshotCommandError>
221where
222    I: IntoIterator<Item = OsString>,
223{
224    let mut args = args.into_iter();
225    let Some(command) = args.next().and_then(|arg| arg.into_string().ok()) else {
226        return Err(SnapshotCommandError::Usage(usage()));
227    };
228
229    match command.as_str() {
230        "download" => {
231            let options = SnapshotDownloadOptions::parse(args)?;
232            let result = download_snapshots(&options)?;
233            for artifact in result.artifacts {
234                println!(
235                    "{} {} {}",
236                    artifact.canister_id,
237                    artifact.snapshot_id,
238                    artifact.path.display()
239                );
240            }
241            Ok(())
242        }
243        "help" | "--help" | "-h" => Err(SnapshotCommandError::Usage(usage())),
244        _ => Err(SnapshotCommandError::UnknownOption(command)),
245    }
246}
247
248///
249/// SnapshotDownloadResult
250///
251
252#[derive(Clone, Debug, Eq, PartialEq)]
253pub struct SnapshotDownloadResult {
254    pub artifacts: Vec<SnapshotArtifact>,
255}
256
///
/// SnapshotArtifact
///
/// One downloaded (or, in a dry run, planned) snapshot.
///

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SnapshotArtifact {
    /// Canister the snapshot belongs to.
    pub canister_id: String,
    /// Snapshot id; the literal `<snapshot-id>` in a dry run.
    pub snapshot_id: String,
    /// Final artifact directory under the output root.
    pub path: PathBuf,
    /// Artifact checksum hash; the literal `<sha256>` in a dry run.
    pub checksum: String,
}
268
269/// Create and download snapshots for the selected canister set.
270pub fn download_snapshots(
271    options: &SnapshotDownloadOptions,
272) -> Result<SnapshotDownloadResult, SnapshotCommandError> {
273    let targets = resolve_targets(options)?;
274    let discovery_topology_hash = topology_hash_for_targets(options, &targets)?;
275    let pre_snapshot_topology_hash =
276        accepted_pre_snapshot_topology_hash(options, &discovery_topology_hash)?;
277    let mut artifacts = Vec::with_capacity(targets.len());
278    let mut journal = DownloadJournal {
279        journal_version: 1,
280        backup_id: backup_id(options),
281        discovery_topology_hash: Some(discovery_topology_hash.hash.clone()),
282        pre_snapshot_topology_hash: Some(pre_snapshot_topology_hash.hash.clone()),
283        operation_metrics: DownloadOperationMetrics {
284            target_count: targets.len(),
285            ..DownloadOperationMetrics::default()
286        },
287        artifacts: Vec::new(),
288    };
289    let layout = BackupLayout::new(options.out.clone());
290
291    for target in &targets {
292        let artifact_relative_path = PathBuf::from(safe_path_segment(&target.canister_id));
293        let artifact_path = options.out.join(&artifact_relative_path);
294        let temp_path = options
295            .out
296            .join(format!("{}.tmp", safe_path_segment(&target.canister_id)));
297
298        if options.dry_run {
299            artifacts.push(dry_run_artifact(options, target, artifact_path));
300            continue;
301        }
302
303        artifacts.push(capture_snapshot_artifact(
304            options,
305            &layout,
306            &mut journal,
307            target,
308            &artifact_relative_path,
309            artifact_path,
310            temp_path,
311        )?);
312    }
313
314    if !options.dry_run {
315        let manifest = build_manifest(
316            options,
317            &targets,
318            &artifacts,
319            discovery_topology_hash,
320            pre_snapshot_topology_hash,
321        )?;
322        layout.write_manifest(&manifest)?;
323    }
324
325    Ok(SnapshotDownloadResult { artifacts })
326}
327
328// Resolve and verify the pre-snapshot topology hash before any mutation.
329fn accepted_pre_snapshot_topology_hash(
330    options: &SnapshotDownloadOptions,
331    discovery_topology_hash: &TopologyHash,
332) -> Result<TopologyHash, SnapshotCommandError> {
333    if options.dry_run {
334        return Ok(discovery_topology_hash.clone());
335    }
336
337    let pre_snapshot_targets = resolve_targets(options)?;
338    let pre_snapshot_topology_hash = topology_hash_for_targets(options, &pre_snapshot_targets)?;
339    ensure_topology_stable(discovery_topology_hash, &pre_snapshot_topology_hash)?;
340    Ok(pre_snapshot_topology_hash)
341}
342
343// Print the planned commands and return a placeholder artifact for dry runs.
344fn dry_run_artifact(
345    options: &SnapshotDownloadOptions,
346    target: &SnapshotTarget,
347    artifact_path: PathBuf,
348) -> SnapshotArtifact {
349    if options.lifecycle.stop_before_snapshot() {
350        println!(
351            "{}",
352            stop_canister_command_display(options, &target.canister_id)
353        );
354    }
355    println!(
356        "{}",
357        create_snapshot_command_display(options, &target.canister_id)
358    );
359    println!(
360        "{}",
361        download_snapshot_command_display(options, &target.canister_id, "<snapshot-id>")
362    );
363    if options.lifecycle.resume_after_snapshot() {
364        println!(
365            "{}",
366            start_canister_command_display(options, &target.canister_id)
367        );
368    }
369
370    SnapshotArtifact {
371        canister_id: target.canister_id.clone(),
372        snapshot_id: "<snapshot-id>".to_string(),
373        path: artifact_path,
374        checksum: "<sha256>".to_string(),
375    }
376}
377
378// Create, download, checksum, and finalize one durable snapshot artifact.
379fn capture_snapshot_artifact(
380    options: &SnapshotDownloadOptions,
381    layout: &BackupLayout,
382    journal: &mut DownloadJournal,
383    target: &SnapshotTarget,
384    artifact_relative_path: &Path,
385    artifact_path: PathBuf,
386    temp_path: PathBuf,
387) -> Result<SnapshotArtifact, SnapshotCommandError> {
388    with_optional_stop(options, &target.canister_id, || {
389        journal.operation_metrics.snapshot_create_started += 1;
390        let snapshot_id = create_snapshot(options, &target.canister_id)?;
391        journal.operation_metrics.snapshot_create_completed += 1;
392        let mut entry = ArtifactJournalEntry {
393            canister_id: target.canister_id.clone(),
394            snapshot_id: snapshot_id.clone(),
395            state: ArtifactState::Created,
396            temp_path: None,
397            artifact_path: artifact_relative_path.display().to_string(),
398            checksum_algorithm: "sha256".to_string(),
399            checksum: None,
400            updated_at: timestamp_placeholder(),
401        };
402        journal.artifacts.push(entry.clone());
403        layout.write_journal(journal)?;
404
405        if temp_path.exists() {
406            fs::remove_dir_all(&temp_path)?;
407        }
408        fs::create_dir_all(&temp_path)?;
409        journal.operation_metrics.snapshot_download_started += 1;
410        layout.write_journal(journal)?;
411        download_snapshot(options, &target.canister_id, &snapshot_id, &temp_path)?;
412        journal.operation_metrics.snapshot_download_completed += 1;
413        entry.advance_to(ArtifactState::Downloaded, timestamp_placeholder())?;
414        entry.temp_path = Some(temp_path.display().to_string());
415        update_journal_entry(journal, &entry);
416        layout.write_journal(journal)?;
417
418        journal.operation_metrics.checksum_verify_started += 1;
419        layout.write_journal(journal)?;
420        let checksum = ArtifactChecksum::from_path(&temp_path)?;
421        journal.operation_metrics.checksum_verify_completed += 1;
422        entry.checksum = Some(checksum.hash.clone());
423        entry.advance_to(ArtifactState::ChecksumVerified, timestamp_placeholder())?;
424        update_journal_entry(journal, &entry);
425        layout.write_journal(journal)?;
426
427        journal.operation_metrics.artifact_finalize_started += 1;
428        layout.write_journal(journal)?;
429        if artifact_path.exists() {
430            return Err(std::io::Error::new(
431                std::io::ErrorKind::AlreadyExists,
432                format!("artifact path already exists: {}", artifact_path.display()),
433            )
434            .into());
435        }
436        fs::rename(&temp_path, &artifact_path)?;
437        journal.operation_metrics.artifact_finalize_completed += 1;
438        entry.temp_path = None;
439        entry.advance_to(ArtifactState::Durable, timestamp_placeholder())?;
440        update_journal_entry(journal, &entry);
441        layout.write_journal(journal)?;
442
443        Ok(SnapshotArtifact {
444            canister_id: target.canister_id.clone(),
445            snapshot_id,
446            path: artifact_path,
447            checksum: checksum.hash,
448        })
449    })
450}
451
452// Replace one artifact row in the mutable journal.
453fn update_journal_entry(journal: &mut DownloadJournal, entry: &ArtifactJournalEntry) {
454    if let Some(existing) = journal.artifacts.iter_mut().find(|existing| {
455        existing.canister_id == entry.canister_id && existing.snapshot_id == entry.snapshot_id
456    }) {
457        *existing = entry.clone();
458    }
459}
460
461/// Resolve the selected canister plus optional direct/recursive children.
462pub fn resolve_targets(
463    options: &SnapshotDownloadOptions,
464) -> Result<Vec<SnapshotTarget>, SnapshotCommandError> {
465    if !options.include_children {
466        return Ok(vec![SnapshotTarget {
467            canister_id: options.canister.clone(),
468            role: None,
469            parent_canister_id: None,
470        }]);
471    }
472
473    let registry = load_registry_entries(options)?;
474    targets_from_registry(&registry, &options.canister, options.recursive)
475}
476
477// Load registry entries from a file or live root query.
478fn load_registry_entries(
479    options: &SnapshotDownloadOptions,
480) -> Result<Vec<RegistryEntry>, SnapshotCommandError> {
481    let registry_json = if let Some(path) = &options.registry_json {
482        fs::read_to_string(path)?
483    } else if let Some(root) = &options.root {
484        call_subnet_registry(options, root)?
485    } else {
486        return Err(SnapshotCommandError::MissingOption(
487            "--root or --registry-json when using --include-children",
488        ));
489    };
490
491    parse_registry_entries(&registry_json)
492}
493
494// Run `dfx canister call <root> canic_subnet_registry --output json`.
495fn call_subnet_registry(
496    options: &SnapshotDownloadOptions,
497    root: &str,
498) -> Result<String, SnapshotCommandError> {
499    let mut command = Command::new(&options.dfx);
500    command.arg("canister");
501    add_canister_network_args(&mut command, options);
502    command.args(["call", root, "canic_subnet_registry", "--output", "json"]);
503    run_output(&mut command)
504}
505
506// Create one canister snapshot and parse the snapshot id from dfx output.
507fn create_snapshot(
508    options: &SnapshotDownloadOptions,
509    canister_id: &str,
510) -> Result<String, SnapshotCommandError> {
511    let before = list_snapshot_ids(options, canister_id)?;
512    let mut command = Command::new(&options.dfx);
513    command.arg("canister");
514    add_canister_network_args(&mut command, options);
515    command.args(["snapshot", "create", canister_id]);
516    let output = run_output_with_stderr(&mut command)?;
517    if let Some(snapshot_id) = parse_snapshot_id(&output) {
518        return Ok(snapshot_id);
519    }
520
521    let before = before.into_iter().collect::<BTreeSet<_>>();
522    let mut new_ids = list_snapshot_ids(options, canister_id)?
523        .into_iter()
524        .filter(|snapshot_id| !before.contains(snapshot_id))
525        .collect::<Vec<_>>();
526    if new_ids.len() == 1 {
527        Ok(new_ids.remove(0))
528    } else {
529        Err(SnapshotCommandError::SnapshotIdUnavailable(output))
530    }
531}
532
533// List the existing snapshot ids for one canister.
534fn list_snapshot_ids(
535    options: &SnapshotDownloadOptions,
536    canister_id: &str,
537) -> Result<Vec<String>, SnapshotCommandError> {
538    let mut command = Command::new(&options.dfx);
539    command.arg("canister");
540    add_canister_network_args(&mut command, options);
541    command.args(["snapshot", "list", canister_id]);
542    let output = run_output(&mut command)?;
543    Ok(parse_snapshot_list_ids(&output))
544}
545
546// Stop a canister before taking a snapshot when explicitly requested.
547fn stop_canister(
548    options: &SnapshotDownloadOptions,
549    canister_id: &str,
550) -> Result<(), SnapshotCommandError> {
551    let mut command = Command::new(&options.dfx);
552    command.arg("canister");
553    add_canister_network_args(&mut command, options);
554    command.args(["stop", canister_id]);
555    run_status(&mut command)
556}
557
558// Start a canister after snapshot capture when explicitly requested.
559fn start_canister(
560    options: &SnapshotDownloadOptions,
561    canister_id: &str,
562) -> Result<(), SnapshotCommandError> {
563    let mut command = Command::new(&options.dfx);
564    command.arg("canister");
565    add_canister_network_args(&mut command, options);
566    command.args(["start", canister_id]);
567    run_status(&mut command)
568}
569
570// Run one snapshot operation with optional stop/start lifecycle commands.
571fn with_optional_stop<T>(
572    options: &SnapshotDownloadOptions,
573    canister_id: &str,
574    operation: impl FnOnce() -> Result<T, SnapshotCommandError>,
575) -> Result<T, SnapshotCommandError> {
576    if options.lifecycle.stop_before_snapshot() {
577        stop_canister(options, canister_id)?;
578    }
579
580    let result = operation();
581
582    if options.lifecycle.resume_after_snapshot() {
583        match result {
584            Ok(value) => {
585                start_canister(options, canister_id)?;
586                Ok(value)
587            }
588            Err(error) => {
589                let _ = start_canister(options, canister_id);
590                Err(error)
591            }
592        }
593    } else {
594        result
595    }
596}
597
598// Download one canister snapshot into the target artifact directory.
599fn download_snapshot(
600    options: &SnapshotDownloadOptions,
601    canister_id: &str,
602    snapshot_id: &str,
603    artifact_path: &Path,
604) -> Result<(), SnapshotCommandError> {
605    let mut command = Command::new(&options.dfx);
606    command.arg("canister");
607    add_canister_network_args(&mut command, options);
608    command.args(["snapshot", "download", canister_id, snapshot_id, "--dir"]);
609    command.arg(artifact_path);
610    run_status(&mut command)
611}
612
613// Add optional `dfx canister` network arguments.
614fn add_canister_network_args(command: &mut Command, options: &SnapshotDownloadOptions) {
615    if let Some(network) = &options.network {
616        command.args(["--network", network]);
617    }
618}
619
620// Execute a command and capture stdout.
621fn run_output(command: &mut Command) -> Result<String, SnapshotCommandError> {
622    let display = command_display(command);
623    let output = command.output()?;
624    if output.status.success() {
625        Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
626    } else {
627        Err(SnapshotCommandError::DfxFailed {
628            command: display,
629            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
630        })
631    }
632}
633
634// Execute a command and capture stdout plus stderr on success.
635fn run_output_with_stderr(command: &mut Command) -> Result<String, SnapshotCommandError> {
636    let display = command_display(command);
637    let output = command.output()?;
638    if output.status.success() {
639        let mut text = String::from_utf8_lossy(&output.stdout).to_string();
640        text.push_str(&String::from_utf8_lossy(&output.stderr));
641        Ok(text.trim().to_string())
642    } else {
643        Err(SnapshotCommandError::DfxFailed {
644            command: display,
645            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
646        })
647    }
648}
649
650// Execute a command and require a successful status.
651fn run_status(command: &mut Command) -> Result<(), SnapshotCommandError> {
652    let display = command_display(command);
653    let output = command.output()?;
654    if output.status.success() {
655        Ok(())
656    } else {
657        Err(SnapshotCommandError::DfxFailed {
658            command: display,
659            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
660        })
661    }
662}
663
// Render the program and its arguments as one space-separated line for
// diagnostics. Non-UTF-8 parts are converted lossily; nothing is quoted.
fn command_display(command: &Command) -> String {
    std::iter::once(command.get_program())
        .chain(command.get_args())
        .map(|part| part.to_string_lossy().into_owned())
        .collect::<Vec<_>>()
        .join(" ")
}
674
675// Render one dry-run create command.
676fn create_snapshot_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
677    let mut command = Command::new(&options.dfx);
678    command.arg("canister");
679    add_canister_network_args(&mut command, options);
680    command.args(["snapshot", "create", canister_id]);
681    command_display(&command)
682}
683
684// Render one dry-run download command.
685fn download_snapshot_command_display(
686    options: &SnapshotDownloadOptions,
687    canister_id: &str,
688    snapshot_id: &str,
689) -> String {
690    let mut command = Command::new(&options.dfx);
691    command.arg("canister");
692    add_canister_network_args(&mut command, options);
693    command.args(["snapshot", "download", canister_id, snapshot_id, "--dir"]);
694    command.arg(options.out.join(safe_path_segment(canister_id)));
695    command_display(&command)
696}
697
698// Render one dry-run stop command.
699fn stop_canister_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
700    let mut command = Command::new(&options.dfx);
701    command.arg("canister");
702    add_canister_network_args(&mut command, options);
703    command.args(["stop", canister_id]);
704    command_display(&command)
705}
706
707// Render one dry-run start command.
708fn start_canister_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
709    let mut command = Command::new(&options.dfx);
710    command.arg("canister");
711    add_canister_network_args(&mut command, options);
712    command.args(["start", canister_id]);
713    command_display(&command)
714}
715
///
/// RegistryEntry
///
/// One canister row read from the subnet registry JSON.
///

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RegistryEntry {
    /// Canister id, from the row's `pid` field.
    pub pid: String,
    /// Role, from the row's `role` field when it is a string.
    pub role: Option<String>,
    /// Parent canister id, from `record.parent_pid` when present.
    pub parent_pid: Option<String>,
}
726
727/// Parse the `dfx --output json` subnet registry shape.
728pub fn parse_registry_entries(
729    registry_json: &str,
730) -> Result<Vec<RegistryEntry>, SnapshotCommandError> {
731    let data = serde_json::from_str::<Value>(registry_json)?;
732    let entries = data
733        .get("Ok")
734        .and_then(Value::as_array)
735        .or_else(|| data.as_array())
736        .ok_or(SnapshotCommandError::Usage(
737            "registry JSON must be an array or {\"Ok\": [...]}",
738        ))?;
739
740    Ok(entries.iter().filter_map(parse_registry_entry).collect())
741}
742
743// Parse one registry entry from dfx JSON.
744fn parse_registry_entry(value: &Value) -> Option<RegistryEntry> {
745    let pid = value.get("pid").and_then(Value::as_str)?.to_string();
746    let role = value
747        .get("role")
748        .and_then(Value::as_str)
749        .map(str::to_string);
750    let parent_pid = value
751        .get("record")
752        .and_then(|record| record.get("parent_pid"))
753        .and_then(parse_optional_principal);
754
755    Some(RegistryEntry {
756        pid,
757        role,
758        parent_pid,
759    })
760}
761
762// Parse optional principal JSON emitted as null, string, or optional vector form.
763fn parse_optional_principal(value: &Value) -> Option<String> {
764    if value.is_null() {
765        return None;
766    }
767    if let Some(text) = value.as_str() {
768        return Some(text.to_string());
769    }
770    value
771        .as_array()
772        .and_then(|items| items.first())
773        .and_then(Value::as_str)
774        .map(str::to_string)
775}
776
777/// Resolve selected target and children from registry entries.
778pub fn targets_from_registry(
779    registry: &[RegistryEntry],
780    canister_id: &str,
781    recursive: bool,
782) -> Result<Vec<SnapshotTarget>, SnapshotCommandError> {
783    let by_pid = registry
784        .iter()
785        .map(|entry| (entry.pid.as_str(), entry))
786        .collect::<BTreeMap<_, _>>();
787
788    let root = by_pid
789        .get(canister_id)
790        .ok_or_else(|| SnapshotCommandError::CanisterNotInRegistry(canister_id.to_string()))?;
791
792    let mut targets = Vec::new();
793    let mut seen = BTreeSet::new();
794    targets.push(SnapshotTarget {
795        canister_id: root.pid.clone(),
796        role: root.role.clone(),
797        parent_canister_id: root.parent_pid.clone(),
798    });
799    seen.insert(root.pid.clone());
800
801    let mut queue = VecDeque::from([root.pid.clone()]);
802    while let Some(parent) = queue.pop_front() {
803        for child in registry
804            .iter()
805            .filter(|entry| entry.parent_pid.as_deref() == Some(parent.as_str()))
806        {
807            if seen.insert(child.pid.clone()) {
808                targets.push(SnapshotTarget {
809                    canister_id: child.pid.clone(),
810                    role: child.role.clone(),
811                    parent_canister_id: child.parent_pid.clone(),
812                });
813                if recursive {
814                    queue.push_back(child.pid.clone());
815                }
816            }
817        }
818    }
819
820    Ok(targets)
821}
822
823// Build a validated manifest for one successful snapshot download run.
824fn build_manifest(
825    options: &SnapshotDownloadOptions,
826    targets: &[SnapshotTarget],
827    artifacts: &[SnapshotArtifact],
828    discovery_topology_hash: TopologyHash,
829    pre_snapshot_topology_hash: TopologyHash,
830) -> Result<FleetBackupManifest, SnapshotCommandError> {
831    let roles = targets
832        .iter()
833        .enumerate()
834        .map(|(index, target)| target_role(options, index, target))
835        .collect::<BTreeSet<_>>()
836        .into_iter()
837        .collect::<Vec<_>>();
838
839    let manifest = FleetBackupManifest {
840        manifest_version: 1,
841        backup_id: backup_id(options),
842        created_at: timestamp_placeholder(),
843        tool: ToolMetadata {
844            name: "canic-cli".to_string(),
845            version: env!("CARGO_PKG_VERSION").to_string(),
846        },
847        source: SourceMetadata {
848            environment: options
849                .network
850                .clone()
851                .unwrap_or_else(|| "local".to_string()),
852            root_canister: options
853                .root
854                .clone()
855                .unwrap_or_else(|| options.canister.clone()),
856        },
857        consistency: ConsistencySection {
858            mode: ConsistencyMode::CrashConsistent,
859            backup_units: vec![BackupUnit {
860                unit_id: "snapshot-selection".to_string(),
861                kind: if options.include_children {
862                    BackupUnitKind::SubtreeRooted
863                } else {
864                    BackupUnitKind::Flat
865                },
866                roles,
867                consistency_reason: if options.include_children {
868                    None
869                } else {
870                    Some("explicit single-canister snapshot selection".to_string())
871                },
872                dependency_closure: Vec::new(),
873                topology_validation: if options.include_children {
874                    "registry-subtree-selection".to_string()
875                } else {
876                    "explicit-selection".to_string()
877                },
878                quiescence_strategy: None,
879            }],
880        },
881        fleet: FleetSection {
882            topology_hash_algorithm: discovery_topology_hash.algorithm,
883            topology_hash_input: discovery_topology_hash.input,
884            discovery_topology_hash: discovery_topology_hash.hash.clone(),
885            pre_snapshot_topology_hash: pre_snapshot_topology_hash.hash,
886            topology_hash: discovery_topology_hash.hash,
887            members: targets
888                .iter()
889                .enumerate()
890                .map(|(index, target)| fleet_member(options, index, target, artifacts))
891                .collect::<Result<Vec<_>, _>>()?,
892        },
893        verification: VerificationPlan::default(),
894    };
895
896    manifest.validate()?;
897    Ok(manifest)
898}
899
900// Compute the canonical topology hash for one resolved target set.
901fn topology_hash_for_targets(
902    options: &SnapshotDownloadOptions,
903    targets: &[SnapshotTarget],
904) -> Result<TopologyHash, SnapshotCommandError> {
905    let topology_records = targets
906        .iter()
907        .enumerate()
908        .map(|(index, target)| topology_record(options, index, target))
909        .collect::<Result<Vec<_>, _>>()?;
910    Ok(TopologyHasher::hash(&topology_records))
911}
912
913// Fail closed if topology changes after discovery but before snapshot creation.
914fn ensure_topology_stable(
915    discovery: &TopologyHash,
916    pre_snapshot: &TopologyHash,
917) -> Result<(), SnapshotCommandError> {
918    if discovery.hash == pre_snapshot.hash {
919        return Ok(());
920    }
921
922    Err(SnapshotCommandError::TopologyChanged {
923        discovery: discovery.hash.clone(),
924        pre_snapshot: pre_snapshot.hash.clone(),
925    })
926}
927
928// Build one canonical topology record for manifest hashing.
929fn topology_record(
930    options: &SnapshotDownloadOptions,
931    index: usize,
932    target: &SnapshotTarget,
933) -> Result<TopologyRecord, SnapshotCommandError> {
934    Ok(TopologyRecord {
935        pid: parse_principal("fleet.members[].canister_id", &target.canister_id)?,
936        parent_pid: target
937            .parent_canister_id
938            .as_deref()
939            .map(|parent| parse_principal("fleet.members[].parent_canister_id", parent))
940            .transpose()?,
941        role: target_role(options, index, target),
942        module_hash: None,
943    })
944}
945
946// Build one manifest member from a captured durable artifact.
947fn fleet_member(
948    options: &SnapshotDownloadOptions,
949    index: usize,
950    target: &SnapshotTarget,
951    artifacts: &[SnapshotArtifact],
952) -> Result<FleetMember, SnapshotCommandError> {
953    let Some(artifact) = artifacts
954        .iter()
955        .find(|artifact| artifact.canister_id == target.canister_id)
956    else {
957        return Err(SnapshotCommandError::SnapshotIdUnavailable(format!(
958            "missing artifact for {}",
959            target.canister_id
960        )));
961    };
962    let role = target_role(options, index, target);
963
964    Ok(FleetMember {
965        role: role.clone(),
966        canister_id: target.canister_id.clone(),
967        parent_canister_id: target.parent_canister_id.clone(),
968        subnet_canister_id: options.root.clone(),
969        controller_hint: None,
970        identity_mode: if target.canister_id == options.canister {
971            IdentityMode::Fixed
972        } else {
973            IdentityMode::Relocatable
974        },
975        restore_group: if target.canister_id == options.canister {
976            1
977        } else {
978            2
979        },
980        verification_class: "basic".to_string(),
981        verification_checks: vec![VerificationCheck {
982            kind: "status".to_string(),
983            method: None,
984            roles: vec![role],
985        }],
986        source_snapshot: SourceSnapshot {
987            snapshot_id: artifact.snapshot_id.clone(),
988            module_hash: None,
989            wasm_hash: None,
990            code_version: None,
991            artifact_path: safe_path_segment(&target.canister_id),
992            checksum_algorithm: "sha256".to_string(),
993            checksum: Some(artifact.checksum.clone()),
994        },
995    })
996}
997
998// Return the manifest role for one selected snapshot target.
999fn target_role(options: &SnapshotDownloadOptions, index: usize, target: &SnapshotTarget) -> String {
1000    target.role.clone().unwrap_or_else(|| {
1001        if target.canister_id == options.canister {
1002            "root".to_string()
1003        } else {
1004            format!("member-{index}")
1005        }
1006    })
1007}
1008
1009// Parse one principal used by generated topology manifest metadata.
1010fn parse_principal(field: &'static str, value: &str) -> Result<Principal, SnapshotCommandError> {
1011    Principal::from_text(value).map_err(|_| SnapshotCommandError::InvalidPrincipal {
1012        field,
1013        value: value.to_string(),
1014    })
1015}
1016
// Parse a likely snapshot id from dfx output.
//
// The output is split on whitespace, quotes, colons, and commas, and the
// resulting tokens are scanned from the end. The id is taken to be the last
// token that consists only of ASCII alphanumerics, `-`, `_`, and `.`.
//
// A token must also contain at least one alphanumeric character, so trailing
// decoration such as `...` or `--` is never mistaken for an id. Returns `None`
// when no token qualifies.
fn parse_snapshot_id(output: &str) -> Option<String> {
    output
        .split(|c: char| c.is_whitespace() || matches!(c, '"' | '\'' | ':' | ','))
        .rev()
        .find(|token| {
            // The `any` check also rejects the empty tokens produced by
            // adjacent separators.
            token.chars().any(|c| c.is_ascii_alphanumeric())
                && token
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.'))
        })
        .map(str::to_string)
}
1029
// Parse dfx snapshot list output into snapshot ids.
//
// Each line contributes the trimmed text before its first `:`. Lines without
// a colon, or with nothing but whitespace before it, are skipped.
fn parse_snapshot_list_ids(output: &str) -> Vec<String> {
    let mut snapshot_ids = Vec::new();

    for line in output.lines() {
        let Some((prefix, _)) = line.split_once(':') else {
            continue;
        };

        let snapshot_id = prefix.trim();
        if !snapshot_id.is_empty() {
            snapshot_ids.push(snapshot_id.to_string());
        }
    }

    snapshot_ids
}
1042
// Convert a principal into a conservative filesystem path segment.
//
// ASCII letters, digits, `-`, and `_` pass through unchanged; every other
// character is replaced by a single `_`.
fn safe_path_segment(value: &str) -> String {
    let mut segment = String::with_capacity(value.len());

    for c in value.chars() {
        let keep = c.is_ascii_alphanumeric() || c == '-' || c == '_';
        segment.push(if keep { c } else { '_' });
    }

    segment
}
1056
1057// Build a stable backup id for this command's output directory.
1058fn backup_id(options: &SnapshotDownloadOptions) -> String {
1059    options
1060        .out
1061        .file_name()
1062        .and_then(|name| name.to_str())
1063        .map_or_else(|| "snapshot-download".to_string(), str::to_string)
1064}
1065
// Return a placeholder timestamp until the CLI owns a clock abstraction.
//
// The value is always the literal text "unknown".
fn timestamp_placeholder() -> String {
    String::from("unknown")
}
1070
1071// Read the next required option value.
1072fn next_value<I>(args: &mut I, option: &'static str) -> Result<String, SnapshotCommandError>
1073where
1074    I: Iterator<Item = OsString>,
1075{
1076    args.next()
1077        .and_then(|value| value.into_string().ok())
1078        .ok_or(SnapshotCommandError::MissingValue(option))
1079}
1080
// Return snapshot command usage text.
//
// The text is a single line listing every option of `canic snapshot download`.
const fn usage() -> &'static str {
    const USAGE: &str = "usage: canic snapshot download --canister <id> --out <dir> [--root <id> | --registry-json <file>] [--include-children] [--recursive] [--dry-run] [--stop-before-snapshot] [--resume-after-snapshot] [--network <name>]";

    USAGE
}
1085
#[cfg(test)]
mod tests {
    use super::*;
    use canic_backup::persistence::BackupLayout;
    use serde_json::json;
    use std::time::{SystemTime, UNIX_EPOCH};

    const ROOT: &str = "aaaaa-aa";
    const CHILD: &str = "renrk-eyaaa-aaaaa-aaada-cai";
    const GRANDCHILD: &str = "rno2w-sqaaa-aaaaa-aaacq-cai";
    const HASH: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";

    // Shell stand-in for dfx used by the end-to-end download test. `snapshot
    // create` echoes "snapshot-" followed by its fourth argument, `snapshot
    // list` prints nothing, `snapshot download` writes a small file into the
    // directory given as its seventh argument, and anything else fails.
    #[cfg(unix)]
    const FAKE_DFX_SCRIPT: &str = r#"#!/bin/sh
set -eu
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "create" ]; then
  echo "snapshot-$4"
  exit 0
fi
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "list" ]; then
  exit 0
fi
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "download" ]; then
  mkdir -p "$7"
  printf "%s:%s\n" "$4" "$5" > "$7/snapshot.txt"
  exit 0
fi
echo "unexpected args: $*" >&2
exit 1
"#;

    // Ensure dfx registry JSON parses in the wrapped Ok shape.
    #[test]
    fn parses_wrapped_registry_json() {
        let entries = parse_registry_entries(&registry_json()).expect("parse registry");

        assert_eq!(entries.len(), 3);
        assert_eq!(entries[1].parent_pid.as_deref(), Some(ROOT));
    }

    // Ensure direct-child resolution includes only one level.
    #[test]
    fn targets_include_direct_children() {
        let entries = parse_registry_entries(&registry_json()).expect("parse registry");

        let targets = targets_from_registry(&entries, ROOT, false).expect("resolve targets");

        let ids: Vec<&str> = targets
            .iter()
            .map(|target| target.canister_id.as_str())
            .collect();
        assert_eq!(ids, vec![ROOT, CHILD]);
    }

    // Ensure recursive resolution walks descendants.
    #[test]
    fn targets_include_recursive_children() {
        let entries = parse_registry_entries(&registry_json()).expect("parse registry");

        let targets = targets_from_registry(&entries, ROOT, true).expect("resolve targets");

        let ids: Vec<&str> = targets
            .iter()
            .map(|target| target.canister_id.as_str())
            .collect();
        assert_eq!(ids, vec![ROOT, CHILD, GRANDCHILD]);
    }

    // Ensure snapshot ids can be extracted from common command output.
    #[test]
    fn parses_snapshot_id_from_output() {
        let parsed = parse_snapshot_id("Created snapshot: snap_abc-123\n");

        assert_eq!(parsed.as_deref(), Some("snap_abc-123"));
    }

    // Ensure dfx snapshot list output can be used when create is quiet.
    #[test]
    fn parses_snapshot_ids_from_list_output() {
        let listing =
            "0000000000000000ffffffffff9000050101: 213.76 MiB, taken at 2026-05-03 12:20:53 UTC\n";

        let snapshot_ids = parse_snapshot_list_ids(listing);

        assert_eq!(snapshot_ids, vec!["0000000000000000ffffffffff9000050101"]);
    }

    // Ensure option parsing covers the intended dry-run command.
    #[test]
    fn parses_download_options() {
        let args = [
            "--canister",
            ROOT,
            "--out",
            "backups/test",
            "--registry-json",
            "registry.json",
            "--recursive",
            "--dry-run",
            "--stop-before-snapshot",
            "--resume-after-snapshot",
        ]
        .map(OsString::from);

        let options = SnapshotDownloadOptions::parse(args).expect("parse options");

        assert_eq!(options.canister, ROOT);
        assert!(options.include_children);
        assert!(options.recursive);
        assert!(options.dry_run);
        assert_eq!(options.lifecycle, SnapshotLifecycleMode::StopAndResume);
    }

    // Ensure snapshot capture fails closed when topology changes before creation.
    #[test]
    fn topology_stability_rejects_pre_snapshot_drift() {
        let discovery = topology_hash(HASH);
        let drifted =
            topology_hash("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff");

        let err =
            ensure_topology_stable(&discovery, &drifted).expect_err("topology drift should fail");

        assert!(matches!(err, SnapshotCommandError::TopologyChanged { .. }));
    }

    // Ensure the actual command path writes a manifest and durable journal.
    #[cfg(unix)]
    #[test]
    fn download_snapshots_writes_manifest_and_durable_journal() {
        use std::os::unix::fs::PermissionsExt;

        // Install the fake dfx as an executable inside a fresh temp directory.
        let root = temp_dir("canic-cli-download");
        let fake_dfx = root.join("fake-dfx.sh");
        fs::create_dir_all(&root).expect("create temp root");
        fs::write(&fake_dfx, FAKE_DFX_SCRIPT).expect("write fake dfx");
        let mut permissions = fs::metadata(&fake_dfx)
            .expect("stat fake dfx")
            .permissions();
        permissions.set_mode(0o755);
        fs::set_permissions(&fake_dfx, permissions).expect("chmod fake dfx");

        let out = root.join("backup");
        let options = SnapshotDownloadOptions {
            canister: ROOT.to_string(),
            out: out.clone(),
            root: None,
            registry_json: None,
            include_children: false,
            recursive: false,
            dry_run: false,
            lifecycle: SnapshotLifecycleMode::SnapshotOnly,
            network: None,
            dfx: fake_dfx.display().to_string(),
        };

        let result = download_snapshots(&options).expect("download snapshots");
        let layout = BackupLayout::new(out);
        let journal = layout.read_journal().expect("read journal");
        let manifest = layout.read_manifest().expect("read manifest");

        // Everything needed is in memory now, so clean up before asserting.
        fs::remove_dir_all(root).expect("remove temp root");

        assert_eq!(result.artifacts.len(), 1);
        assert_eq!(journal.artifacts.len(), 1);

        let metrics = &journal.operation_metrics;
        assert_eq!(metrics.target_count, 1);
        assert_eq!(metrics.snapshot_create_started, 1);
        assert_eq!(metrics.snapshot_create_completed, 1);
        assert_eq!(metrics.snapshot_download_started, 1);
        assert_eq!(metrics.snapshot_download_completed, 1);
        assert_eq!(metrics.checksum_verify_started, 1);
        assert_eq!(metrics.checksum_verify_completed, 1);
        assert_eq!(metrics.artifact_finalize_started, 1);
        assert_eq!(metrics.artifact_finalize_completed, 1);

        let entry = &journal.artifacts[0];
        assert_eq!(entry.state, ArtifactState::Durable);
        assert!(entry.checksum.is_some());

        assert_eq!(manifest.backup_id, journal.backup_id);
        assert_eq!(manifest.fleet.members.len(), 1);

        let member = &manifest.fleet.members[0];
        assert_eq!(member.canister_id, ROOT);
        assert_eq!(member.source_snapshot.snapshot_id, "snapshot-aaaaa-aa");
        assert_eq!(
            member.source_snapshot.checksum.as_deref(),
            entry.checksum.as_deref()
        );
    }

    // Build representative subnet registry JSON.
    //
    // Three entries form a ROOT -> CHILD -> GRANDCHILD chain. The grandchild's
    // parent is written as a one-element array, the other parents as a plain
    // value or null.
    fn registry_json() -> String {
        let registry = json!({
            "Ok": [
                {
                    "pid": ROOT,
                    "role": "root",
                    "record": {
                        "pid": ROOT,
                        "role": "root",
                        "parent_pid": null
                    }
                },
                {
                    "pid": CHILD,
                    "role": "app",
                    "record": {
                        "pid": CHILD,
                        "role": "app",
                        "parent_pid": ROOT
                    }
                },
                {
                    "pid": GRANDCHILD,
                    "role": "worker",
                    "record": {
                        "pid": GRANDCHILD,
                        "role": "worker",
                        "parent_pid": [CHILD]
                    }
                }
            ]
        });

        registry.to_string()
    }

    // Build one topology hash for stability tests.
    fn topology_hash(hash: &str) -> TopologyHash {
        TopologyHash {
            algorithm: String::from("sha256"),
            input: String::from("sorted(pid,parent_pid,role,module_hash)"),
            hash: hash.to_string(),
        }
    }

    // Build a unique temporary directory.
    //
    // Only the path is computed; the directory itself is not created. The
    // name combines `prefix`, the process id, and the current time in
    // nanoseconds since the Unix epoch.
    fn temp_dir(prefix: &str) -> PathBuf {
        let pid = std::process::id();
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system time after epoch")
            .as_nanos();

        std::env::temp_dir().join(format!("{prefix}-{pid}-{nanos}"))
    }
}