Skip to main content

canic_cli/snapshot/
mod.rs

1use candid::Principal;
2use canic_backup::{
3    artifacts::{ArtifactChecksum, ArtifactChecksumError},
4    journal::JournalValidationError,
5    journal::{ArtifactJournalEntry, ArtifactState, DownloadJournal},
6    manifest::{
7        BackupUnit, BackupUnitKind, ConsistencyMode, ConsistencySection, FleetBackupManifest,
8        FleetMember, FleetSection, IdentityMode, ManifestValidationError, SourceMetadata,
9        SourceSnapshot, ToolMetadata, VerificationCheck, VerificationPlan,
10    },
11    persistence::{BackupLayout, PersistenceError},
12    topology::{TopologyHash, TopologyHasher, TopologyRecord},
13};
14use serde_json::Value;
15use std::{
16    collections::{BTreeMap, BTreeSet, VecDeque},
17    ffi::OsString,
18    fs,
19    path::{Path, PathBuf},
20    process::Command,
21};
22use thiserror::Error as ThisError;
23
24///
25/// SnapshotCommandError
26///
27
28#[derive(Debug, ThisError)]
29pub enum SnapshotCommandError {
30    #[error("{0}")]
31    Usage(&'static str),
32
33    #[error("missing required option {0}")]
34    MissingOption(&'static str),
35
36    #[error("unknown option {0}")]
37    UnknownOption(String),
38
39    #[error("option {0} requires a value")]
40    MissingValue(&'static str),
41
42    #[error("cannot combine --root and --registry-json")]
43    ConflictingRegistrySources,
44
45    #[error("registry JSON did not contain the requested canister {0}")]
46    CanisterNotInRegistry(String),
47
48    #[error("dfx command failed: {command}\n{stderr}")]
49    DfxFailed { command: String, stderr: String },
50
51    #[error("could not parse snapshot id from dfx output: {0}")]
52    SnapshotIdUnavailable(String),
53
54    #[error("field {field} must be a valid principal: {value}")]
55    InvalidPrincipal { field: &'static str, value: String },
56
57    #[error(
58        "topology changed before snapshot start: discovery={discovery}, pre_snapshot={pre_snapshot}"
59    )]
60    TopologyChanged {
61        discovery: String,
62        pre_snapshot: String,
63    },
64
65    #[error(transparent)]
66    Io(#[from] std::io::Error),
67
68    #[error(transparent)]
69    Json(#[from] serde_json::Error),
70
71    #[error(transparent)]
72    Checksum(#[from] ArtifactChecksumError),
73
74    #[error(transparent)]
75    Persistence(#[from] PersistenceError),
76
77    #[error(transparent)]
78    Journal(#[from] JournalValidationError),
79
80    #[error(transparent)]
81    InvalidManifest(#[from] ManifestValidationError),
82}
83
84///
85/// SnapshotDownloadOptions
86///
87
88#[derive(Clone, Debug, Eq, PartialEq)]
89pub struct SnapshotDownloadOptions {
90    pub canister: String,
91    pub out: PathBuf,
92    pub root: Option<String>,
93    pub registry_json: Option<PathBuf>,
94    pub include_children: bool,
95    pub recursive: bool,
96    pub dry_run: bool,
97    pub lifecycle: SnapshotLifecycleMode,
98    pub network: Option<String>,
99    pub dfx: String,
100}
101
102impl SnapshotDownloadOptions {
103    /// Parse snapshot download options from CLI arguments.
104    pub fn parse<I>(args: I) -> Result<Self, SnapshotCommandError>
105    where
106        I: IntoIterator<Item = OsString>,
107    {
108        let mut canister = None;
109        let mut out = None;
110        let mut root = None;
111        let mut registry_json = None;
112        let mut include_children = false;
113        let mut recursive = false;
114        let mut dry_run = false;
115        let mut stop_before_snapshot = false;
116        let mut resume_after_snapshot = false;
117        let mut network = None;
118        let mut dfx = "dfx".to_string();
119
120        let mut args = args.into_iter();
121        while let Some(arg) = args.next() {
122            let arg = arg
123                .into_string()
124                .map_err(|_| SnapshotCommandError::Usage(usage()))?;
125            match arg.as_str() {
126                "--canister" => canister = Some(next_value(&mut args, "--canister")?),
127                "--out" => out = Some(PathBuf::from(next_value(&mut args, "--out")?)),
128                "--root" => root = Some(next_value(&mut args, "--root")?),
129                "--registry-json" => {
130                    registry_json = Some(PathBuf::from(next_value(&mut args, "--registry-json")?));
131                }
132                "--include-children" => include_children = true,
133                "--recursive" => {
134                    recursive = true;
135                    include_children = true;
136                }
137                "--dry-run" => dry_run = true,
138                "--stop-before-snapshot" => stop_before_snapshot = true,
139                "--resume-after-snapshot" => resume_after_snapshot = true,
140                "--network" => network = Some(next_value(&mut args, "--network")?),
141                "--dfx" => dfx = next_value(&mut args, "--dfx")?,
142                "--help" | "-h" => return Err(SnapshotCommandError::Usage(usage())),
143                _ => return Err(SnapshotCommandError::UnknownOption(arg)),
144            }
145        }
146
147        if root.is_some() && registry_json.is_some() {
148            return Err(SnapshotCommandError::ConflictingRegistrySources);
149        }
150
151        Ok(Self {
152            canister: canister.ok_or(SnapshotCommandError::MissingOption("--canister"))?,
153            out: out.ok_or(SnapshotCommandError::MissingOption("--out"))?,
154            root,
155            registry_json,
156            include_children,
157            recursive,
158            dry_run,
159            lifecycle: SnapshotLifecycleMode::from_flags(
160                stop_before_snapshot,
161                resume_after_snapshot,
162            ),
163            network,
164            dfx,
165        })
166    }
167}
168
///
/// SnapshotLifecycleMode
///
/// Which canister stop/start calls wrap each snapshot capture.
///

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SnapshotLifecycleMode {
    /// Neither stop nor start the canister.
    SnapshotOnly,
    /// Stop the canister first; do not start it afterwards.
    StopBeforeSnapshot,
    /// Start the canister afterwards without stopping it first.
    ResumeAfterSnapshot,
    /// Stop the canister first and start it again afterwards.
    StopAndResume,
}

impl SnapshotLifecycleMode {
    /// Map the two CLI switches onto the matching mode.
    #[must_use]
    pub const fn from_flags(stop_before_snapshot: bool, resume_after_snapshot: bool) -> Self {
        if stop_before_snapshot {
            if resume_after_snapshot {
                Self::StopAndResume
            } else {
                Self::StopBeforeSnapshot
            }
        } else if resume_after_snapshot {
            Self::ResumeAfterSnapshot
        } else {
            Self::SnapshotOnly
        }
    }

    /// `true` for the modes that stop the canister before the snapshot.
    #[must_use]
    pub const fn stop_before_snapshot(self) -> bool {
        match self {
            Self::StopBeforeSnapshot | Self::StopAndResume => true,
            Self::SnapshotOnly | Self::ResumeAfterSnapshot => false,
        }
    }

    /// `true` for the modes that start the canister after the snapshot.
    #[must_use]
    pub const fn resume_after_snapshot(self) -> bool {
        match self {
            Self::ResumeAfterSnapshot | Self::StopAndResume => true,
            Self::SnapshotOnly | Self::StopBeforeSnapshot => false,
        }
    }
}
205
///
/// SnapshotTarget
///
/// One canister selected for snapshotting.
///

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SnapshotTarget {
    /// Textual id of the canister to snapshot.
    pub canister_id: String,
    /// Role taken from the registry entry; `None` for a single-canister
    /// selection made without a registry.
    pub role: Option<String>,
    /// Parent id taken from the registry entry, when it has one.
    pub parent_canister_id: Option<String>,
}
216
217/// Run a snapshot subcommand.
218pub fn run<I>(args: I) -> Result<(), SnapshotCommandError>
219where
220    I: IntoIterator<Item = OsString>,
221{
222    let mut args = args.into_iter();
223    let Some(command) = args.next().and_then(|arg| arg.into_string().ok()) else {
224        return Err(SnapshotCommandError::Usage(usage()));
225    };
226
227    match command.as_str() {
228        "download" => {
229            let options = SnapshotDownloadOptions::parse(args)?;
230            let result = download_snapshots(&options)?;
231            for artifact in result.artifacts {
232                println!(
233                    "{} {} {}",
234                    artifact.canister_id,
235                    artifact.snapshot_id,
236                    artifact.path.display()
237                );
238            }
239            Ok(())
240        }
241        "help" | "--help" | "-h" => Err(SnapshotCommandError::Usage(usage())),
242        _ => Err(SnapshotCommandError::UnknownOption(command)),
243    }
244}
245
246///
247/// SnapshotDownloadResult
248///
249
250#[derive(Clone, Debug, Eq, PartialEq)]
251pub struct SnapshotDownloadResult {
252    pub artifacts: Vec<SnapshotArtifact>,
253}
254
///
/// SnapshotArtifact
///
/// One snapshot as recorded by a download run. Dry runs fill `snapshot_id`
/// and `checksum` with the placeholders `<snapshot-id>` and `<sha256>`.
///

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SnapshotArtifact {
    /// Canister the snapshot belongs to.
    pub canister_id: String,
    /// Snapshot id reported for the created snapshot.
    pub snapshot_id: String,
    /// Final location of the artifact directory.
    pub path: PathBuf,
    /// Checksum hash computed over the downloaded artifact.
    pub checksum: String,
}
266
267/// Create and download snapshots for the selected canister set.
268pub fn download_snapshots(
269    options: &SnapshotDownloadOptions,
270) -> Result<SnapshotDownloadResult, SnapshotCommandError> {
271    let targets = resolve_targets(options)?;
272    let discovery_topology_hash = topology_hash_for_targets(options, &targets)?;
273    let pre_snapshot_topology_hash =
274        accepted_pre_snapshot_topology_hash(options, &discovery_topology_hash)?;
275    let mut artifacts = Vec::with_capacity(targets.len());
276    let mut journal = DownloadJournal {
277        journal_version: 1,
278        backup_id: backup_id(options),
279        discovery_topology_hash: Some(discovery_topology_hash.hash.clone()),
280        pre_snapshot_topology_hash: Some(pre_snapshot_topology_hash.hash.clone()),
281        artifacts: Vec::new(),
282    };
283    let layout = BackupLayout::new(options.out.clone());
284
285    for target in &targets {
286        let artifact_relative_path = PathBuf::from(safe_path_segment(&target.canister_id));
287        let artifact_path = options.out.join(&artifact_relative_path);
288        let temp_path = options
289            .out
290            .join(format!("{}.tmp", safe_path_segment(&target.canister_id)));
291
292        if options.dry_run {
293            artifacts.push(dry_run_artifact(options, target, artifact_path));
294            continue;
295        }
296
297        artifacts.push(capture_snapshot_artifact(
298            options,
299            &layout,
300            &mut journal,
301            target,
302            &artifact_relative_path,
303            artifact_path,
304            temp_path,
305        )?);
306    }
307
308    if !options.dry_run {
309        let manifest = build_manifest(
310            options,
311            &targets,
312            &artifacts,
313            discovery_topology_hash,
314            pre_snapshot_topology_hash,
315        )?;
316        layout.write_manifest(&manifest)?;
317    }
318
319    Ok(SnapshotDownloadResult { artifacts })
320}
321
322// Resolve and verify the pre-snapshot topology hash before any mutation.
323fn accepted_pre_snapshot_topology_hash(
324    options: &SnapshotDownloadOptions,
325    discovery_topology_hash: &TopologyHash,
326) -> Result<TopologyHash, SnapshotCommandError> {
327    if options.dry_run {
328        return Ok(discovery_topology_hash.clone());
329    }
330
331    let pre_snapshot_targets = resolve_targets(options)?;
332    let pre_snapshot_topology_hash = topology_hash_for_targets(options, &pre_snapshot_targets)?;
333    ensure_topology_stable(discovery_topology_hash, &pre_snapshot_topology_hash)?;
334    Ok(pre_snapshot_topology_hash)
335}
336
337// Print the planned commands and return a placeholder artifact for dry runs.
338fn dry_run_artifact(
339    options: &SnapshotDownloadOptions,
340    target: &SnapshotTarget,
341    artifact_path: PathBuf,
342) -> SnapshotArtifact {
343    if options.lifecycle.stop_before_snapshot() {
344        println!(
345            "{}",
346            stop_canister_command_display(options, &target.canister_id)
347        );
348    }
349    println!(
350        "{}",
351        create_snapshot_command_display(options, &target.canister_id)
352    );
353    println!(
354        "{}",
355        download_snapshot_command_display(options, &target.canister_id, "<snapshot-id>")
356    );
357    if options.lifecycle.resume_after_snapshot() {
358        println!(
359            "{}",
360            start_canister_command_display(options, &target.canister_id)
361        );
362    }
363
364    SnapshotArtifact {
365        canister_id: target.canister_id.clone(),
366        snapshot_id: "<snapshot-id>".to_string(),
367        path: artifact_path,
368        checksum: "<sha256>".to_string(),
369    }
370}
371
372// Create, download, checksum, and finalize one durable snapshot artifact.
373fn capture_snapshot_artifact(
374    options: &SnapshotDownloadOptions,
375    layout: &BackupLayout,
376    journal: &mut DownloadJournal,
377    target: &SnapshotTarget,
378    artifact_relative_path: &Path,
379    artifact_path: PathBuf,
380    temp_path: PathBuf,
381) -> Result<SnapshotArtifact, SnapshotCommandError> {
382    with_optional_stop(options, &target.canister_id, || {
383        let snapshot_id = create_snapshot(options, &target.canister_id)?;
384        let mut entry = ArtifactJournalEntry {
385            canister_id: target.canister_id.clone(),
386            snapshot_id: snapshot_id.clone(),
387            state: ArtifactState::Created,
388            temp_path: None,
389            artifact_path: artifact_relative_path.display().to_string(),
390            checksum_algorithm: "sha256".to_string(),
391            checksum: None,
392            updated_at: timestamp_placeholder(),
393        };
394        journal.artifacts.push(entry.clone());
395        layout.write_journal(journal)?;
396
397        if temp_path.exists() {
398            fs::remove_dir_all(&temp_path)?;
399        }
400        fs::create_dir_all(&temp_path)?;
401        download_snapshot(options, &target.canister_id, &snapshot_id, &temp_path)?;
402        entry.advance_to(ArtifactState::Downloaded, timestamp_placeholder())?;
403        entry.temp_path = Some(temp_path.display().to_string());
404        update_journal_entry(journal, &entry);
405        layout.write_journal(journal)?;
406
407        let checksum = ArtifactChecksum::from_path(&temp_path)?;
408        entry.checksum = Some(checksum.hash.clone());
409        entry.advance_to(ArtifactState::ChecksumVerified, timestamp_placeholder())?;
410        update_journal_entry(journal, &entry);
411        layout.write_journal(journal)?;
412
413        if artifact_path.exists() {
414            return Err(std::io::Error::new(
415                std::io::ErrorKind::AlreadyExists,
416                format!("artifact path already exists: {}", artifact_path.display()),
417            )
418            .into());
419        }
420        fs::rename(&temp_path, &artifact_path)?;
421        entry.temp_path = None;
422        entry.advance_to(ArtifactState::Durable, timestamp_placeholder())?;
423        update_journal_entry(journal, &entry);
424        layout.write_journal(journal)?;
425
426        Ok(SnapshotArtifact {
427            canister_id: target.canister_id.clone(),
428            snapshot_id,
429            path: artifact_path,
430            checksum: checksum.hash,
431        })
432    })
433}
434
435// Replace one artifact row in the mutable journal.
436fn update_journal_entry(journal: &mut DownloadJournal, entry: &ArtifactJournalEntry) {
437    if let Some(existing) = journal.artifacts.iter_mut().find(|existing| {
438        existing.canister_id == entry.canister_id && existing.snapshot_id == entry.snapshot_id
439    }) {
440        *existing = entry.clone();
441    }
442}
443
444/// Resolve the selected canister plus optional direct/recursive children.
445pub fn resolve_targets(
446    options: &SnapshotDownloadOptions,
447) -> Result<Vec<SnapshotTarget>, SnapshotCommandError> {
448    if !options.include_children {
449        return Ok(vec![SnapshotTarget {
450            canister_id: options.canister.clone(),
451            role: None,
452            parent_canister_id: None,
453        }]);
454    }
455
456    let registry = load_registry_entries(options)?;
457    targets_from_registry(&registry, &options.canister, options.recursive)
458}
459
460// Load registry entries from a file or live root query.
461fn load_registry_entries(
462    options: &SnapshotDownloadOptions,
463) -> Result<Vec<RegistryEntry>, SnapshotCommandError> {
464    let registry_json = if let Some(path) = &options.registry_json {
465        fs::read_to_string(path)?
466    } else if let Some(root) = &options.root {
467        call_subnet_registry(options, root)?
468    } else {
469        return Err(SnapshotCommandError::MissingOption(
470            "--root or --registry-json when using --include-children",
471        ));
472    };
473
474    parse_registry_entries(&registry_json)
475}
476
477// Run `dfx canister call <root> canic_subnet_registry --output json`.
478fn call_subnet_registry(
479    options: &SnapshotDownloadOptions,
480    root: &str,
481) -> Result<String, SnapshotCommandError> {
482    let mut command = Command::new(&options.dfx);
483    command.arg("canister");
484    add_canister_network_args(&mut command, options);
485    command.args(["call", root, "canic_subnet_registry", "--output", "json"]);
486    run_output(&mut command)
487}
488
489// Create one canister snapshot and parse the snapshot id from dfx output.
490fn create_snapshot(
491    options: &SnapshotDownloadOptions,
492    canister_id: &str,
493) -> Result<String, SnapshotCommandError> {
494    let before = list_snapshot_ids(options, canister_id)?;
495    let mut command = Command::new(&options.dfx);
496    command.arg("canister");
497    add_canister_network_args(&mut command, options);
498    command.args(["snapshot", "create", canister_id]);
499    let output = run_output_with_stderr(&mut command)?;
500    if let Some(snapshot_id) = parse_snapshot_id(&output) {
501        return Ok(snapshot_id);
502    }
503
504    let before = before.into_iter().collect::<BTreeSet<_>>();
505    let mut new_ids = list_snapshot_ids(options, canister_id)?
506        .into_iter()
507        .filter(|snapshot_id| !before.contains(snapshot_id))
508        .collect::<Vec<_>>();
509    if new_ids.len() == 1 {
510        Ok(new_ids.remove(0))
511    } else {
512        Err(SnapshotCommandError::SnapshotIdUnavailable(output))
513    }
514}
515
516// List the existing snapshot ids for one canister.
517fn list_snapshot_ids(
518    options: &SnapshotDownloadOptions,
519    canister_id: &str,
520) -> Result<Vec<String>, SnapshotCommandError> {
521    let mut command = Command::new(&options.dfx);
522    command.arg("canister");
523    add_canister_network_args(&mut command, options);
524    command.args(["snapshot", "list", canister_id]);
525    let output = run_output(&mut command)?;
526    Ok(parse_snapshot_list_ids(&output))
527}
528
529// Stop a canister before taking a snapshot when explicitly requested.
530fn stop_canister(
531    options: &SnapshotDownloadOptions,
532    canister_id: &str,
533) -> Result<(), SnapshotCommandError> {
534    let mut command = Command::new(&options.dfx);
535    command.arg("canister");
536    add_canister_network_args(&mut command, options);
537    command.args(["stop", canister_id]);
538    run_status(&mut command)
539}
540
541// Start a canister after snapshot capture when explicitly requested.
542fn start_canister(
543    options: &SnapshotDownloadOptions,
544    canister_id: &str,
545) -> Result<(), SnapshotCommandError> {
546    let mut command = Command::new(&options.dfx);
547    command.arg("canister");
548    add_canister_network_args(&mut command, options);
549    command.args(["start", canister_id]);
550    run_status(&mut command)
551}
552
553// Run one snapshot operation with optional stop/start lifecycle commands.
554fn with_optional_stop<T>(
555    options: &SnapshotDownloadOptions,
556    canister_id: &str,
557    operation: impl FnOnce() -> Result<T, SnapshotCommandError>,
558) -> Result<T, SnapshotCommandError> {
559    if options.lifecycle.stop_before_snapshot() {
560        stop_canister(options, canister_id)?;
561    }
562
563    let result = operation();
564
565    if options.lifecycle.resume_after_snapshot() {
566        match result {
567            Ok(value) => {
568                start_canister(options, canister_id)?;
569                Ok(value)
570            }
571            Err(error) => {
572                let _ = start_canister(options, canister_id);
573                Err(error)
574            }
575        }
576    } else {
577        result
578    }
579}
580
581// Download one canister snapshot into the target artifact directory.
582fn download_snapshot(
583    options: &SnapshotDownloadOptions,
584    canister_id: &str,
585    snapshot_id: &str,
586    artifact_path: &Path,
587) -> Result<(), SnapshotCommandError> {
588    let mut command = Command::new(&options.dfx);
589    command.arg("canister");
590    add_canister_network_args(&mut command, options);
591    command.args(["snapshot", "download", canister_id, snapshot_id, "--dir"]);
592    command.arg(artifact_path);
593    run_status(&mut command)
594}
595
596// Add optional `dfx canister` network arguments.
597fn add_canister_network_args(command: &mut Command, options: &SnapshotDownloadOptions) {
598    if let Some(network) = &options.network {
599        command.args(["--network", network]);
600    }
601}
602
603// Execute a command and capture stdout.
604fn run_output(command: &mut Command) -> Result<String, SnapshotCommandError> {
605    let display = command_display(command);
606    let output = command.output()?;
607    if output.status.success() {
608        Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
609    } else {
610        Err(SnapshotCommandError::DfxFailed {
611            command: display,
612            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
613        })
614    }
615}
616
617// Execute a command and capture stdout plus stderr on success.
618fn run_output_with_stderr(command: &mut Command) -> Result<String, SnapshotCommandError> {
619    let display = command_display(command);
620    let output = command.output()?;
621    if output.status.success() {
622        let mut text = String::from_utf8_lossy(&output.stdout).to_string();
623        text.push_str(&String::from_utf8_lossy(&output.stderr));
624        Ok(text.trim().to_string())
625    } else {
626        Err(SnapshotCommandError::DfxFailed {
627            command: display,
628            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
629        })
630    }
631}
632
633// Execute a command and require a successful status.
634fn run_status(command: &mut Command) -> Result<(), SnapshotCommandError> {
635    let display = command_display(command);
636    let output = command.output()?;
637    if output.status.success() {
638        Ok(())
639    } else {
640        Err(SnapshotCommandError::DfxFailed {
641            command: display,
642            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
643        })
644    }
645}
646
// Render a command (program followed by its arguments) as one line for
// dry-run output and error messages.
//
// Words are separated by single spaces. A word that is empty or contains
// whitespace or a quote character is wrapped in single quotes so that argument
// boundaries stay unambiguous; every other word is emitted unchanged.
fn command_display(command: &Command) -> String {
    std::iter::once(command.get_program())
        .chain(command.get_args())
        .map(|word| display_word(&word.to_string_lossy()))
        .collect::<Vec<_>>()
        .join(" ")
}

// Quote one command word for display when it would otherwise be ambiguous.
// An embedded single quote is written as `'\''`.
fn display_word(word: &str) -> String {
    let ambiguous = word.is_empty()
        || word
            .chars()
            .any(|ch| ch.is_whitespace() || ch == '\'' || ch == '"');
    if ambiguous {
        format!("'{}'", word.replace('\'', r"'\''"))
    } else {
        word.to_string()
    }
}
657
658// Render one dry-run create command.
659fn create_snapshot_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
660    let mut command = Command::new(&options.dfx);
661    command.arg("canister");
662    add_canister_network_args(&mut command, options);
663    command.args(["snapshot", "create", canister_id]);
664    command_display(&command)
665}
666
667// Render one dry-run download command.
668fn download_snapshot_command_display(
669    options: &SnapshotDownloadOptions,
670    canister_id: &str,
671    snapshot_id: &str,
672) -> String {
673    let mut command = Command::new(&options.dfx);
674    command.arg("canister");
675    add_canister_network_args(&mut command, options);
676    command.args(["snapshot", "download", canister_id, snapshot_id, "--dir"]);
677    command.arg(options.out.join(safe_path_segment(canister_id)));
678    command_display(&command)
679}
680
681// Render one dry-run stop command.
682fn stop_canister_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
683    let mut command = Command::new(&options.dfx);
684    command.arg("canister");
685    add_canister_network_args(&mut command, options);
686    command.args(["stop", canister_id]);
687    command_display(&command)
688}
689
690// Render one dry-run start command.
691fn start_canister_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
692    let mut command = Command::new(&options.dfx);
693    command.arg("canister");
694    add_canister_network_args(&mut command, options);
695    command.args(["start", canister_id]);
696    command_display(&command)
697}
698
///
/// RegistryEntry
///
/// One row of the subnet registry, reduced to the fields used for target
/// selection.
///

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RegistryEntry {
    /// Value of the row's `pid` field.
    pub pid: String,
    /// Value of the row's `role` field, when it is a string.
    pub role: Option<String>,
    /// Value of `record.parent_pid`, when present and not null.
    pub parent_pid: Option<String>,
}
709
710/// Parse the `dfx --output json` subnet registry shape.
711pub fn parse_registry_entries(
712    registry_json: &str,
713) -> Result<Vec<RegistryEntry>, SnapshotCommandError> {
714    let data = serde_json::from_str::<Value>(registry_json)?;
715    let entries = data
716        .get("Ok")
717        .and_then(Value::as_array)
718        .or_else(|| data.as_array())
719        .ok_or(SnapshotCommandError::Usage(
720            "registry JSON must be an array or {\"Ok\": [...]}",
721        ))?;
722
723    Ok(entries.iter().filter_map(parse_registry_entry).collect())
724}
725
726// Parse one registry entry from dfx JSON.
727fn parse_registry_entry(value: &Value) -> Option<RegistryEntry> {
728    let pid = value.get("pid").and_then(Value::as_str)?.to_string();
729    let role = value
730        .get("role")
731        .and_then(Value::as_str)
732        .map(str::to_string);
733    let parent_pid = value
734        .get("record")
735        .and_then(|record| record.get("parent_pid"))
736        .and_then(parse_optional_principal);
737
738    Some(RegistryEntry {
739        pid,
740        role,
741        parent_pid,
742    })
743}
744
745// Parse optional principal JSON emitted as null, string, or optional vector form.
746fn parse_optional_principal(value: &Value) -> Option<String> {
747    if value.is_null() {
748        return None;
749    }
750    if let Some(text) = value.as_str() {
751        return Some(text.to_string());
752    }
753    value
754        .as_array()
755        .and_then(|items| items.first())
756        .and_then(Value::as_str)
757        .map(str::to_string)
758}
759
760/// Resolve selected target and children from registry entries.
761pub fn targets_from_registry(
762    registry: &[RegistryEntry],
763    canister_id: &str,
764    recursive: bool,
765) -> Result<Vec<SnapshotTarget>, SnapshotCommandError> {
766    let by_pid = registry
767        .iter()
768        .map(|entry| (entry.pid.as_str(), entry))
769        .collect::<BTreeMap<_, _>>();
770
771    let root = by_pid
772        .get(canister_id)
773        .ok_or_else(|| SnapshotCommandError::CanisterNotInRegistry(canister_id.to_string()))?;
774
775    let mut targets = Vec::new();
776    let mut seen = BTreeSet::new();
777    targets.push(SnapshotTarget {
778        canister_id: root.pid.clone(),
779        role: root.role.clone(),
780        parent_canister_id: root.parent_pid.clone(),
781    });
782    seen.insert(root.pid.clone());
783
784    let mut queue = VecDeque::from([root.pid.clone()]);
785    while let Some(parent) = queue.pop_front() {
786        for child in registry
787            .iter()
788            .filter(|entry| entry.parent_pid.as_deref() == Some(parent.as_str()))
789        {
790            if seen.insert(child.pid.clone()) {
791                targets.push(SnapshotTarget {
792                    canister_id: child.pid.clone(),
793                    role: child.role.clone(),
794                    parent_canister_id: child.parent_pid.clone(),
795                });
796                if recursive {
797                    queue.push_back(child.pid.clone());
798                }
799            }
800        }
801    }
802
803    Ok(targets)
804}
805
806// Build a validated manifest for one successful snapshot download run.
807fn build_manifest(
808    options: &SnapshotDownloadOptions,
809    targets: &[SnapshotTarget],
810    artifacts: &[SnapshotArtifact],
811    discovery_topology_hash: TopologyHash,
812    pre_snapshot_topology_hash: TopologyHash,
813) -> Result<FleetBackupManifest, SnapshotCommandError> {
814    let roles = targets
815        .iter()
816        .enumerate()
817        .map(|(index, target)| target_role(options, index, target))
818        .collect::<BTreeSet<_>>()
819        .into_iter()
820        .collect::<Vec<_>>();
821
822    let manifest = FleetBackupManifest {
823        manifest_version: 1,
824        backup_id: backup_id(options),
825        created_at: timestamp_placeholder(),
826        tool: ToolMetadata {
827            name: "canic-cli".to_string(),
828            version: env!("CARGO_PKG_VERSION").to_string(),
829        },
830        source: SourceMetadata {
831            environment: options
832                .network
833                .clone()
834                .unwrap_or_else(|| "local".to_string()),
835            root_canister: options
836                .root
837                .clone()
838                .unwrap_or_else(|| options.canister.clone()),
839        },
840        consistency: ConsistencySection {
841            mode: ConsistencyMode::CrashConsistent,
842            backup_units: vec![BackupUnit {
843                unit_id: "snapshot-selection".to_string(),
844                kind: if options.include_children {
845                    BackupUnitKind::SubtreeRooted
846                } else {
847                    BackupUnitKind::Flat
848                },
849                roles,
850                consistency_reason: if options.include_children {
851                    None
852                } else {
853                    Some("explicit single-canister snapshot selection".to_string())
854                },
855                dependency_closure: Vec::new(),
856                topology_validation: if options.include_children {
857                    "registry-subtree-selection".to_string()
858                } else {
859                    "explicit-selection".to_string()
860                },
861                quiescence_strategy: None,
862            }],
863        },
864        fleet: FleetSection {
865            topology_hash_algorithm: discovery_topology_hash.algorithm,
866            topology_hash_input: discovery_topology_hash.input,
867            discovery_topology_hash: discovery_topology_hash.hash.clone(),
868            pre_snapshot_topology_hash: pre_snapshot_topology_hash.hash,
869            topology_hash: discovery_topology_hash.hash,
870            members: targets
871                .iter()
872                .enumerate()
873                .map(|(index, target)| fleet_member(options, index, target, artifacts))
874                .collect::<Result<Vec<_>, _>>()?,
875        },
876        verification: VerificationPlan::default(),
877    };
878
879    manifest.validate()?;
880    Ok(manifest)
881}
882
883// Compute the canonical topology hash for one resolved target set.
884fn topology_hash_for_targets(
885    options: &SnapshotDownloadOptions,
886    targets: &[SnapshotTarget],
887) -> Result<TopologyHash, SnapshotCommandError> {
888    let topology_records = targets
889        .iter()
890        .enumerate()
891        .map(|(index, target)| topology_record(options, index, target))
892        .collect::<Result<Vec<_>, _>>()?;
893    Ok(TopologyHasher::hash(&topology_records))
894}
895
896// Fail closed if topology changes after discovery but before snapshot creation.
897fn ensure_topology_stable(
898    discovery: &TopologyHash,
899    pre_snapshot: &TopologyHash,
900) -> Result<(), SnapshotCommandError> {
901    if discovery.hash == pre_snapshot.hash {
902        return Ok(());
903    }
904
905    Err(SnapshotCommandError::TopologyChanged {
906        discovery: discovery.hash.clone(),
907        pre_snapshot: pre_snapshot.hash.clone(),
908    })
909}
910
911// Build one canonical topology record for manifest hashing.
912fn topology_record(
913    options: &SnapshotDownloadOptions,
914    index: usize,
915    target: &SnapshotTarget,
916) -> Result<TopologyRecord, SnapshotCommandError> {
917    Ok(TopologyRecord {
918        pid: parse_principal("fleet.members[].canister_id", &target.canister_id)?,
919        parent_pid: target
920            .parent_canister_id
921            .as_deref()
922            .map(|parent| parse_principal("fleet.members[].parent_canister_id", parent))
923            .transpose()?,
924        role: target_role(options, index, target),
925        module_hash: None,
926    })
927}
928
929// Build one manifest member from a captured durable artifact.
930fn fleet_member(
931    options: &SnapshotDownloadOptions,
932    index: usize,
933    target: &SnapshotTarget,
934    artifacts: &[SnapshotArtifact],
935) -> Result<FleetMember, SnapshotCommandError> {
936    let Some(artifact) = artifacts
937        .iter()
938        .find(|artifact| artifact.canister_id == target.canister_id)
939    else {
940        return Err(SnapshotCommandError::SnapshotIdUnavailable(format!(
941            "missing artifact for {}",
942            target.canister_id
943        )));
944    };
945    let role = target_role(options, index, target);
946
947    Ok(FleetMember {
948        role: role.clone(),
949        canister_id: target.canister_id.clone(),
950        parent_canister_id: target.parent_canister_id.clone(),
951        subnet_canister_id: options.root.clone(),
952        controller_hint: None,
953        identity_mode: if target.canister_id == options.canister {
954            IdentityMode::Fixed
955        } else {
956            IdentityMode::Relocatable
957        },
958        restore_group: if target.canister_id == options.canister {
959            1
960        } else {
961            2
962        },
963        verification_class: "basic".to_string(),
964        verification_checks: vec![VerificationCheck {
965            kind: "status".to_string(),
966            method: None,
967            roles: vec![role],
968        }],
969        source_snapshot: SourceSnapshot {
970            snapshot_id: artifact.snapshot_id.clone(),
971            module_hash: None,
972            wasm_hash: None,
973            code_version: None,
974            artifact_path: safe_path_segment(&target.canister_id),
975            checksum_algorithm: "sha256".to_string(),
976            checksum: Some(artifact.checksum.clone()),
977        },
978    })
979}
980
981// Return the manifest role for one selected snapshot target.
982fn target_role(options: &SnapshotDownloadOptions, index: usize, target: &SnapshotTarget) -> String {
983    target.role.clone().unwrap_or_else(|| {
984        if target.canister_id == options.canister {
985            "root".to_string()
986        } else {
987            format!("member-{index}")
988        }
989    })
990}
991
992// Parse one principal used by generated topology manifest metadata.
993fn parse_principal(field: &'static str, value: &str) -> Result<Principal, SnapshotCommandError> {
994    Principal::from_text(value).map_err(|_| SnapshotCommandError::InvalidPrincipal {
995        field,
996        value: value.to_string(),
997    })
998}
999
// Extract a likely snapshot id from free-form dfx output.
//
// The output is tokenised on whitespace, quotes, colons and commas. Scanning
// from the end, the first non-empty token made up solely of ASCII
// alphanumerics, '-', '_' or '.' is returned; `None` if no token qualifies.
fn parse_snapshot_id(output: &str) -> Option<String> {
    // Characters that delimit tokens in dfx output.
    fn is_separator(c: char) -> bool {
        c.is_whitespace() || matches!(c, '"' | '\'' | ':' | ',')
    }

    // Characters permitted inside a snapshot id candidate.
    fn is_id_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.')
    }

    output
        .rsplit(is_separator)
        .find(|token| !token.is_empty() && token.chars().all(is_id_char))
        .map(str::to_owned)
}
1012
// Collect snapshot ids from `dfx canister snapshot list` style output.
//
// Each line contributes the trimmed text before its first ':'; lines without
// a colon, or with nothing but whitespace before it, are skipped.
fn parse_snapshot_list_ids(output: &str) -> Vec<String> {
    let mut ids = Vec::new();

    for line in output.lines() {
        let Some((prefix, _details)) = line.split_once(':') else {
            continue;
        };

        let id = prefix.trim();
        if !id.is_empty() {
            ids.push(id.to_owned());
        }
    }

    ids
}
1025
// Turn a principal's text form into a conservative filesystem path segment.
//
// ASCII alphanumerics, '-' and '_' pass through unchanged; every other
// character (including any non-ASCII one) is replaced by a single '_'.
fn safe_path_segment(value: &str) -> String {
    let mut segment = String::with_capacity(value.len());

    for c in value.chars() {
        let keep = c.is_ascii_alphanumeric() || c == '-' || c == '_';
        segment.push(if keep { c } else { '_' });
    }

    segment
}
1039
1040// Build a stable backup id for this command's output directory.
1041fn backup_id(options: &SnapshotDownloadOptions) -> String {
1042    options
1043        .out
1044        .file_name()
1045        .and_then(|name| name.to_str())
1046        .map_or_else(|| "snapshot-download".to_string(), str::to_string)
1047}
1048
// Stand-in timestamp used until the CLI owns a clock abstraction.
//
// Always yields the literal text "unknown".
fn timestamp_placeholder() -> String {
    String::from("unknown")
}
1053
1054// Read the next required option value.
1055fn next_value<I>(args: &mut I, option: &'static str) -> Result<String, SnapshotCommandError>
1056where
1057    I: Iterator<Item = OsString>,
1058{
1059    args.next()
1060        .and_then(|value| value.into_string().ok())
1061        .ok_or(SnapshotCommandError::MissingValue(option))
1062}
1063
// Usage line for the `canic snapshot download` command.
const fn usage() -> &'static str {
    const USAGE: &str = "usage: canic snapshot download --canister <id> --out <dir> [--root <id> | --registry-json <file>] [--include-children] [--recursive] [--dry-run] [--stop-before-snapshot] [--resume-after-snapshot] [--network <name>]";

    USAGE
}
1068
#[cfg(test)]
mod tests {
    use super::*;
    use canic_backup::persistence::BackupLayout;
    use serde_json::json;
    use std::time::{SystemTime, UNIX_EPOCH};

    const ROOT: &str = "aaaaa-aa";
    const CHILD: &str = "renrk-eyaaa-aaaaa-aaada-cai";
    const GRANDCHILD: &str = "rno2w-sqaaa-aaaaa-aaacq-cai";
    const HASH: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";

    // Registry JSON wrapped in an `Ok` variant must parse into all entries.
    #[test]
    fn parses_wrapped_registry_json() {
        let raw = registry_json();

        let parsed = parse_registry_entries(&raw).expect("parse registry");

        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[1].parent_pid.as_deref(), Some(ROOT));
    }

    // Non-recursive resolution stops after the first level of children.
    #[test]
    fn targets_include_direct_children() {
        let registry = parse_registry_entries(&registry_json()).expect("parse registry");

        let resolved = targets_from_registry(&registry, ROOT, false).expect("resolve targets");

        let ids: Vec<&str> = resolved
            .iter()
            .map(|target| target.canister_id.as_str())
            .collect();
        assert_eq!(ids, [ROOT, CHILD]);
    }

    // Recursive resolution follows the tree down to grandchildren.
    #[test]
    fn targets_include_recursive_children() {
        let registry = parse_registry_entries(&registry_json()).expect("parse registry");

        let resolved = targets_from_registry(&registry, ROOT, true).expect("resolve targets");

        let ids: Vec<&str> = resolved
            .iter()
            .map(|target| target.canister_id.as_str())
            .collect();
        assert_eq!(ids, [ROOT, CHILD, GRANDCHILD]);
    }

    // A snapshot id is recoverable from typical "created" output.
    #[test]
    fn parses_snapshot_id_from_output() {
        let parsed = parse_snapshot_id("Created snapshot: snap_abc-123\n");

        assert_eq!(parsed.as_deref(), Some("snap_abc-123"));
    }

    // Snapshot list output is usable as a fallback when create prints nothing.
    #[test]
    fn parses_snapshot_ids_from_list_output() {
        let listing =
            "0000000000000000ffffffffff9000050101: 213.76 MiB, taken at 2026-05-03 12:20:53 UTC\n";

        let ids = parse_snapshot_list_ids(listing);

        assert_eq!(ids, ["0000000000000000ffffffffff9000050101"]);
    }

    // Option parsing handles the full dry-run command line.
    #[test]
    fn parses_download_options() {
        let args = [
            "--canister",
            ROOT,
            "--out",
            "backups/test",
            "--registry-json",
            "registry.json",
            "--recursive",
            "--dry-run",
            "--stop-before-snapshot",
            "--resume-after-snapshot",
        ]
        .map(OsString::from);

        let parsed = SnapshotDownloadOptions::parse(args).expect("parse options");

        assert_eq!(parsed.canister, ROOT);
        assert!(parsed.include_children);
        assert!(parsed.recursive);
        assert!(parsed.dry_run);
        assert_eq!(parsed.lifecycle, SnapshotLifecycleMode::StopAndResume);
    }

    // A hash mismatch between discovery and pre-snapshot must be an error.
    #[test]
    fn topology_stability_rejects_pre_snapshot_drift() {
        let at_discovery = topology_hash(HASH);
        let before_snapshot =
            topology_hash("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff");

        let failure = ensure_topology_stable(&at_discovery, &before_snapshot)
            .expect_err("topology drift should fail");

        assert!(matches!(
            failure,
            SnapshotCommandError::TopologyChanged { .. }
        ));
    }

    // The real command path, run against a fake dfx script, must leave a
    // manifest and a durable journal entry in the output directory.
    #[cfg(unix)]
    #[test]
    fn download_snapshots_writes_manifest_and_durable_journal() {
        use std::os::unix::fs::PermissionsExt;

        // Stand-in for dfx: answers snapshot create/list/download, rejects
        // anything else.
        const FAKE_DFX_SCRIPT: &str = r#"#!/bin/sh
set -eu
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "create" ]; then
  echo "snapshot-$4"
  exit 0
fi
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "list" ]; then
  exit 0
fi
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "download" ]; then
  mkdir -p "$7"
  printf "%s:%s\n" "$4" "$5" > "$7/snapshot.txt"
  exit 0
fi
echo "unexpected args: $*" >&2
exit 1
"#;

        let workspace = temp_dir("canic-cli-download");
        let script_path = workspace.join("fake-dfx.sh");
        fs::create_dir_all(&workspace).expect("create temp root");
        fs::write(&script_path, FAKE_DFX_SCRIPT).expect("write fake dfx");

        // Mark the script executable so it can be spawned like dfx.
        let mut script_permissions = fs::metadata(&script_path)
            .expect("stat fake dfx")
            .permissions();
        script_permissions.set_mode(0o755);
        fs::set_permissions(&script_path, script_permissions).expect("chmod fake dfx");

        let backup_dir = workspace.join("backup");
        let options = SnapshotDownloadOptions {
            canister: ROOT.to_string(),
            out: backup_dir.clone(),
            root: None,
            registry_json: None,
            include_children: false,
            recursive: false,
            dry_run: false,
            lifecycle: SnapshotLifecycleMode::SnapshotOnly,
            network: None,
            dfx: script_path.display().to_string(),
        };

        let outcome = download_snapshots(&options).expect("download snapshots");
        let layout = BackupLayout::new(backup_dir);
        let journal = layout.read_journal().expect("read journal");
        let manifest = layout.read_manifest().expect("read manifest");

        // Clean up before asserting so a failed assertion leaves no temp files.
        fs::remove_dir_all(workspace).expect("remove temp root");

        assert_eq!(outcome.artifacts.len(), 1);
        assert_eq!(journal.artifacts.len(), 1);
        let journal_entry = &journal.artifacts[0];
        assert_eq!(journal_entry.state, ArtifactState::Durable);
        assert!(journal_entry.checksum.is_some());
        assert_eq!(manifest.backup_id, journal.backup_id);
        assert_eq!(manifest.fleet.members.len(), 1);
        let member = &manifest.fleet.members[0];
        assert_eq!(member.canister_id, ROOT);
        assert_eq!(member.source_snapshot.snapshot_id, "snapshot-aaaaa-aa");
        assert_eq!(
            member.source_snapshot.checksum.as_deref(),
            journal_entry.checksum.as_deref()
        );
    }

    // Build one registry entry; `parent_pid` is passed through verbatim so
    // callers can exercise the null, bare-string and array encodings.
    fn registry_entry(pid: &str, role: &str, parent_pid: Value) -> Value {
        json!({
            "pid": pid,
            "role": role,
            "record": {
                "pid": pid,
                "role": role,
                "parent_pid": parent_pid
            }
        })
    }

    // Representative three-level subnet registry (root -> app -> worker).
    fn registry_json() -> String {
        let entries = vec![
            registry_entry(ROOT, "root", Value::Null),
            registry_entry(CHILD, "app", json!(ROOT)),
            registry_entry(GRANDCHILD, "worker", json!([CHILD])),
        ];

        json!({ "Ok": entries }).to_string()
    }

    // Wrap a raw hash string in a `TopologyHash` for stability tests.
    fn topology_hash(hash: &str) -> TopologyHash {
        let algorithm = "sha256".to_string();
        let input = "sorted(pid,parent_pid,role,module_hash)".to_string();

        TopologyHash {
            algorithm,
            input,
            hash: hash.to_string(),
        }
    }

    // Path under the system temp dir made unique by process id and a
    // nanosecond timestamp. The directory itself is not created here.
    fn temp_dir(prefix: &str) -> PathBuf {
        let since_epoch = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system time after epoch");
        let pid = std::process::id();
        let dir_name = format!("{prefix}-{pid}-{}", since_epoch.as_nanos());

        std::env::temp_dir().join(dir_name)
    }
}