Skip to main content

canic_cli/snapshot/
mod.rs

1use candid::Principal;
2use canic_backup::{
3    artifacts::{ArtifactChecksum, ArtifactChecksumError},
4    journal::{
5        ArtifactJournalEntry, ArtifactState, DownloadJournal, DownloadOperationMetrics,
6        JournalValidationError,
7    },
8    manifest::{
9        BackupUnit, BackupUnitKind, ConsistencyMode, ConsistencySection, FleetBackupManifest,
10        FleetMember, FleetSection, IdentityMode, ManifestValidationError, SourceMetadata,
11        SourceSnapshot, ToolMetadata, VerificationCheck, VerificationPlan,
12    },
13    persistence::{BackupLayout, PersistenceError},
14    topology::{TopologyHash, TopologyHasher, TopologyRecord},
15};
16use serde_json::Value;
17use std::{
18    collections::{BTreeMap, BTreeSet, VecDeque},
19    ffi::OsString,
20    fs,
21    path::{Path, PathBuf},
22    process::Command,
23};
24use thiserror::Error as ThisError;
25
///
/// SnapshotCommandError
///
/// Every failure the snapshot subcommand can report: CLI usage problems,
/// registry/topology problems, failed `dfx` invocations, and errors bubbled
/// up unchanged from I/O, JSON, checksum, persistence, journal, and manifest
/// handling.
///

#[derive(Debug, ThisError)]
pub enum SnapshotCommandError {
    /// A static message shown verbatim (usage text or a fixed diagnostic).
    #[error("{0}")]
    Usage(&'static str),

    /// A required command-line option was not supplied.
    #[error("missing required option {0}")]
    MissingOption(&'static str),

    /// An unrecognized option or subcommand was given.
    #[error("unknown option {0}")]
    UnknownOption(String),

    /// An option that takes a value was given without one.
    #[error("option {0} requires a value")]
    MissingValue(&'static str),

    /// Both `--root` and `--registry-json` were supplied.
    #[error("cannot combine --root and --registry-json")]
    ConflictingRegistrySources,

    /// The requested canister id has no entry in the loaded registry.
    #[error("registry JSON did not contain the requested canister {0}")]
    CanisterNotInRegistry(String),

    /// A `dfx` invocation exited unsuccessfully; carries the rendered command
    /// line and the captured stderr.
    #[error("dfx command failed: {command}\n{stderr}")]
    DfxFailed { command: String, stderr: String },

    /// No snapshot id could be determined after `dfx canister snapshot create`;
    /// carries the command output that was inspected.
    #[error("could not parse snapshot id from dfx output: {0}")]
    SnapshotIdUnavailable(String),

    /// A value that must be a principal failed to parse; `field` names the
    /// logical location of the value.
    #[error("field {field} must be a valid principal: {value}")]
    InvalidPrincipal { field: &'static str, value: String },

    /// The topology hash computed at discovery differs from the one computed
    /// just before snapshot creation.
    #[error(
        "topology changed before snapshot start: discovery={discovery}, pre_snapshot={pre_snapshot}"
    )]
    TopologyChanged {
        discovery: String,
        pre_snapshot: String,
    },

    /// Filesystem or process-spawn failure.
    #[error(transparent)]
    Io(#[from] std::io::Error),

    /// JSON parsing failure.
    #[error(transparent)]
    Json(#[from] serde_json::Error),

    /// Artifact checksum computation failure.
    #[error(transparent)]
    Checksum(#[from] ArtifactChecksumError),

    /// Backup layout persistence failure (journal or manifest write).
    #[error(transparent)]
    Persistence(#[from] PersistenceError),

    /// Journal entry validation failure (for example a rejected state advance).
    #[error(transparent)]
    Journal(#[from] JournalValidationError),

    /// The assembled manifest failed validation.
    #[error(transparent)]
    InvalidManifest(#[from] ManifestValidationError),
}
85
///
/// SnapshotDownloadOptions
///
/// Parsed settings for the `download` subcommand.
///

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SnapshotDownloadOptions {
    /// Canister selected with `--canister` (required).
    pub canister: String,
    /// Output directory given with `--out` (required); artifacts are placed
    /// beneath it.
    pub out: PathBuf,
    /// Canister given with `--root`; queried for the subnet registry when
    /// children are included. Mutually exclusive with `registry_json`.
    pub root: Option<String>,
    /// Path given with `--registry-json`; read from disk instead of querying
    /// a root canister. Mutually exclusive with `root`.
    pub registry_json: Option<PathBuf>,
    /// Whether children of the selected canister are included
    /// (set by `--include-children` and also by `--recursive`).
    pub include_children: bool,
    /// Whether descendants beyond direct children are included (`--recursive`).
    pub recursive: bool,
    /// When set (`--dry-run`), planned commands are printed and nothing is
    /// created, downloaded, or written.
    pub dry_run: bool,
    /// Stop/start behavior derived from `--stop-before-snapshot` and
    /// `--resume-after-snapshot`.
    pub lifecycle: SnapshotLifecycleMode,
    /// Value forwarded to `dfx canister --network` when present.
    pub network: Option<String>,
    /// Program used to invoke dfx; defaults to `"dfx"`, overridable with `--dfx`.
    pub dfx: String,
}
103
104impl SnapshotDownloadOptions {
105    /// Parse snapshot download options from CLI arguments.
106    pub fn parse<I>(args: I) -> Result<Self, SnapshotCommandError>
107    where
108        I: IntoIterator<Item = OsString>,
109    {
110        let mut canister = None;
111        let mut out = None;
112        let mut root = None;
113        let mut registry_json = None;
114        let mut include_children = false;
115        let mut recursive = false;
116        let mut dry_run = false;
117        let mut stop_before_snapshot = false;
118        let mut resume_after_snapshot = false;
119        let mut network = None;
120        let mut dfx = "dfx".to_string();
121
122        let mut args = args.into_iter();
123        while let Some(arg) = args.next() {
124            let arg = arg
125                .into_string()
126                .map_err(|_| SnapshotCommandError::Usage(usage()))?;
127            match arg.as_str() {
128                "--canister" => canister = Some(next_value(&mut args, "--canister")?),
129                "--out" => out = Some(PathBuf::from(next_value(&mut args, "--out")?)),
130                "--root" => root = Some(next_value(&mut args, "--root")?),
131                "--registry-json" => {
132                    registry_json = Some(PathBuf::from(next_value(&mut args, "--registry-json")?));
133                }
134                "--include-children" => include_children = true,
135                "--recursive" => {
136                    recursive = true;
137                    include_children = true;
138                }
139                "--dry-run" => dry_run = true,
140                "--stop-before-snapshot" => stop_before_snapshot = true,
141                "--resume-after-snapshot" => resume_after_snapshot = true,
142                "--network" => network = Some(next_value(&mut args, "--network")?),
143                "--dfx" => dfx = next_value(&mut args, "--dfx")?,
144                "--help" | "-h" => return Err(SnapshotCommandError::Usage(usage())),
145                _ => return Err(SnapshotCommandError::UnknownOption(arg)),
146            }
147        }
148
149        if root.is_some() && registry_json.is_some() {
150            return Err(SnapshotCommandError::ConflictingRegistrySources);
151        }
152
153        Ok(Self {
154            canister: canister.ok_or(SnapshotCommandError::MissingOption("--canister"))?,
155            out: out.ok_or(SnapshotCommandError::MissingOption("--out"))?,
156            root,
157            registry_json,
158            include_children,
159            recursive,
160            dry_run,
161            lifecycle: SnapshotLifecycleMode::from_flags(
162                stop_before_snapshot,
163                resume_after_snapshot,
164            ),
165            network,
166            dfx,
167        })
168    }
169}
170
///
/// SnapshotLifecycleMode
///
/// Which stop/start commands surround snapshot creation.
///

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SnapshotLifecycleMode {
    SnapshotOnly,
    StopBeforeSnapshot,
    ResumeAfterSnapshot,
    StopAndResume,
}

impl SnapshotLifecycleMode {
    /// Map the two CLI booleans onto the matching lifecycle variant.
    #[must_use]
    pub const fn from_flags(stop_before_snapshot: bool, resume_after_snapshot: bool) -> Self {
        if stop_before_snapshot {
            if resume_after_snapshot {
                Self::StopAndResume
            } else {
                Self::StopBeforeSnapshot
            }
        } else if resume_after_snapshot {
            Self::ResumeAfterSnapshot
        } else {
            Self::SnapshotOnly
        }
    }

    /// True when the canister must be stopped before the snapshot is created.
    #[must_use]
    pub const fn stop_before_snapshot(self) -> bool {
        match self {
            Self::StopBeforeSnapshot | Self::StopAndResume => true,
            Self::SnapshotOnly | Self::ResumeAfterSnapshot => false,
        }
    }

    /// True when the canister must be started once the snapshot is captured.
    #[must_use]
    pub const fn resume_after_snapshot(self) -> bool {
        match self {
            Self::ResumeAfterSnapshot | Self::StopAndResume => true,
            Self::SnapshotOnly | Self::StopBeforeSnapshot => false,
        }
    }
}
207
///
/// SnapshotTarget
///
/// One canister selected for snapshotting.
///

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SnapshotTarget {
    /// Canister id of the target.
    pub canister_id: String,
    /// Role copied from the registry entry; `None` when the target was
    /// resolved without a registry.
    pub role: Option<String>,
    /// Parent canister id copied from the registry entry; `None` when the
    /// target was resolved without a registry or the entry has no parent.
    pub parent_canister_id: Option<String>,
}
218
219/// Run a snapshot subcommand.
220pub fn run<I>(args: I) -> Result<(), SnapshotCommandError>
221where
222    I: IntoIterator<Item = OsString>,
223{
224    let mut args = args.into_iter();
225    let Some(command) = args.next().and_then(|arg| arg.into_string().ok()) else {
226        return Err(SnapshotCommandError::Usage(usage()));
227    };
228
229    match command.as_str() {
230        "download" => {
231            let options = SnapshotDownloadOptions::parse(args)?;
232            let result = download_snapshots(&options)?;
233            for artifact in result.artifacts {
234                println!(
235                    "{} {} {}",
236                    artifact.canister_id,
237                    artifact.snapshot_id,
238                    artifact.path.display()
239                );
240            }
241            Ok(())
242        }
243        "help" | "--help" | "-h" => {
244            println!("{}", usage());
245            Ok(())
246        }
247        _ => Err(SnapshotCommandError::UnknownOption(command)),
248    }
249}
250
///
/// SnapshotDownloadResult
///
/// Outcome of one `download_snapshots` run.
///

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SnapshotDownloadResult {
    /// One artifact per resolved target, in target order. For dry runs these
    /// are placeholders rather than downloaded artifacts.
    pub artifacts: Vec<SnapshotArtifact>,
}
259
///
/// SnapshotArtifact
///
/// One snapshot artifact produced (or planned, for dry runs) for a canister.
///

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SnapshotArtifact {
    /// Canister the snapshot was taken from.
    pub canister_id: String,
    /// Snapshot id reported by dfx; the literal `<snapshot-id>` for dry runs.
    pub snapshot_id: String,
    /// Final artifact location under the output directory.
    pub path: PathBuf,
    /// Checksum hash of the downloaded artifact; the literal `<sha256>` for
    /// dry runs.
    pub checksum: String,
}
271
272/// Create and download snapshots for the selected canister set.
273pub fn download_snapshots(
274    options: &SnapshotDownloadOptions,
275) -> Result<SnapshotDownloadResult, SnapshotCommandError> {
276    let targets = resolve_targets(options)?;
277    let discovery_topology_hash = topology_hash_for_targets(options, &targets)?;
278    let pre_snapshot_topology_hash =
279        accepted_pre_snapshot_topology_hash(options, &discovery_topology_hash)?;
280    let mut artifacts = Vec::with_capacity(targets.len());
281    let mut journal = DownloadJournal {
282        journal_version: 1,
283        backup_id: backup_id(options),
284        discovery_topology_hash: Some(discovery_topology_hash.hash.clone()),
285        pre_snapshot_topology_hash: Some(pre_snapshot_topology_hash.hash.clone()),
286        operation_metrics: DownloadOperationMetrics {
287            target_count: targets.len(),
288            ..DownloadOperationMetrics::default()
289        },
290        artifacts: Vec::new(),
291    };
292    let layout = BackupLayout::new(options.out.clone());
293
294    for target in &targets {
295        let artifact_relative_path = PathBuf::from(safe_path_segment(&target.canister_id));
296        let artifact_path = options.out.join(&artifact_relative_path);
297        let temp_path = options
298            .out
299            .join(format!("{}.tmp", safe_path_segment(&target.canister_id)));
300
301        if options.dry_run {
302            artifacts.push(dry_run_artifact(options, target, artifact_path));
303            continue;
304        }
305
306        artifacts.push(capture_snapshot_artifact(
307            options,
308            &layout,
309            &mut journal,
310            target,
311            &artifact_relative_path,
312            artifact_path,
313            temp_path,
314        )?);
315    }
316
317    if !options.dry_run {
318        let manifest = build_manifest(
319            options,
320            &targets,
321            &artifacts,
322            discovery_topology_hash,
323            pre_snapshot_topology_hash,
324        )?;
325        layout.write_manifest(&manifest)?;
326    }
327
328    Ok(SnapshotDownloadResult { artifacts })
329}
330
331// Resolve and verify the pre-snapshot topology hash before any mutation.
332fn accepted_pre_snapshot_topology_hash(
333    options: &SnapshotDownloadOptions,
334    discovery_topology_hash: &TopologyHash,
335) -> Result<TopologyHash, SnapshotCommandError> {
336    if options.dry_run {
337        return Ok(discovery_topology_hash.clone());
338    }
339
340    let pre_snapshot_targets = resolve_targets(options)?;
341    let pre_snapshot_topology_hash = topology_hash_for_targets(options, &pre_snapshot_targets)?;
342    ensure_topology_stable(discovery_topology_hash, &pre_snapshot_topology_hash)?;
343    Ok(pre_snapshot_topology_hash)
344}
345
346// Print the planned commands and return a placeholder artifact for dry runs.
347fn dry_run_artifact(
348    options: &SnapshotDownloadOptions,
349    target: &SnapshotTarget,
350    artifact_path: PathBuf,
351) -> SnapshotArtifact {
352    if options.lifecycle.stop_before_snapshot() {
353        println!(
354            "{}",
355            stop_canister_command_display(options, &target.canister_id)
356        );
357    }
358    println!(
359        "{}",
360        create_snapshot_command_display(options, &target.canister_id)
361    );
362    println!(
363        "{}",
364        download_snapshot_command_display(options, &target.canister_id, "<snapshot-id>")
365    );
366    if options.lifecycle.resume_after_snapshot() {
367        println!(
368            "{}",
369            start_canister_command_display(options, &target.canister_id)
370        );
371    }
372
373    SnapshotArtifact {
374        canister_id: target.canister_id.clone(),
375        snapshot_id: "<snapshot-id>".to_string(),
376        path: artifact_path,
377        checksum: "<sha256>".to_string(),
378    }
379}
380
381// Create, download, checksum, and finalize one durable snapshot artifact.
382fn capture_snapshot_artifact(
383    options: &SnapshotDownloadOptions,
384    layout: &BackupLayout,
385    journal: &mut DownloadJournal,
386    target: &SnapshotTarget,
387    artifact_relative_path: &Path,
388    artifact_path: PathBuf,
389    temp_path: PathBuf,
390) -> Result<SnapshotArtifact, SnapshotCommandError> {
391    with_optional_stop(options, &target.canister_id, || {
392        journal.operation_metrics.snapshot_create_started += 1;
393        let snapshot_id = create_snapshot(options, &target.canister_id)?;
394        journal.operation_metrics.snapshot_create_completed += 1;
395        let mut entry = ArtifactJournalEntry {
396            canister_id: target.canister_id.clone(),
397            snapshot_id: snapshot_id.clone(),
398            state: ArtifactState::Created,
399            temp_path: None,
400            artifact_path: artifact_relative_path.display().to_string(),
401            checksum_algorithm: "sha256".to_string(),
402            checksum: None,
403            updated_at: timestamp_placeholder(),
404        };
405        journal.artifacts.push(entry.clone());
406        layout.write_journal(journal)?;
407
408        if temp_path.exists() {
409            fs::remove_dir_all(&temp_path)?;
410        }
411        fs::create_dir_all(&temp_path)?;
412        journal.operation_metrics.snapshot_download_started += 1;
413        layout.write_journal(journal)?;
414        download_snapshot(options, &target.canister_id, &snapshot_id, &temp_path)?;
415        journal.operation_metrics.snapshot_download_completed += 1;
416        entry.advance_to(ArtifactState::Downloaded, timestamp_placeholder())?;
417        entry.temp_path = Some(temp_path.display().to_string());
418        update_journal_entry(journal, &entry);
419        layout.write_journal(journal)?;
420
421        journal.operation_metrics.checksum_verify_started += 1;
422        layout.write_journal(journal)?;
423        let checksum = ArtifactChecksum::from_path(&temp_path)?;
424        journal.operation_metrics.checksum_verify_completed += 1;
425        entry.checksum = Some(checksum.hash.clone());
426        entry.advance_to(ArtifactState::ChecksumVerified, timestamp_placeholder())?;
427        update_journal_entry(journal, &entry);
428        layout.write_journal(journal)?;
429
430        journal.operation_metrics.artifact_finalize_started += 1;
431        layout.write_journal(journal)?;
432        if artifact_path.exists() {
433            return Err(std::io::Error::new(
434                std::io::ErrorKind::AlreadyExists,
435                format!("artifact path already exists: {}", artifact_path.display()),
436            )
437            .into());
438        }
439        fs::rename(&temp_path, &artifact_path)?;
440        journal.operation_metrics.artifact_finalize_completed += 1;
441        entry.temp_path = None;
442        entry.advance_to(ArtifactState::Durable, timestamp_placeholder())?;
443        update_journal_entry(journal, &entry);
444        layout.write_journal(journal)?;
445
446        Ok(SnapshotArtifact {
447            canister_id: target.canister_id.clone(),
448            snapshot_id,
449            path: artifact_path,
450            checksum: checksum.hash,
451        })
452    })
453}
454
455// Replace one artifact row in the mutable journal.
456fn update_journal_entry(journal: &mut DownloadJournal, entry: &ArtifactJournalEntry) {
457    if let Some(existing) = journal.artifacts.iter_mut().find(|existing| {
458        existing.canister_id == entry.canister_id && existing.snapshot_id == entry.snapshot_id
459    }) {
460        *existing = entry.clone();
461    }
462}
463
464/// Resolve the selected canister plus optional direct/recursive children.
465pub fn resolve_targets(
466    options: &SnapshotDownloadOptions,
467) -> Result<Vec<SnapshotTarget>, SnapshotCommandError> {
468    if !options.include_children {
469        return Ok(vec![SnapshotTarget {
470            canister_id: options.canister.clone(),
471            role: None,
472            parent_canister_id: None,
473        }]);
474    }
475
476    let registry = load_registry_entries(options)?;
477    targets_from_registry(&registry, &options.canister, options.recursive)
478}
479
480// Load registry entries from a file or live root query.
481fn load_registry_entries(
482    options: &SnapshotDownloadOptions,
483) -> Result<Vec<RegistryEntry>, SnapshotCommandError> {
484    let registry_json = if let Some(path) = &options.registry_json {
485        fs::read_to_string(path)?
486    } else if let Some(root) = &options.root {
487        call_subnet_registry(options, root)?
488    } else {
489        return Err(SnapshotCommandError::MissingOption(
490            "--root or --registry-json when using --include-children",
491        ));
492    };
493
494    parse_registry_entries(&registry_json)
495}
496
497// Run `dfx canister call <root> canic_subnet_registry --output json`.
498fn call_subnet_registry(
499    options: &SnapshotDownloadOptions,
500    root: &str,
501) -> Result<String, SnapshotCommandError> {
502    let mut command = Command::new(&options.dfx);
503    command.arg("canister");
504    add_canister_network_args(&mut command, options);
505    command.args(["call", root, "canic_subnet_registry", "--output", "json"]);
506    run_output(&mut command)
507}
508
509// Create one canister snapshot and parse the snapshot id from dfx output.
510fn create_snapshot(
511    options: &SnapshotDownloadOptions,
512    canister_id: &str,
513) -> Result<String, SnapshotCommandError> {
514    let before = list_snapshot_ids(options, canister_id)?;
515    let mut command = Command::new(&options.dfx);
516    command.arg("canister");
517    add_canister_network_args(&mut command, options);
518    command.args(["snapshot", "create", canister_id]);
519    let output = run_output_with_stderr(&mut command)?;
520    if let Some(snapshot_id) = parse_snapshot_id(&output) {
521        return Ok(snapshot_id);
522    }
523
524    let before = before.into_iter().collect::<BTreeSet<_>>();
525    let mut new_ids = list_snapshot_ids(options, canister_id)?
526        .into_iter()
527        .filter(|snapshot_id| !before.contains(snapshot_id))
528        .collect::<Vec<_>>();
529    if new_ids.len() == 1 {
530        Ok(new_ids.remove(0))
531    } else {
532        Err(SnapshotCommandError::SnapshotIdUnavailable(output))
533    }
534}
535
536// List the existing snapshot ids for one canister.
537fn list_snapshot_ids(
538    options: &SnapshotDownloadOptions,
539    canister_id: &str,
540) -> Result<Vec<String>, SnapshotCommandError> {
541    let mut command = Command::new(&options.dfx);
542    command.arg("canister");
543    add_canister_network_args(&mut command, options);
544    command.args(["snapshot", "list", canister_id]);
545    let output = run_output(&mut command)?;
546    Ok(parse_snapshot_list_ids(&output))
547}
548
549// Stop a canister before taking a snapshot when explicitly requested.
550fn stop_canister(
551    options: &SnapshotDownloadOptions,
552    canister_id: &str,
553) -> Result<(), SnapshotCommandError> {
554    let mut command = Command::new(&options.dfx);
555    command.arg("canister");
556    add_canister_network_args(&mut command, options);
557    command.args(["stop", canister_id]);
558    run_status(&mut command)
559}
560
561// Start a canister after snapshot capture when explicitly requested.
562fn start_canister(
563    options: &SnapshotDownloadOptions,
564    canister_id: &str,
565) -> Result<(), SnapshotCommandError> {
566    let mut command = Command::new(&options.dfx);
567    command.arg("canister");
568    add_canister_network_args(&mut command, options);
569    command.args(["start", canister_id]);
570    run_status(&mut command)
571}
572
573// Run one snapshot operation with optional stop/start lifecycle commands.
574fn with_optional_stop<T>(
575    options: &SnapshotDownloadOptions,
576    canister_id: &str,
577    operation: impl FnOnce() -> Result<T, SnapshotCommandError>,
578) -> Result<T, SnapshotCommandError> {
579    if options.lifecycle.stop_before_snapshot() {
580        stop_canister(options, canister_id)?;
581    }
582
583    let result = operation();
584
585    if options.lifecycle.resume_after_snapshot() {
586        match result {
587            Ok(value) => {
588                start_canister(options, canister_id)?;
589                Ok(value)
590            }
591            Err(error) => {
592                let _ = start_canister(options, canister_id);
593                Err(error)
594            }
595        }
596    } else {
597        result
598    }
599}
600
601// Download one canister snapshot into the target artifact directory.
602fn download_snapshot(
603    options: &SnapshotDownloadOptions,
604    canister_id: &str,
605    snapshot_id: &str,
606    artifact_path: &Path,
607) -> Result<(), SnapshotCommandError> {
608    let mut command = Command::new(&options.dfx);
609    command.arg("canister");
610    add_canister_network_args(&mut command, options);
611    command.args(["snapshot", "download", canister_id, snapshot_id, "--dir"]);
612    command.arg(artifact_path);
613    run_status(&mut command)
614}
615
616// Add optional `dfx canister` network arguments.
617fn add_canister_network_args(command: &mut Command, options: &SnapshotDownloadOptions) {
618    if let Some(network) = &options.network {
619        command.args(["--network", network]);
620    }
621}
622
623// Execute a command and capture stdout.
624fn run_output(command: &mut Command) -> Result<String, SnapshotCommandError> {
625    let display = command_display(command);
626    let output = command.output()?;
627    if output.status.success() {
628        Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
629    } else {
630        Err(SnapshotCommandError::DfxFailed {
631            command: display,
632            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
633        })
634    }
635}
636
637// Execute a command and capture stdout plus stderr on success.
638fn run_output_with_stderr(command: &mut Command) -> Result<String, SnapshotCommandError> {
639    let display = command_display(command);
640    let output = command.output()?;
641    if output.status.success() {
642        let mut text = String::from_utf8_lossy(&output.stdout).to_string();
643        text.push_str(&String::from_utf8_lossy(&output.stderr));
644        Ok(text.trim().to_string())
645    } else {
646        Err(SnapshotCommandError::DfxFailed {
647            command: display,
648            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
649        })
650    }
651}
652
653// Execute a command and require a successful status.
654fn run_status(command: &mut Command) -> Result<(), SnapshotCommandError> {
655    let display = command_display(command);
656    let output = command.output()?;
657    if output.status.success() {
658        Ok(())
659    } else {
660        Err(SnapshotCommandError::DfxFailed {
661            command: display,
662            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
663        })
664    }
665}
666
// Render the program and its arguments as one space-separated line.
// Non-UTF-8 pieces are converted lossily; nothing is quoted or escaped.
fn command_display(command: &Command) -> String {
    let mut rendered = command.get_program().to_string_lossy().into_owned();
    for arg in command.get_args() {
        rendered.push(' ');
        rendered.push_str(&arg.to_string_lossy());
    }
    rendered
}
677
678// Render one dry-run create command.
679fn create_snapshot_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
680    let mut command = Command::new(&options.dfx);
681    command.arg("canister");
682    add_canister_network_args(&mut command, options);
683    command.args(["snapshot", "create", canister_id]);
684    command_display(&command)
685}
686
687// Render one dry-run download command.
688fn download_snapshot_command_display(
689    options: &SnapshotDownloadOptions,
690    canister_id: &str,
691    snapshot_id: &str,
692) -> String {
693    let mut command = Command::new(&options.dfx);
694    command.arg("canister");
695    add_canister_network_args(&mut command, options);
696    command.args(["snapshot", "download", canister_id, snapshot_id, "--dir"]);
697    command.arg(options.out.join(safe_path_segment(canister_id)));
698    command_display(&command)
699}
700
701// Render one dry-run stop command.
702fn stop_canister_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
703    let mut command = Command::new(&options.dfx);
704    command.arg("canister");
705    add_canister_network_args(&mut command, options);
706    command.args(["stop", canister_id]);
707    command_display(&command)
708}
709
710// Render one dry-run start command.
711fn start_canister_command_display(options: &SnapshotDownloadOptions, canister_id: &str) -> String {
712    let mut command = Command::new(&options.dfx);
713    command.arg("canister");
714    add_canister_network_args(&mut command, options);
715    command.args(["start", canister_id]);
716    command_display(&command)
717}
718
///
/// RegistryEntry
///
/// One canister row parsed from the subnet registry JSON.
///

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RegistryEntry {
    /// Canister id, taken from the entry's `pid` field.
    pub pid: String,
    /// Role, taken from the entry's `role` field when it is a string.
    pub role: Option<String>,
    /// Parent canister id, taken from `record.parent_pid` when present.
    pub parent_pid: Option<String>,
}
729
730/// Parse the `dfx --output json` subnet registry shape.
731pub fn parse_registry_entries(
732    registry_json: &str,
733) -> Result<Vec<RegistryEntry>, SnapshotCommandError> {
734    let data = serde_json::from_str::<Value>(registry_json)?;
735    let entries = data
736        .get("Ok")
737        .and_then(Value::as_array)
738        .or_else(|| data.as_array())
739        .ok_or(SnapshotCommandError::Usage(
740            "registry JSON must be an array or {\"Ok\": [...]}",
741        ))?;
742
743    Ok(entries.iter().filter_map(parse_registry_entry).collect())
744}
745
746// Parse one registry entry from dfx JSON.
747fn parse_registry_entry(value: &Value) -> Option<RegistryEntry> {
748    let pid = value.get("pid").and_then(Value::as_str)?.to_string();
749    let role = value
750        .get("role")
751        .and_then(Value::as_str)
752        .map(str::to_string);
753    let parent_pid = value
754        .get("record")
755        .and_then(|record| record.get("parent_pid"))
756        .and_then(parse_optional_principal);
757
758    Some(RegistryEntry {
759        pid,
760        role,
761        parent_pid,
762    })
763}
764
765// Parse optional principal JSON emitted as null, string, or optional vector form.
766fn parse_optional_principal(value: &Value) -> Option<String> {
767    if value.is_null() {
768        return None;
769    }
770    if let Some(text) = value.as_str() {
771        return Some(text.to_string());
772    }
773    value
774        .as_array()
775        .and_then(|items| items.first())
776        .and_then(Value::as_str)
777        .map(str::to_string)
778}
779
780/// Resolve selected target and children from registry entries.
781pub fn targets_from_registry(
782    registry: &[RegistryEntry],
783    canister_id: &str,
784    recursive: bool,
785) -> Result<Vec<SnapshotTarget>, SnapshotCommandError> {
786    let by_pid = registry
787        .iter()
788        .map(|entry| (entry.pid.as_str(), entry))
789        .collect::<BTreeMap<_, _>>();
790
791    let root = by_pid
792        .get(canister_id)
793        .ok_or_else(|| SnapshotCommandError::CanisterNotInRegistry(canister_id.to_string()))?;
794
795    let mut targets = Vec::new();
796    let mut seen = BTreeSet::new();
797    targets.push(SnapshotTarget {
798        canister_id: root.pid.clone(),
799        role: root.role.clone(),
800        parent_canister_id: root.parent_pid.clone(),
801    });
802    seen.insert(root.pid.clone());
803
804    let mut queue = VecDeque::from([root.pid.clone()]);
805    while let Some(parent) = queue.pop_front() {
806        for child in registry
807            .iter()
808            .filter(|entry| entry.parent_pid.as_deref() == Some(parent.as_str()))
809        {
810            if seen.insert(child.pid.clone()) {
811                targets.push(SnapshotTarget {
812                    canister_id: child.pid.clone(),
813                    role: child.role.clone(),
814                    parent_canister_id: child.parent_pid.clone(),
815                });
816                if recursive {
817                    queue.push_back(child.pid.clone());
818                }
819            }
820        }
821    }
822
823    Ok(targets)
824}
825
826// Build a validated manifest for one successful snapshot download run.
827fn build_manifest(
828    options: &SnapshotDownloadOptions,
829    targets: &[SnapshotTarget],
830    artifacts: &[SnapshotArtifact],
831    discovery_topology_hash: TopologyHash,
832    pre_snapshot_topology_hash: TopologyHash,
833) -> Result<FleetBackupManifest, SnapshotCommandError> {
834    let roles = targets
835        .iter()
836        .enumerate()
837        .map(|(index, target)| target_role(options, index, target))
838        .collect::<BTreeSet<_>>()
839        .into_iter()
840        .collect::<Vec<_>>();
841
842    let manifest = FleetBackupManifest {
843        manifest_version: 1,
844        backup_id: backup_id(options),
845        created_at: timestamp_placeholder(),
846        tool: ToolMetadata {
847            name: "canic-cli".to_string(),
848            version: env!("CARGO_PKG_VERSION").to_string(),
849        },
850        source: SourceMetadata {
851            environment: options
852                .network
853                .clone()
854                .unwrap_or_else(|| "local".to_string()),
855            root_canister: options
856                .root
857                .clone()
858                .unwrap_or_else(|| options.canister.clone()),
859        },
860        consistency: ConsistencySection {
861            mode: ConsistencyMode::CrashConsistent,
862            backup_units: vec![BackupUnit {
863                unit_id: "snapshot-selection".to_string(),
864                kind: if options.include_children {
865                    BackupUnitKind::SubtreeRooted
866                } else {
867                    BackupUnitKind::Flat
868                },
869                roles,
870                consistency_reason: if options.include_children {
871                    None
872                } else {
873                    Some("explicit single-canister snapshot selection".to_string())
874                },
875                dependency_closure: Vec::new(),
876                topology_validation: if options.include_children {
877                    "registry-subtree-selection".to_string()
878                } else {
879                    "explicit-selection".to_string()
880                },
881                quiescence_strategy: None,
882            }],
883        },
884        fleet: FleetSection {
885            topology_hash_algorithm: discovery_topology_hash.algorithm,
886            topology_hash_input: discovery_topology_hash.input,
887            discovery_topology_hash: discovery_topology_hash.hash.clone(),
888            pre_snapshot_topology_hash: pre_snapshot_topology_hash.hash,
889            topology_hash: discovery_topology_hash.hash,
890            members: targets
891                .iter()
892                .enumerate()
893                .map(|(index, target)| fleet_member(options, index, target, artifacts))
894                .collect::<Result<Vec<_>, _>>()?,
895        },
896        verification: VerificationPlan::default(),
897    };
898
899    manifest.validate()?;
900    Ok(manifest)
901}
902
903// Compute the canonical topology hash for one resolved target set.
904fn topology_hash_for_targets(
905    options: &SnapshotDownloadOptions,
906    targets: &[SnapshotTarget],
907) -> Result<TopologyHash, SnapshotCommandError> {
908    let topology_records = targets
909        .iter()
910        .enumerate()
911        .map(|(index, target)| topology_record(options, index, target))
912        .collect::<Result<Vec<_>, _>>()?;
913    Ok(TopologyHasher::hash(&topology_records))
914}
915
916// Fail closed if topology changes after discovery but before snapshot creation.
917fn ensure_topology_stable(
918    discovery: &TopologyHash,
919    pre_snapshot: &TopologyHash,
920) -> Result<(), SnapshotCommandError> {
921    if discovery.hash == pre_snapshot.hash {
922        return Ok(());
923    }
924
925    Err(SnapshotCommandError::TopologyChanged {
926        discovery: discovery.hash.clone(),
927        pre_snapshot: pre_snapshot.hash.clone(),
928    })
929}
930
931// Build one canonical topology record for manifest hashing.
932fn topology_record(
933    options: &SnapshotDownloadOptions,
934    index: usize,
935    target: &SnapshotTarget,
936) -> Result<TopologyRecord, SnapshotCommandError> {
937    Ok(TopologyRecord {
938        pid: parse_principal("fleet.members[].canister_id", &target.canister_id)?,
939        parent_pid: target
940            .parent_canister_id
941            .as_deref()
942            .map(|parent| parse_principal("fleet.members[].parent_canister_id", parent))
943            .transpose()?,
944        role: target_role(options, index, target),
945        module_hash: None,
946    })
947}
948
949// Build one manifest member from a captured durable artifact.
950fn fleet_member(
951    options: &SnapshotDownloadOptions,
952    index: usize,
953    target: &SnapshotTarget,
954    artifacts: &[SnapshotArtifact],
955) -> Result<FleetMember, SnapshotCommandError> {
956    let Some(artifact) = artifacts
957        .iter()
958        .find(|artifact| artifact.canister_id == target.canister_id)
959    else {
960        return Err(SnapshotCommandError::SnapshotIdUnavailable(format!(
961            "missing artifact for {}",
962            target.canister_id
963        )));
964    };
965    let role = target_role(options, index, target);
966
967    Ok(FleetMember {
968        role: role.clone(),
969        canister_id: target.canister_id.clone(),
970        parent_canister_id: target.parent_canister_id.clone(),
971        subnet_canister_id: options.root.clone(),
972        controller_hint: None,
973        identity_mode: if target.canister_id == options.canister {
974            IdentityMode::Fixed
975        } else {
976            IdentityMode::Relocatable
977        },
978        restore_group: if target.canister_id == options.canister {
979            1
980        } else {
981            2
982        },
983        verification_class: "basic".to_string(),
984        verification_checks: vec![VerificationCheck {
985            kind: "status".to_string(),
986            method: None,
987            roles: vec![role],
988        }],
989        source_snapshot: SourceSnapshot {
990            snapshot_id: artifact.snapshot_id.clone(),
991            module_hash: None,
992            wasm_hash: None,
993            code_version: None,
994            artifact_path: safe_path_segment(&target.canister_id),
995            checksum_algorithm: "sha256".to_string(),
996            checksum: Some(artifact.checksum.clone()),
997        },
998    })
999}
1000
1001// Return the manifest role for one selected snapshot target.
1002fn target_role(options: &SnapshotDownloadOptions, index: usize, target: &SnapshotTarget) -> String {
1003    target.role.clone().unwrap_or_else(|| {
1004        if target.canister_id == options.canister {
1005            "root".to_string()
1006        } else {
1007            format!("member-{index}")
1008        }
1009    })
1010}
1011
1012// Parse one principal used by generated topology manifest metadata.
1013fn parse_principal(field: &'static str, value: &str) -> Result<Principal, SnapshotCommandError> {
1014    Principal::from_text(value).map_err(|_| SnapshotCommandError::InvalidPrincipal {
1015        field,
1016        value: value.to_string(),
1017    })
1018}
1019
// Parse a likely snapshot id from dfx output.
//
// The output is split on whitespace, quotes, ':' and ','. Scanning from the
// end, the first token that consists solely of ASCII alphanumerics, '-', '_'
// or '.' AND contains at least one alphanumeric character is returned.
//
// The alphanumeric requirement keeps punctuation-only tokens (for example a
// trailing "..." or a lone "-") from being mistaken for an id. Returns `None`
// when no token qualifies, including for empty output.
fn parse_snapshot_id(output: &str) -> Option<String> {
    let is_id_char = |c: char| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.');

    output
        .split(|c: char| c.is_whitespace() || matches!(c, '"' | '\'' | ':' | ','))
        .rev()
        .find(|part| {
            // `any` is false for empty tokens, so they are rejected here too.
            part.chars().any(|c| c.is_ascii_alphanumeric()) && part.chars().all(is_id_char)
        })
        .map(str::to_string)
}
1032
// Extract snapshot ids from `dfx canister snapshot list` style output.
//
// Each line is cut at its first ':'; the trimmed text before it is taken as
// an id. Lines without a ':' and lines whose prefix is blank are skipped.
fn parse_snapshot_list_ids(output: &str) -> Vec<String> {
    let mut snapshot_ids = Vec::new();

    for line in output.lines() {
        let Some((prefix, _details)) = line.split_once(':') else {
            continue;
        };

        let id = prefix.trim();
        if !id.is_empty() {
            snapshot_ids.push(id.to_string());
        }
    }

    snapshot_ids
}
1045
// Turn an identifier into a conservative filesystem path segment.
//
// ASCII letters, digits, '-' and '_' are kept as-is; every other character
// (including '.', '/', and any non-ASCII character) becomes a single '_'.
fn safe_path_segment(value: &str) -> String {
    let mut segment = String::with_capacity(value.len());

    for ch in value.chars() {
        let keep = ch.is_ascii_alphanumeric() || ch == '-' || ch == '_';
        segment.push(if keep { ch } else { '_' });
    }

    segment
}
1059
1060// Build a stable backup id for this command's output directory.
1061fn backup_id(options: &SnapshotDownloadOptions) -> String {
1062    options
1063        .out
1064        .file_name()
1065        .and_then(|name| name.to_str())
1066        .map_or_else(|| "snapshot-download".to_string(), str::to_string)
1067}
1068
// Stand-in for a real creation timestamp: always yields the literal
// "unknown" until the CLI owns a clock abstraction.
fn timestamp_placeholder() -> String {
    String::from("unknown")
}
1073
1074// Read the next required option value.
1075fn next_value<I>(args: &mut I, option: &'static str) -> Result<String, SnapshotCommandError>
1076where
1077    I: Iterator<Item = OsString>,
1078{
1079    args.next()
1080        .and_then(|value| value.into_string().ok())
1081        .ok_or(SnapshotCommandError::MissingValue(option))
1082}
1083
// One-line usage string for `canic snapshot download`.
const fn usage() -> &'static str {
    const USAGE: &str = "usage: canic snapshot download --canister <id> --out <dir> [--root <id> | --registry-json <file>] [--include-children] [--recursive] [--dry-run] [--stop-before-snapshot] [--resume-after-snapshot] [--network <name>]";

    USAGE
}
1088
// Unit tests for registry parsing, target resolution, snapshot-id parsing,
// option parsing, topology drift detection, and the end-to-end download path
// (driven through a fake `dfx` shell script on Unix).
#[cfg(test)]
mod tests {
    use super::*;
    use canic_backup::persistence::BackupLayout;
    use serde_json::json;
    use std::time::{SystemTime, UNIX_EPOCH};

    // Principal-text fixtures forming the root -> child -> grandchild chain
    // built by `registry_json` below.
    const ROOT: &str = "aaaaa-aa";
    const CHILD: &str = "renrk-eyaaa-aaaaa-aaada-cai";
    const GRANDCHILD: &str = "rno2w-sqaaa-aaaaa-aaacq-cai";
    // 64 hex characters used as a stand-in topology hash value.
    const HASH: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";

    // Ensure dfx registry JSON parses in the wrapped Ok shape.
    #[test]
    fn parses_wrapped_registry_json() {
        let json = registry_json();

        let entries = parse_registry_entries(&json).expect("parse registry");

        // The fixture holds three entries; the second one (CHILD) lists ROOT
        // as its parent.
        assert_eq!(entries.len(), 3);
        assert_eq!(entries[1].parent_pid.as_deref(), Some(ROOT));
    }

    // Ensure direct-child resolution includes only one level.
    #[test]
    fn targets_include_direct_children() {
        let entries = parse_registry_entries(&registry_json()).expect("parse registry");

        // Third argument is `false` here and `true` in the recursive test
        // below; presumably it is the "recursive" switch.
        let targets = targets_from_registry(&entries, ROOT, false).expect("resolve targets");

        // GRANDCHILD must not appear: it is two levels below ROOT.
        assert_eq!(
            targets
                .iter()
                .map(|target| target.canister_id.as_str())
                .collect::<Vec<_>>(),
            vec![ROOT, CHILD]
        );
    }

    // Ensure recursive resolution walks descendants.
    #[test]
    fn targets_include_recursive_children() {
        let entries = parse_registry_entries(&registry_json()).expect("parse registry");

        let targets = targets_from_registry(&entries, ROOT, true).expect("resolve targets");

        // Expected order is root first, then child, then grandchild.
        assert_eq!(
            targets
                .iter()
                .map(|target| target.canister_id.as_str())
                .collect::<Vec<_>>(),
            vec![ROOT, CHILD, GRANDCHILD]
        );
    }

    // Ensure snapshot ids can be extracted from common command output.
    #[test]
    fn parses_snapshot_id_from_output() {
        let snapshot_id = parse_snapshot_id("Created snapshot: snap_abc-123\n");

        assert_eq!(snapshot_id.as_deref(), Some("snap_abc-123"));
    }

    // Ensure dfx snapshot list output can be used when create is quiet.
    #[test]
    fn parses_snapshot_ids_from_list_output() {
        // Only the text before the first ':' is the id; the size and
        // timestamp after it are ignored.
        let snapshot_ids = parse_snapshot_list_ids(
            "0000000000000000ffffffffff9000050101: 213.76 MiB, taken at 2026-05-03 12:20:53 UTC\n",
        );

        assert_eq!(snapshot_ids, vec!["0000000000000000ffffffffff9000050101"]);
    }

    // Ensure option parsing covers the intended dry-run command.
    #[test]
    fn parses_download_options() {
        let options = SnapshotDownloadOptions::parse([
            OsString::from("--canister"),
            OsString::from(ROOT),
            OsString::from("--out"),
            OsString::from("backups/test"),
            OsString::from("--registry-json"),
            OsString::from("registry.json"),
            OsString::from("--recursive"),
            OsString::from("--dry-run"),
            OsString::from("--stop-before-snapshot"),
            OsString::from("--resume-after-snapshot"),
        ])
        .expect("parse options");

        assert_eq!(options.canister, ROOT);
        // `--include-children` is not passed above, yet the flag is expected
        // to be set: `--recursive` is expected to imply it.
        assert!(options.include_children);
        assert!(options.recursive);
        assert!(options.dry_run);
        // Both lifecycle flags together map to the stop-and-resume mode.
        assert_eq!(options.lifecycle, SnapshotLifecycleMode::StopAndResume);
    }

    // Ensure snapshot capture fails closed when topology changes before creation.
    #[test]
    fn topology_stability_rejects_pre_snapshot_drift() {
        let discovery = topology_hash(HASH);
        let pre_snapshot =
            topology_hash("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff");

        let err = ensure_topology_stable(&discovery, &pre_snapshot)
            .expect_err("topology drift should fail");

        assert!(matches!(err, SnapshotCommandError::TopologyChanged { .. }));
    }

    // Ensure the actual command path writes a manifest and durable journal.
    //
    // Unix-only: the test installs an executable shell script that stands in
    // for `dfx`. The script handles three `canister snapshot` subcommands:
    //   - `create`   prints "snapshot-" followed by its 4th argument,
    //   - `list`     prints nothing and succeeds,
    //   - `download` creates the directory given as its 7th argument and
    //                writes a `snapshot.txt` file into it.
    // Any other invocation fails with "unexpected args" on stderr.
    #[cfg(unix)]
    #[test]
    fn download_snapshots_writes_manifest_and_durable_journal() {
        use std::os::unix::fs::PermissionsExt;

        let root = temp_dir("canic-cli-download");
        let fake_dfx = root.join("fake-dfx.sh");
        fs::create_dir_all(&root).expect("create temp root");
        fs::write(
            &fake_dfx,
            r#"#!/bin/sh
set -eu
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "create" ]; then
  echo "snapshot-$4"
  exit 0
fi
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "list" ]; then
  exit 0
fi
if [ "$1" = "canister" ] && [ "$2" = "snapshot" ] && [ "$3" = "download" ]; then
  mkdir -p "$7"
  printf "%s:%s\n" "$4" "$5" > "$7/snapshot.txt"
  exit 0
fi
echo "unexpected args: $*" >&2
exit 1
"#,
        )
        .expect("write fake dfx");
        // Mark the script executable so it can be spawned in place of dfx.
        let mut permissions = fs::metadata(&fake_dfx)
            .expect("stat fake dfx")
            .permissions();
        permissions.set_mode(0o755);
        fs::set_permissions(&fake_dfx, permissions).expect("chmod fake dfx");

        // Single-canister, non-dry-run download with no registry source.
        let out = root.join("backup");
        let options = SnapshotDownloadOptions {
            canister: ROOT.to_string(),
            out: out.clone(),
            root: None,
            registry_json: None,
            include_children: false,
            recursive: false,
            dry_run: false,
            lifecycle: SnapshotLifecycleMode::SnapshotOnly,
            network: None,
            dfx: fake_dfx.display().to_string(),
        };

        let result = download_snapshots(&options).expect("download snapshots");
        let layout = BackupLayout::new(out);
        let journal = layout.read_journal().expect("read journal");
        let manifest = layout.read_manifest().expect("read manifest");

        // Clean up before asserting so a failed assertion below does not
        // leave the temp directory behind.
        fs::remove_dir_all(root).expect("remove temp root");
        assert_eq!(result.artifacts.len(), 1);
        assert_eq!(journal.artifacts.len(), 1);
        // One target: every start/complete counter is expected to be 1.
        assert_eq!(journal.operation_metrics.target_count, 1);
        assert_eq!(journal.operation_metrics.snapshot_create_started, 1);
        assert_eq!(journal.operation_metrics.snapshot_create_completed, 1);
        assert_eq!(journal.operation_metrics.snapshot_download_started, 1);
        assert_eq!(journal.operation_metrics.snapshot_download_completed, 1);
        assert_eq!(journal.operation_metrics.checksum_verify_started, 1);
        assert_eq!(journal.operation_metrics.checksum_verify_completed, 1);
        assert_eq!(journal.operation_metrics.artifact_finalize_started, 1);
        assert_eq!(journal.operation_metrics.artifact_finalize_completed, 1);
        assert_eq!(journal.artifacts[0].state, ArtifactState::Durable);
        assert!(journal.artifacts[0].checksum.is_some());
        assert_eq!(manifest.backup_id, journal.backup_id);
        assert_eq!(manifest.fleet.members.len(), 1);
        assert_eq!(manifest.fleet.members[0].canister_id, ROOT);
        // Matches what the fake `create` branch echoes for ROOT.
        assert_eq!(
            manifest.fleet.members[0].source_snapshot.snapshot_id,
            "snapshot-aaaaa-aa"
        );
        // Manifest and journal must agree on the artifact checksum.
        assert_eq!(
            manifest.fleet.members[0]
                .source_snapshot
                .checksum
                .as_deref(),
            journal.artifacts[0].checksum.as_deref()
        );
    }

    // Build representative subnet registry JSON.
    //
    // The payload is wrapped in an "Ok" key and holds three entries. Note that
    // `parent_pid` appears in three encodings: `null` (ROOT), a bare string
    // (CHILD), and a single-element array (GRANDCHILD).
    fn registry_json() -> String {
        json!({
            "Ok": [
                {
                    "pid": ROOT,
                    "role": "root",
                    "record": {
                        "pid": ROOT,
                        "role": "root",
                        "parent_pid": null
                    }
                },
                {
                    "pid": CHILD,
                    "role": "app",
                    "record": {
                        "pid": CHILD,
                        "role": "app",
                        "parent_pid": ROOT
                    }
                },
                {
                    "pid": GRANDCHILD,
                    "role": "worker",
                    "record": {
                        "pid": GRANDCHILD,
                        "role": "worker",
                        "parent_pid": [CHILD]
                    }
                }
            ]
        })
        .to_string()
    }

    // Build one topology hash for stability tests.
    //
    // Only `hash` varies between calls; `algorithm` and `input` are fixed
    // descriptive strings.
    fn topology_hash(hash: &str) -> TopologyHash {
        TopologyHash {
            algorithm: "sha256".to_string(),
            input: "sorted(pid,parent_pid,role,module_hash)".to_string(),
            hash: hash.to_string(),
        }
    }

    // Build a unique temporary directory path.
    //
    // The name combines the prefix, the process id, and the current time in
    // nanoseconds since the Unix epoch. The directory itself is NOT created
    // here; callers create it.
    fn temp_dir(prefix: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system time after epoch")
            .as_nanos();
        std::env::temp_dir().join(format!("{prefix}-{}-{nanos}", std::process::id()))
    }
}