canic_core/workflow/
orchestrator.rs

1use crate::{
2    Error, ThisError,
3    cdk::{api::canister_self, mgmt::CanisterInstallMode, types::Principal},
4    ids::CanisterRole,
5    log,
6    log::Topic,
7    ops::ic::{delete_canister, upgrade_canister},
8    ops::{
9        runtime::{canister::install_code_with_extra_arg, wasm::WasmOps},
10        storage::{
11            directory::{AppDirectoryOps, SubnetDirectoryOps},
12            pool::PoolOps,
13            registry::SubnetRegistryOps,
14        },
15    },
16    workflow::{
17        WorkflowError,
18        cascade::{state::root_cascade_state, topology::root_cascade_topology_for_pid},
19        directory::{RootAppDirectoryBuilder, RootSubnetDirectoryBuilder},
20        ic::provision::{
21            build_nonroot_init_payload, create_and_install_canister,
22            rebuild_directories_from_registry,
23        },
24        pool::{pool_export_canister, pool_import_canister, pool_recycle_canister},
25    },
26};
27
///
/// OrchestratorError
///
/// Precondition and postcondition failures reported by the lifecycle
/// orchestrator in this file. The user-facing message of each variant is
/// defined by its `#[error]` attribute.
///

#[derive(Debug, ThisError)]
pub enum OrchestratorError {
    /// `assert_parent_exists` found no registry entry for the parent principal.
    #[error("parent {0} not found in registry")]
    ParentNotFound(Principal),

    /// A registry lookup for the canister itself returned nothing.
    #[error("registry entry missing for {0}")]
    RegistryEntryMissing(Principal),

    /// The registry's recorded parent for `pid` (`found`) is not `expected`;
    /// `found` is `None` when the entry has no parent at all.
    #[error("immediate-parent mismatch: canister {pid} expects parent {expected}, got {found:?}")]
    ImmediateParentMismatch {
        pid: Principal,
        expected: Principal,
        found: Option<Principal>,
    },

    /// Delete was refused because the registry subtree of `pid` holds more
    /// than one node; `size` is the subtree length that was observed.
    #[error("cannot delete {pid}: subtree is not empty ({size} nodes)")]
    SubtreeNotEmpty { pid: Principal, size: usize },

    /// The module hash on record does not equal the expected hash.
    #[error("module hash mismatch for {0}")]
    ModuleHashMismatch(Principal),

    /// The exported app directory differs from one rebuilt from the registry.
    #[error("app directory diverged from registry")]
    AppDirectoryDiverged,

    /// The exported subnet directory differs from one rebuilt from the registry.
    #[error("subnet directory diverged from registry")]
    SubnetDirectoryDiverged,

    /// The canister is in the pool although the operation requires it not to be.
    #[error("canister {0} unexpectedly present in pool")]
    InPool(Principal),

    /// The canister is not in the pool although the operation requires it to be.
    #[error("expected canister {0} to be in pool")]
    NotInPool(Principal),

    /// An init-based install (reinstall or pool adoption) was requested for a
    /// canister whose role is `CanisterRole::ROOT`.
    #[error("cannot perform init-based install for root canister {0}")]
    RootInitNotSupported(Principal),

    /// The registry entry has no parent pid, so no init payload can be built.
    #[error("cannot build init payload for {0}: missing parent pid")]
    MissingParentPid(Principal),
}
71
72impl From<OrchestratorError> for Error {
73    fn from(err: OrchestratorError) -> Self {
74        WorkflowError::from(err).into()
75    }
76}
77
/// Lifecycle operations accepted by `CanisterLifecycleOrchestrator::apply`.
pub enum LifecycleEvent {
    /// Create and install a new canister of `role` under `parent`.
    /// `extra_arg` is forwarded unchanged to `create_and_install_canister`.
    Create {
        role: CanisterRole,
        parent: Principal,
        extra_arg: Option<Vec<u8>>,
    },
    /// Delete `pid`. Leaf-only: refused with `SubtreeNotEmpty` when the
    /// registry subtree of `pid` contains more than one node.
    Delete {
        pid: Principal,
    },
    /// Upgrade `pid` with the wasm looked up for its registered role.
    Upgrade {
        pid: Principal,
    },
    /// Reinstall `pid` with a freshly built init payload.
    /// Refused for canisters whose role is `CanisterRole::ROOT`.
    Reinstall {
        pid: Principal,
    },

    /// Adopt a pool canister into topology under `parent`.
    /// Pool export is a handoff; this event performs the attach + install.
    AdoptPool {
        pid: Principal,
        parent: Principal,
        extra_arg: Option<Vec<u8>>,
    },

    /// Hand `pid` over to the pool via `pool_recycle_canister`.
    /// The canister must currently have a registry entry.
    RecycleToPool {
        pid: Principal,
    },
}
106
/// Outcome summary returned by every lifecycle handler.
///
/// The `Default` value (no new pid, both flags `false`) is what the upgrade
/// and reinstall handlers return.
#[derive(Default)]
pub struct LifecycleResult {
    /// Principal of a newly created canister; only the create handler sets it.
    pub new_canister_pid: Option<Principal>,
    /// Whether the handler requested a topology cascade.
    pub cascaded_topology: bool,
    /// Whether the handler requested a directory/state cascade.
    pub cascaded_directories: bool,
}
113
114impl LifecycleResult {
115    #[must_use]
116    pub const fn created(pid: Principal) -> Self {
117        Self {
118            new_canister_pid: Some(pid),
119            cascaded_topology: true,
120            cascaded_directories: true,
121        }
122    }
123}
124
/// Field-less entry point for canister lifecycle operations; all behaviour
/// lives in the associated functions of its `impl` block.
pub struct CanisterLifecycleOrchestrator;
126
127impl CanisterLifecycleOrchestrator {
128    pub async fn apply(event: LifecycleEvent) -> Result<LifecycleResult, Error> {
129        let root_pid = canister_self();
130
131        match event {
132            // -----------------------------------------------------------------
133            // CREATE
134            // -----------------------------------------------------------------
135            LifecycleEvent::Create {
136                role,
137                parent,
138                extra_arg,
139            } => Self::apply_create(role, parent, extra_arg).await,
140
141            // -----------------------------------------------------------------
142            // DELETE (leaf-only)
143            // -----------------------------------------------------------------
144            LifecycleEvent::Delete { pid } => Self::apply_delete(pid, root_pid).await,
145
146            // -----------------------------------------------------------------
147            // UPGRADE
148            // -----------------------------------------------------------------
149            LifecycleEvent::Upgrade { pid } => Self::apply_upgrade(pid).await,
150
151            // -----------------------------------------------------------------
152            // REINSTALL
153            // -----------------------------------------------------------------
154            LifecycleEvent::Reinstall { pid } => Self::apply_reinstall(pid).await,
155
156            // -----------------------------------------------------------------
157            // ADOPT FROM POOL
158            // -----------------------------------------------------------------
159            LifecycleEvent::AdoptPool {
160                pid,
161                parent,
162                extra_arg,
163            } => Self::apply_adopt_pool(pid, parent, extra_arg).await,
164            // -----------------------------------------------------------------
165            // RECYCLE INTO POOL
166            // -----------------------------------------------------------------
167            LifecycleEvent::RecycleToPool { pid } => {
168                Self::apply_recycle_to_pool(pid, root_pid).await
169            }
170        }
171    }
172
173    async fn apply_create(
174        role: CanisterRole,
175        parent: Principal,
176        extra_arg: Option<Vec<u8>>,
177    ) -> Result<LifecycleResult, Error> {
178        assert_parent_exists(parent)?;
179
180        let pid = create_and_install_canister(&role, parent, extra_arg).await?;
181
182        assert_immediate_parent(pid, parent)?;
183        assert_not_in_pool(pid)?;
184
185        cascade_all(Some(&role), Some(pid)).await?;
186
187        Ok(LifecycleResult::created(pid))
188    }
189
190    async fn apply_delete(pid: Principal, root_pid: Principal) -> Result<LifecycleResult, Error> {
191        assert_no_children(pid)?;
192
193        // Snapshot BEFORE destructive delete.
194        let snap = snapshot_topology_required(pid)?;
195
196        delete_canister(pid).await?;
197
198        let topology_target = snap.parent_pid.filter(|p| *p != root_pid);
199        cascade_all(Some(&snap.role), topology_target).await?;
200
201        Ok(LifecycleResult {
202            new_canister_pid: None,
203            cascaded_topology: topology_target.is_some(),
204            cascaded_directories: true,
205        })
206    }
207
208    async fn apply_upgrade(pid: Principal) -> Result<LifecycleResult, Error> {
209        let entry =
210            SubnetRegistryOps::get(pid).ok_or(OrchestratorError::RegistryEntryMissing(pid))?;
211
212        let wasm = WasmOps::try_get(&entry.role)?;
213
214        if let Some(parent_pid) = entry.parent_pid {
215            assert_parent_exists(parent_pid)?;
216            assert_immediate_parent(pid, parent_pid)?;
217        }
218        assert_not_in_pool(pid)?;
219
220        upgrade_canister(pid, wasm.bytes()).await?;
221        SubnetRegistryOps::update_module_hash(pid, wasm.module_hash());
222        assert_module_hash(pid, wasm.module_hash())?;
223
224        Ok(LifecycleResult::default())
225    }
226
227    async fn apply_reinstall(pid: Principal) -> Result<LifecycleResult, Error> {
228        let entry =
229            SubnetRegistryOps::get(pid).ok_or(OrchestratorError::RegistryEntryMissing(pid))?;
230
231        if entry.role == CanisterRole::ROOT {
232            return Err(OrchestratorError::RootInitNotSupported(pid).into());
233        }
234
235        let wasm = WasmOps::try_get(&entry.role)?;
236
237        let parent_pid = entry
238            .parent_pid
239            .ok_or(OrchestratorError::MissingParentPid(pid))?;
240        assert_parent_exists(parent_pid)?;
241        assert_immediate_parent(pid, parent_pid)?;
242        assert_not_in_pool(pid)?;
243
244        let payload = build_nonroot_init_payload(&entry.role, parent_pid)?;
245        install_code_with_extra_arg(
246            CanisterInstallMode::Reinstall,
247            pid,
248            wasm.bytes(),
249            payload,
250            None,
251        )
252        .await?;
253        SubnetRegistryOps::update_module_hash(pid, wasm.module_hash());
254        assert_module_hash(pid, wasm.module_hash())?;
255
256        Ok(LifecycleResult::default())
257    }
258
259    async fn apply_adopt_pool(
260        pid: Principal,
261        parent: Principal,
262        extra_arg: Option<Vec<u8>>,
263    ) -> Result<LifecycleResult, Error> {
264        // Must currently be in pool
265        assert_in_pool(pid)?;
266        assert_parent_exists(parent)?;
267
268        // Export metadata from pool (handoff)
269        let (role, stored_hash) = pool_export_canister(pid).await?;
270
271        // No longer in pool
272        assert_not_in_pool(pid)?;
273
274        if role == CanisterRole::ROOT {
275            try_return_to_pool(pid, "adopt_pool role=ROOT").await;
276            return Err(OrchestratorError::RootInitNotSupported(pid).into());
277        }
278
279        let wasm = WasmOps::try_get(&role)?;
280
281        // Validate module hash matches what pool expected (defensive)
282        if wasm.module_hash() != stored_hash {
283            try_return_to_pool(pid, "adopt_pool module hash mismatch").await;
284            return Err(OrchestratorError::ModuleHashMismatch(pid).into());
285        }
286
287        // Attach before install so init hooks can observe the registry; roll back on failure.
288        if let Err(err) = SubnetRegistryOps::register(pid, &role, parent, stored_hash) {
289            try_return_to_pool(pid, "adopt_pool register failed").await;
290            return Err(err);
291        }
292
293        let payload = build_nonroot_init_payload(&role, parent)?;
294        if let Err(err) = install_code_with_extra_arg(
295            CanisterInstallMode::Install,
296            pid,
297            wasm.bytes(),
298            payload,
299            extra_arg,
300        )
301        .await
302        {
303            let _ = SubnetRegistryOps::remove(&pid);
304            try_return_to_pool(pid, "adopt_pool install failed").await;
305            return Err(err);
306        }
307
308        // Postconditions
309        assert_immediate_parent(pid, parent)?;
310
311        // Targeted cascade on the newly adopted canister
312        cascade_all(Some(&role), Some(pid)).await?;
313
314        Ok(LifecycleResult {
315            new_canister_pid: None,
316            cascaded_topology: true,
317            cascaded_directories: true,
318        })
319    }
320
321    async fn apply_recycle_to_pool(
322        pid: Principal,
323        root_pid: Principal,
324    ) -> Result<LifecycleResult, Error> {
325        // Snapshot BEFORE destruction. If it wasn't in registry, that's a bug.
326        let snap = snapshot_topology_required(pid)?;
327
328        pool_recycle_canister(pid).await?;
329
330        let topology_target = snap.parent_pid.filter(|p| *p != root_pid);
331        cascade_all(Some(&snap.role), topology_target).await?;
332
333        Ok(LifecycleResult {
334            new_canister_pid: None,
335            cascaded_topology: topology_target.is_some(),
336            cascaded_directories: true,
337        })
338    }
339}
340
341//
342// Topology snapshotting: single source of parent/role for destructive operations.
343//
344
/// Role and parent of a canister as copied from its registry entry by
/// `snapshot_topology_required`, so both stay available after a destructive
/// operation.
struct TopologySnapshot {
    /// Role recorded in the registry entry.
    role: CanisterRole,
    /// Parent recorded in the registry entry, if any.
    parent_pid: Option<Principal>,
}
349
350fn snapshot_topology_required(pid: Principal) -> Result<TopologySnapshot, OrchestratorError> {
351    let entry = SubnetRegistryOps::get(pid).ok_or(OrchestratorError::RegistryEntryMissing(pid))?;
352
353    Ok(TopologySnapshot {
354        role: entry.role,
355        parent_pid: entry.parent_pid,
356    })
357}
358
359//
360// Cascades
361//
362
363async fn cascade_all(
364    role_opt: Option<&CanisterRole>,
365    topology_target: Option<Principal>,
366) -> Result<(), Error> {
367    if let Some(target) = topology_target {
368        root_cascade_topology_for_pid(target).await?;
369    }
370
371    if let Some(role) = role_opt {
372        // Ensure newly created/adopted canisters inherit the current app state.
373        let snapshot = rebuild_directories_from_registry(Some(role))
374            .await?
375            .with_app_state()
376            .build();
377        root_cascade_state(&snapshot).await?;
378        assert_directories_match_registry()?;
379    }
380
381    Ok(())
382}
383
384//
385// Invariants
386//
387
388fn assert_parent_exists(parent_pid: Principal) -> Result<(), OrchestratorError> {
389    SubnetRegistryOps::get(parent_pid).ok_or(OrchestratorError::ParentNotFound(parent_pid))?;
390    Ok(())
391}
392
393fn assert_no_children(pid: Principal) -> Result<(), OrchestratorError> {
394    let subtree = SubnetRegistryOps::subtree(pid);
395    if subtree.len() > 1 {
396        return Err(OrchestratorError::SubtreeNotEmpty {
397            pid,
398            size: subtree.len(),
399        });
400    }
401    Ok(())
402}
403
404fn assert_module_hash(pid: Principal, expected_hash: Vec<u8>) -> Result<(), OrchestratorError> {
405    let entry = SubnetRegistryOps::get(pid).ok_or(OrchestratorError::RegistryEntryMissing(pid))?;
406    if entry.module_hash == Some(expected_hash) {
407        Ok(())
408    } else {
409        Err(OrchestratorError::ModuleHashMismatch(pid))
410    }
411}
412
413fn assert_directories_match_registry() -> Result<(), Error> {
414    let app_built = RootAppDirectoryBuilder::build_from_registry();
415    let app_exported = AppDirectoryOps::export();
416
417    if app_built != app_exported {
418        return Err(OrchestratorError::AppDirectoryDiverged.into());
419    }
420
421    let subnet_built = RootSubnetDirectoryBuilder::build_from_registry();
422    let subnet_exported = SubnetDirectoryOps::export();
423
424    if subnet_built != subnet_exported {
425        return Err(OrchestratorError::SubnetDirectoryDiverged.into());
426    }
427
428    Ok(())
429}
430
431fn assert_not_in_pool(pid: Principal) -> Result<(), OrchestratorError> {
432    if PoolOps::contains(&pid) {
433        Err(OrchestratorError::InPool(pid))
434    } else {
435        Ok(())
436    }
437}
438
439fn assert_in_pool(pid: Principal) -> Result<(), OrchestratorError> {
440    if PoolOps::contains(&pid) {
441        Ok(())
442    } else {
443        Err(OrchestratorError::NotInPool(pid))
444    }
445}
446
447fn assert_immediate_parent(
448    pid: Principal,
449    expected_parent: Principal,
450) -> Result<(), OrchestratorError> {
451    let entry = SubnetRegistryOps::get(pid).ok_or(OrchestratorError::RegistryEntryMissing(pid))?;
452
453    match entry.parent_pid {
454        Some(pp) if pp == expected_parent => Ok(()),
455        other => Err(OrchestratorError::ImmediateParentMismatch {
456            pid,
457            expected: expected_parent,
458            found: other,
459        }),
460    }
461}
462
463async fn try_return_to_pool(pid: Principal, context: &str) {
464    if let Err(err) = pool_import_canister(pid).await {
465        log!(
466            Topic::CanisterLifecycle,
467            Warn,
468            "failed to return {pid} to pool after {context}: {err}"
469        );
470    }
471}