Skip to main content

koi_compose/
cores.rs

1//! Daemon core composition — the single place that constructs every domain core, wires the
2//! cross-domain integration bridges between them, spawns the domain background tasks
3//! (orchestrator + certmesh role loops), and tears it all down in order.
4//!
5//! Before P07 this graph was hand-written twice — in the binary's `daemon_mode` and again
6//! in the Windows service's `run_service` — and the two had already drifted into a verified
7//! `koi install` defect. [`build_cores`] is now the one copy both call, so the daemon they
8//! construct is identical by construction.
9//!
10//! The enrollment-approval pump is intentionally *not* spawned here: its decider differs by
11//! host (the foreground daemon prompts on stdin; consoleless hosts deny). The caller spawns
12//! it via [`crate::certmesh::spawn_enrollment_approval`].
13
14use std::path::Path;
15use std::sync::Arc;
16use std::time::Duration;
17
18use tokio::task::JoinHandle;
19use tokio_util::sync::CancellationToken;
20
21use koi_common::integration::{
22    AliasFeedback, CertmeshSnapshot, DnsProbe, MdnsSnapshot, ProxySnapshot,
23};
24
25/// The set of domain cores a daemon runs. Each is present only if its capability is enabled
26/// (via the `no_*` flags in [`CoreSpec`]).
27#[derive(Clone, Default)]
28pub struct Cores {
29    pub mdns: Option<Arc<koi_mdns::MdnsCore>>,
30    pub certmesh: Option<Arc<koi_certmesh::CertmeshCore>>,
31    pub dns: Option<Arc<koi_dns::DnsRuntime>>,
32    pub health: Option<Arc<koi_health::HealthRuntime>>,
33    pub proxy: Option<Arc<koi_proxy::ProxyRuntime>>,
34    pub udp: Option<Arc<koi_udp::UdpRuntime>>,
35    pub runtime: Option<Arc<koi_runtime::RuntimeCore>>,
36}
37
38/// Capability flags + inputs needed to build the cores. A daemon-`Config` subset, kept here
39/// (rather than depending on the binary's `Config`) so koi-compose stays standalone.
40pub struct CoreSpec {
41    pub no_mdns: bool,
42    pub no_certmesh: bool,
43    pub no_dns: bool,
44    pub no_health: bool,
45    pub no_proxy: bool,
46    pub no_udp: bool,
47    pub no_runtime: bool,
48    /// Data directory for certmesh state (resolved by the caller).
49    pub data_dir: std::path::PathBuf,
50    /// DNS configuration (the caller's resolved `DnsConfig`).
51    pub dns_config: koi_dns::DnsConfig,
52    /// Runtime backend selector string ("auto", "docker", "podman", …).
53    pub runtime: String,
54    /// HTTP port — used by the certmesh CA announcement / heartbeat loops.
55    pub http_port: u16,
56}
57
58/// Initialize the certmesh core, auto-unlocking from the vault when a key is present.
59///
60/// Always returns `Some` (so HTTP routes mount even before `koi certmesh create`):
61/// - CA not initialized → an uninitialized core (routes reachable for `/create`);
62/// - CA initialized + a vault auto-unlock key present → booted **already unlocked**,
63///   collapsing the old "create locked → read key → unlock" three-step;
64/// - CA initialized + no key (or decryption fails) → a locked core.
65///
66/// This is the converged single definition shared by the daemon, the Windows service, and
67/// koi-embedded (the daemon path thereby gains the vault auto-unlock embedded already had).
68pub fn init_certmesh_core(data_dir: Option<&Path>) -> Option<Arc<koi_certmesh::CertmeshCore>> {
69    // Composition root: resolve the data dir once (Some -> injected dir, None -> the one
70    // default) and thread it into every branch so a custom data_dir is honoured end-to-end,
71    // including the early returns.
72    let paths = koi_certmesh::CertmeshPaths::with_data_dir(
73        koi_common::paths::koi_data_dir_with_override(data_dir),
74    );
75    if !paths.is_ca_initialized() {
76        tracing::info!("Certmesh: CA not initialized - routes mounted for /create");
77        return Some(Arc::new(
78            koi_certmesh::CertmeshCore::uninitialized_with_paths(paths),
79        ));
80    }
81
82    let roster_path = paths.roster_path();
83    let roster = match koi_certmesh::roster::load_roster(&roster_path) {
84        Ok(r) => r,
85        Err(e) => {
86            tracing::warn!(error = %e, "Failed to load certmesh roster - using uninitialized state");
87            return Some(Arc::new(
88                koi_certmesh::CertmeshCore::uninitialized_with_paths(paths),
89            ));
90        }
91    };
92
93    let profile = roster.metadata.trust_profile;
94
95    // ── Auto-unlock at init: single source of truth ─────────────
96    // The auto-unlock passphrase lives in the koi-crypto vault (written by
97    // CertmeshCore::save_auto_unlock_key_at, which deletes any legacy plaintext file).
98    // Retrieve it through the domain crate so this boot path can never drift from where the
99    // key is actually stored. When a key is present, boot the core already unlocked.
100    if let Ok(Some(pp)) = koi_certmesh::CertmeshCore::read_auto_unlock_key(&paths) {
101        match koi_certmesh::ca::load_ca(&pp, &paths) {
102            Ok(ca_state) => {
103                // Reload roster (fresh copy for the new Arc)
104                if let Ok(fresh_roster) = koi_certmesh::roster::load_roster(&roster_path) {
105                    let auth_path = paths.auth_path();
106                    let auth = if auth_path.exists() {
107                        std::fs::read_to_string(&auth_path)
108                            .ok()
109                            .and_then(|json| {
110                                serde_json::from_str::<koi_crypto::auth::StoredAuth>(&json).ok()
111                            })
112                            .and_then(|stored| stored.unlock(&pp).ok())
113                    } else {
114                        None
115                    };
116
117                    tracing::info!("Certmesh CA auto-unlocked at init from vault");
118                    return Some(Arc::new(koi_certmesh::CertmeshCore::new_with_paths(
119                        ca_state,
120                        fresh_roster,
121                        auth,
122                        profile,
123                        paths,
124                    )));
125                }
126            }
127            Err(e) => {
128                tracing::warn!(
129                    error = %e,
130                    "Auto-unlock key exists in vault but CA decryption failed"
131                );
132            }
133        }
134    }
135
136    // No auto-unlock key - boot locked
137    tracing::info!("Certmesh: CA initialized (locked, use `koi certmesh unlock` to decrypt)");
138    let core = koi_certmesh::CertmeshCore::locked_with_paths(roster, profile, paths);
139    Some(Arc::new(core))
140}
141
142/// Build all domain cores + cross-domain bridges, then spawn the caller-invariant domain
143/// background tasks: the runtime orchestrator (when runtime is enabled) and the certmesh
144/// role loops (when certmesh is enabled). Returns the assembled [`Cores`].
145///
146/// The bridges are wired in dependency order: DNS consumes the mDNS/certmesh/alias bridges;
147/// health consumes the mDNS/DNS/certmesh/proxy bridges. Disabled capabilities pass `None`.
148pub async fn build_cores(
149    spec: &CoreSpec,
150    cancel: &CancellationToken,
151    tasks: &mut Vec<JoinHandle<()>>,
152) -> Cores {
153    // ── mDNS ──
154    let mdns_core = if !spec.no_mdns {
155        match koi_mdns::MdnsCore::with_cancel(cancel.clone()) {
156            Ok(core) => Some(Arc::new(core)),
157            Err(e) => {
158                tracing::error!(error = %e, "Failed to initialize mDNS core");
159                None
160            }
161        }
162    } else {
163        tracing::info!("mDNS capability: disabled");
164        None
165    };
166
167    // ── Certmesh ──
168    let certmesh_core = if !spec.no_certmesh {
169        init_certmesh_core(Some(&spec.data_dir))
170    } else {
171        tracing::info!("Certmesh capability: disabled");
172        None
173    };
174
175    // ── Integration bridges ──
176    // These wrap domain cores and implement cross-domain traits from koi_common::integration.
177    let mdns_bridge: Option<Arc<dyn MdnsSnapshot>> = if let Some(ref core) = mdns_core {
178        Some(crate::bridges::MdnsBridge::spawn(core.clone()).await)
179    } else {
180        None
181    };
182
183    let certmesh_bridge: Option<Arc<dyn CertmeshSnapshot>> = certmesh_core
184        .as_ref()
185        .map(|core| crate::bridges::CertmeshBridge::new(core.clone()) as Arc<dyn CertmeshSnapshot>);
186
187    let alias_feedback: Option<Arc<dyn AliasFeedback>> = certmesh_core.as_ref().map(|core| {
188        crate::bridges::AliasFeedbackBridge::new(core.clone()) as Arc<dyn AliasFeedback>
189    });
190
191    // ── DNS (consumes mdns + certmesh + alias bridges) ──
192    let dns_runtime = if !spec.no_dns {
193        let core = koi_dns::DnsCore::new(
194            spec.dns_config.clone(),
195            mdns_bridge.clone(),
196            certmesh_bridge.clone(),
197            alias_feedback,
198        )
199        .await;
200        match core {
201            Ok(core) => {
202                let runtime = Arc::new(koi_dns::DnsRuntime::new(core));
203                if let Err(e) = runtime.start().await {
204                    tracing::error!(error = %e, "Failed to start DNS server");
205                }
206                Some(runtime)
207            }
208            Err(e) => {
209                tracing::error!(error = %e, "Failed to initialize DNS core");
210                None
211            }
212        }
213    } else {
214        tracing::info!("DNS capability: disabled");
215        None
216    };
217
218    // ── Proxy ──
219    let proxy_runtime = if !spec.no_proxy {
220        match koi_proxy::ProxyCore::new() {
221            Ok(core) => {
222                let runtime = Arc::new(koi_proxy::ProxyRuntime::new(Arc::new(core)));
223                if let Err(e) = runtime.start_all().await {
224                    tracing::error!(error = %e, "Failed to start proxy listeners");
225                }
226                Some(runtime)
227            }
228            Err(e) => {
229                tracing::error!(error = %e, "Failed to initialize proxy core");
230                None
231            }
232        }
233    } else {
234        tracing::info!("Proxy capability: disabled");
235        None
236    };
237
238    let dns_bridge: Option<Arc<dyn DnsProbe>> = dns_runtime
239        .as_ref()
240        .map(|rt| crate::bridges::DnsBridge::new(rt.clone()) as Arc<dyn DnsProbe>);
241
242    let proxy_bridge: Option<Arc<dyn ProxySnapshot>> = proxy_runtime
243        .as_ref()
244        .map(|rt| crate::bridges::ProxyBridge::new(rt.core()) as Arc<dyn ProxySnapshot>);
245
246    // ── Health (consumes mdns + dns + certmesh + proxy bridges) ──
247    let health_runtime = if !spec.no_health {
248        let core = Arc::new(
249            koi_health::HealthCore::new(
250                mdns_bridge.clone(),
251                dns_bridge,
252                certmesh_bridge,
253                proxy_bridge,
254            )
255            .await,
256        );
257        let runtime = Arc::new(koi_health::HealthRuntime::new(core));
258        if let Err(e) = runtime.start().await {
259            tracing::error!(error = %e, "Failed to start health checks");
260        }
261        Some(runtime)
262    } else {
263        tracing::info!("Health capability: disabled");
264        None
265    };
266
267    // ── UDP ──
268    let udp_runtime = if !spec.no_udp {
269        Some(Arc::new(koi_udp::UdpRuntime::new(cancel.clone())))
270    } else {
271        tracing::info!("UDP capability: disabled");
272        None
273    };
274
275    // ── Runtime adapter ──
276    let runtime_core = if !spec.no_runtime {
277        let backend_kind = koi_runtime::RuntimeBackendKind::from_str_loose(&spec.runtime)
278            .unwrap_or_else(|| {
279                tracing::warn!(
280                    value = %spec.runtime,
281                    "Unknown runtime backend, falling back to auto"
282                );
283                koi_runtime::RuntimeBackendKind::Auto
284            });
285        let rt_config = koi_runtime::RuntimeConfig {
286            backend_kind,
287            socket_path: None,
288        };
289        let core = Arc::new(koi_runtime::RuntimeCore::new(rt_config));
290        match core.start_watching(cancel.clone()).await {
291            Ok(()) => Some(core),
292            Err(e) => {
293                tracing::warn!(error = %e, "Runtime adapter unavailable, continuing without it");
294                None
295            }
296        }
297    } else {
298        tracing::info!("Runtime capability: disabled");
299        None
300    };
301
302    // ── Runtime orchestrator ──
303    // Translates container lifecycle events into mDNS/DNS/health/proxy operations.
304    if let Some(ref rt) = runtime_core {
305        tasks.push(crate::orchestrator::spawn_orchestrator(
306            rt,
307            crate::orchestrator::OrchestrationTargets {
308                mdns: mdns_core.clone(),
309                dns: dns_runtime.clone(),
310                health: health_runtime.clone(),
311                proxy: proxy_runtime.clone(),
312            },
313            cancel.clone(),
314        ));
315    }
316
317    let cores = Cores {
318        mdns: mdns_core,
319        certmesh: certmesh_core,
320        dns: dns_runtime,
321        health: health_runtime,
322        proxy: proxy_runtime,
323        udp: udp_runtime,
324        runtime: runtime_core,
325    };
326
327    // ── Certmesh role background loops (caller-invariant) ──
328    // The approval pump is spawned by the caller (its decider differs by host).
329    if let Some(ref certmesh) = cores.certmesh {
330        crate::certmesh::spawn_certmesh_background_tasks(
331            certmesh,
332            cores.mdns.clone(),
333            spec.http_port,
334            cancel,
335            tasks,
336        );
337    }
338
339    tracing::debug!("Domain cores built");
340    cores
341}
342
343/// Ordered teardown: cancel → drain in-flight → join tasks → withdraw the HTTP mDNS
344/// announcement → core goodbye (mDNS, DNS, health, proxy, UDP). Bounded by `timeout`.
345pub async fn ordered_shutdown(
346    cancel: &CancellationToken,
347    tasks: Vec<JoinHandle<()>>,
348    cores: &Cores,
349    http_announce_id: Option<String>,
350    timeout: Duration,
351    drain: Duration,
352) {
353    let shutdown = async {
354        cancel.cancel();
355        tokio::time::sleep(drain).await;
356        for task in tasks {
357            let _ = task.await;
358        }
359        if let Some(ref id) = http_announce_id {
360            if let Some(ref core) = cores.mdns {
361                if let Err(e) = core.unregister(id) {
362                    tracing::warn!(error = %e, "Failed to withdraw HTTP mDNS announcement");
363                }
364            }
365        }
366        if let Some(ref core) = cores.mdns {
367            if let Err(e) = core.shutdown().await {
368                tracing::warn!(error = %e, "Error during mDNS shutdown");
369            }
370        }
371        if let Some(ref dns) = cores.dns {
372            dns.stop().await;
373        }
374        if let Some(ref health) = cores.health {
375            let _ = health.stop().await;
376        }
377        if let Some(ref proxy) = cores.proxy {
378            let _ = proxy.stop_all().await;
379        }
380        if let Some(ref udp) = cores.udp {
381            udp.shutdown().await;
382        }
383    };
384    if tokio::time::timeout(timeout, shutdown).await.is_err() {
385        tracing::warn!("Shutdown timed out after {:?} - forcing exit", timeout);
386    }
387}