Skip to main content

koi_certmesh/
core_enroll.rs

//! Enrollment: process member joins and self-enroll the CA node.
//!
//! Part of the inherent `impl CertmeshCore`, split from `lib.rs` (certmesh M2).
//! As a child module of the crate root, `use super::*` inherits `lib.rs`'s
//! imports, sibling modules, and crate-private state/helpers as in the original.
6use super::*;
7
8impl CertmeshCore {
9    /// Process an enrollment request. Returns the join response on success.
10    ///
11    /// The joining machine’s hostname comes from the request - not from
12    /// `hostname::get()` which would return the CA server’s hostname.
13    pub async fn enroll(
14        &self,
15        request: &protocol::JoinRequest,
16    ) -> Result<protocol::JoinResponse, CertmeshError> {
17        let hostname = &request.hostname;
18        validate_hostname(hostname)?;
19        // Default SANs: hostname + hostname.local, plus any extras the joiner sent.
20        // Every extra SAN is validated (F15): IP literals pass through; everything
21        // else must be a valid RFC 1123 hostname — so a joiner can't slip a wildcard
22        // or junk DNS name into its cert. Capped to bound the SAN list.
23        const MAX_EXTRA_SANS: usize = 16;
24        let mut sans = vec![hostname.clone(), format!("{hostname}.local")];
25        for extra in request.sans.iter().take(MAX_EXTRA_SANS) {
26            if extra.parse::<std::net::IpAddr>().is_err() {
27                validate_hostname(extra)?;
28            }
29            if !sans.contains(extra) {
30                sans.push(extra.clone());
31            }
32        }
33
34        let ca_guard = self.state.ca.lock().await;
35        let ca = ca_guard.as_ref().ok_or_else(|| {
36            if self.state.paths.is_ca_initialized() {
37                CertmeshError::CaLocked
38            } else {
39                CertmeshError::CaNotInitialized
40            }
41        })?;
42
43        let roster = self.state.roster.lock().await;
44        let auth_guard = self.state.auth.lock().await;
45        // The enrollment auth credential may be absent when the CA was unlocked
46        // via a non-passphrase slot (TOTP/auto-unlock master key). Invite-token
47        // enrollment (ADR-015 F2) does not need it; the TOTP branch inside
48        // `process_enrollment` fails closed (CaLocked) when it does.
49        let auth_state = auth_guard.as_ref();
50        let challenge_guard = self.state.pending_challenge.lock().await;
51        let challenge = challenge_guard
52            .as_ref()
53            .cloned()
54            .unwrap_or(koi_crypto::auth::AuthChallenge::Totp);
55        let mut rate_limiter = self.state.rate_limiter.lock().await;
56        let requires_approval = roster.requires_approval();
57        let fallback_operator = roster.metadata.operator.clone();
58        drop(roster);
59
60        let approved_by = if requires_approval {
61            request_approval(&self.state, hostname, requires_approval).await?
62        } else {
63            fallback_operator
64        };
65
66        // Single-writer commit (ADR-017 F8): process_enrollment validates, signs,
67        // and pushes the member under the lock; commit_roster bumps `seq` and
68        // persists atomically. On any error it errors *before* mutating, so the
69        // roster is left unchanged and nothing is persisted.
70        let result = self
71            .state
72            .commit_roster(|roster| {
73                enrollment::process_enrollment(
74                    ca,
75                    roster,
76                    auth_state,
77                    &challenge,
78                    &mut rate_limiter,
79                    request,
80                    hostname,
81                    &sans,
82                    approved_by,
83                    &self.state.paths,
84                )
85            })
86            .await;
87
88        // Persist the rate-limiter regardless of outcome (ADR-017 F7): a failed
89        // TOTP attempt advances a lockout that must survive a restart. Snapshot +
90        // drop the guard before the blocking write (no lock held across I/O).
91        // Invite joins never consult the limiter, so this is a no-op for them.
92        let limiter_snapshot = rate_limiter.clone();
93        drop(rate_limiter);
94        if let Err(e) = persist_rate_limiter(&self.state.paths, &limiter_snapshot) {
95            tracing::warn!(error = %e, "Could not persist rate-limiter state");
96        }
97
98        let (response, _issued) = result?;
99
100        let _ = self.state.event_tx.send(CertmeshEvent::MemberJoined {
101            hostname: response.hostname.clone(),
102            fingerprint: response.ca_fingerprint.clone(),
103        });
104
105        Ok(response)
106    }
107
108    /// Self-enroll the daemon as a certmesh member.
109    ///
110    /// Called automatically after CA creation (and on every daemon start) to get
111    /// the server leaf the mTLS + ACME listeners use. This is the **one** issuance
112    /// path that key-gens on the CA (the CA's own identity, [`ca::issue_certificate`]
113    /// — ADR-017 P3); member leaves only ever come from a member CSR.
114    ///
115    /// Idempotent **except** when the on-disk leaf is within the CA policy's
116    /// `renew_threshold_days`: then it re-issues, so a restart refreshes the
117    /// listener cert (the CA self-renews — no live mTLS reload yet; the restart is
118    /// the reload point).
119    pub async fn self_enroll(&self) -> Result<SelfEnrollment, CertmeshError> {
120        let hostname = hostname::get()
121            .ok()
122            .and_then(|os| os.into_string().ok())
123            .unwrap_or_else(|| "unknown".to_string());
124
125        // Validate hostname before using as certificate SAN (RFC 1123, F15).
126        validate_hostname(&hostname)?;
127
128        let sans = vec![
129            hostname.clone(),
130            format!("{hostname}.local"),
131            "localhost".to_string(),
132            "127.0.0.1".to_string(),
133        ];
134
135        // Read the CA-held policy (self-leaf lifetime + restart-renewal threshold).
136        let policy = {
137            let roster = self.state.roster.lock().await;
138            roster.metadata.policy.clone()
139        };
140
141        // The self leaf always lives at certs_dir()/<hostname> — derived, not read
142        // from the roster (cert_path is no longer persisted, F13). Reuse the on-disk
143        // leaf unless it is within the renewal threshold.
144        {
145            let cert_dir = self.state.paths.certs_dir().join(&hostname);
146            let on_disk = (
147                std::fs::read_to_string(cert_dir.join("cert.pem")).ok(),
148                std::fs::read_to_string(cert_dir.join("key.pem")).ok(),
149            );
150            if let (Some(cert_pem), Some(key_pem)) = on_disk {
151                let due = leaf_not_after_utc(&cert_pem)
152                    .map(|na| {
153                        chrono::Utc::now()
154                            + chrono::Duration::days(i64::from(policy.renew_threshold_days))
155                            >= na
156                    })
157                    .unwrap_or(true); // unparseable → re-issue to be safe
158                if !due {
159                    let ca_guard = self.state.ca.lock().await;
160                    let ca = ca_guard.as_ref().ok_or_else(|| {
161                        if self.state.paths.is_ca_initialized() {
162                            CertmeshError::CaLocked
163                        } else {
164                            CertmeshError::CaNotInitialized
165                        }
166                    })?;
167                    let ca_cert_pem = ca.cert_pem.clone();
168                    drop(ca_guard);
169                    tracing::debug!(hostname = %hostname, "already self-enrolled, reusing existing cert");
170                    return Ok(SelfEnrollment {
171                        cert_pem,
172                        key_pem,
173                        ca_cert_pem,
174                    });
175                }
176                tracing::info!(hostname = %hostname, "CA self-cert within renewal threshold; re-issuing");
177            }
178        }
179
180        // Issue a fresh (or renewed) self leaf at the policy lifetime.
181        let ca_guard = self.state.ca.lock().await;
182        let ca = ca_guard.as_ref().ok_or_else(|| {
183            if self.state.paths.is_ca_initialized() {
184                CertmeshError::CaLocked
185            } else {
186                CertmeshError::CaNotInitialized
187            }
188        })?;
189        let issued = ca::issue_certificate(ca, &hostname, &sans, policy.leaf_lifetime_days)?;
190        let ca_cert_pem = ca.cert_pem.clone();
191        drop(ca_guard);
192
193        // Write cert files to the standard path (blocking I/O)
194        let cert_path = self.state.paths.certs_dir().join(&hostname);
195        let issued_clone = issued.clone();
196        let cert_dir = tokio::task::spawn_blocking(move || {
197            certfiles::write_cert_files_to(&cert_path, &issued_clone)
198        })
199        .await
200        .map_err(|e| CertmeshError::Internal(format!("cert write task: {e}")))??;
201
202        // Update the existing self entry in place, or insert as primary, then
203        // commit (ADR-017 F8). The update path covers both restart-renewal and
204        // concurrent self_enroll.
205        if let Err(e) = self
206            .state
207            .commit_roster(|roster| {
208                if let Some(member) = roster.find_member_mut(&hostname) {
209                    member.cert_fingerprint = issued.fingerprint.clone();
210                    member.cert_expires = issued.expires;
211                    member.cert_path = cert_dir.display().to_string();
212                } else {
213                    roster.members.push(roster::RosterMember {
214                        hostname: hostname.clone(),
215                        role: roster::MemberRole::Primary,
216                        enrolled_at: chrono::Utc::now(),
217                        enrolled_by: Some("self-enrollment".to_string()),
218                        cert_fingerprint: issued.fingerprint.clone(),
219                        cert_expires: issued.expires,
220                        cert_sans: sans.clone(),
221                        cert_path: cert_dir.display().to_string(),
222                        status: roster::MemberStatus::Active,
223                        reload_hook: None,
224                        last_seen: Some(chrono::Utc::now()),
225                        pinned_ca_fingerprint: None,
226                        proxy_entries: Vec::new(),
227                    });
228                }
229                Ok(())
230            })
231            .await
232        {
233            tracing::warn!(error = %e, "Failed to save roster after self-enrollment");
234        }
235
236        tracing::info!(hostname = %hostname, "Daemon self-enrolled as certmesh member");
237
238        // Audit the self-enroll issuance (ADR-017 F14) — the one issuance path that
239        // key-gens on the CA must leave a trail like any other trust decision.
240        let _ = audit::append_entry_to(
241            &self.state.paths.audit_log_path(),
242            "self_enroll",
243            &[
244                ("hostname", hostname.as_str()),
245                ("fingerprint", issued.fingerprint.as_str()),
246            ],
247        );
248
249        let _ = self.state.event_tx.send(CertmeshEvent::MemberJoined {
250            hostname,
251            fingerprint: issued.fingerprint,
252        });
253
254        // A leaf is now on disk → posture may have flipped Open→Authenticated.
255        self.state.republish_posture();
256
257        Ok(SelfEnrollment {
258            cert_pem: issued.cert_pem,
259            key_pem: issued.key_pem,
260            ca_cert_pem,
261        })
262    }
263}