chio_http_core/emergency.rs
1//! Phase 1.4 emergency kill-switch HTTP surface.
2//!
3//! This module is intentionally substrate-agnostic -- `chio-http-core`
4//! does not embed an HTTP server. It exposes:
5//!
6//! - Route constants used by every substrate adapter
7//! (`chio-tower`, `chio-api-protect`, hosted sidecars).
8//! - Request/response DTOs that serialize into the wire shapes
9//! documented in `STRUCTURAL-SECURITY-FIXES.md` section 5.4.
10//! - Pure handler functions that take parsed inputs and return a
11//! structured response. Each substrate adapter calls the handler
12//! from its own framework route, preserving framework-native
13//! streaming, tracing, and error-mapping behavior.
14//!
15//! Authentication: the handlers require an `X-Admin-Token` header
16//! whose value matches the string configured on [`EmergencyAdmin`].
17//! No new middleware layer is introduced. Adapters that already have
18//! their own auth middleware can either pass the caller's bearer
19//! token through as the admin token (when configured that way) or
20//! short-circuit the `expected_admin_token` check.
21
22use std::sync::Arc;
23
24use chio_kernel::{ChioKernel, KernelError};
25use serde::{Deserialize, Serialize};
26
27use crate::routes::{
28 EMERGENCY_ADMIN_TOKEN_HEADER, EMERGENCY_RESUME_PATH, EMERGENCY_STATUS_PATH, EMERGENCY_STOP_PATH,
29};
30
31/// Canonical JSON body for `POST /emergency-stop`.
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct EmergencyStopRequest {
34 /// Operator-supplied rationale for the kill switch. Recorded on
35 /// the kernel and surfaced via `/emergency-status` so runbooks
36 /// can correlate the halt with an incident.
37 pub reason: String,
38}
39
40/// Wire response for `POST /emergency-stop`.
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct EmergencyStopResponse {
43 /// Always `true` for a successful stop.
44 pub stopped: bool,
45}
46
47/// Wire response for `POST /emergency-resume`.
48#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct EmergencyResumeResponse {
50 /// Always `false` for a successful resume.
51 pub stopped: bool,
52}
53
54/// Wire response for `GET /emergency-status`.
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct EmergencyStatusResponse {
57 /// Whether the kill switch is currently engaged.
58 pub stopped: bool,
59
60 /// RFC 3339 / ISO 8601 timestamp of the stop. `None` when the
61 /// kernel has never been stopped or is currently resumed.
62 #[serde(default, skip_serializing_if = "Option::is_none")]
63 pub since: Option<String>,
64
65 /// Operator-supplied reason for the current stop. `None` when
66 /// the kernel is running normally.
67 #[serde(default, skip_serializing_if = "Option::is_none")]
68 pub reason: Option<String>,
69}
70
/// Failure modes of the emergency handlers.
///
/// Every variant has a fixed HTTP status, a stable machine-readable code
/// and a human-readable message; see [`EmergencyHandlerError::status`],
/// [`EmergencyHandlerError::code`] and [`EmergencyHandlerError::message`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum EmergencyHandlerError {
    /// The `X-Admin-Token` header was absent or did not match the
    /// configured admin token. Maps to HTTP 401.
    Unauthorized,

    /// The request body could not be decoded into the expected JSON
    /// shape. Carries a description of the parse failure. Maps to
    /// HTTP 400.
    BadRequest(String),

    /// The kernel reported a failure while the kill switch was being
    /// toggled. Carries the kernel error rendered as text. Maps to
    /// HTTP 500.
    ///
    /// For the stop path the kernel is documented as flipping its stop
    /// flag before any fallible step, so this error is expected to be
    /// reported with the stop already engaged (see
    /// [`handle_emergency_stop`]); that guarantee lives in the kernel,
    /// not in this module.
    Kernel(String),
}
89
90impl EmergencyHandlerError {
91 /// HTTP status code for this error.
92 #[must_use]
93 pub fn status(&self) -> u16 {
94 match self {
95 Self::Unauthorized => 401,
96 Self::BadRequest(_) => 400,
97 Self::Kernel(_) => 500,
98 }
99 }
100
101 /// Stable error code string (snake_case) for machine-readable error
102 /// payloads. Adapters serialize `{ "error": "<code>", "message": ... }`.
103 #[must_use]
104 pub fn code(&self) -> &'static str {
105 match self {
106 Self::Unauthorized => "unauthorized",
107 Self::BadRequest(_) => "bad_request",
108 Self::Kernel(_) => "internal_error",
109 }
110 }
111
112 /// Human-readable message.
113 #[must_use]
114 pub fn message(&self) -> String {
115 match self {
116 Self::Unauthorized => "missing or invalid X-Admin-Token header".to_string(),
117 Self::BadRequest(reason) | Self::Kernel(reason) => reason.clone(),
118 }
119 }
120
121 /// Wire body for this error response.
122 #[must_use]
123 pub fn body(&self) -> serde_json::Value {
124 serde_json::json!({
125 "error": self.code(),
126 "message": self.message(),
127 })
128 }
129}
130
131impl From<KernelError> for EmergencyHandlerError {
132 fn from(error: KernelError) -> Self {
133 Self::Kernel(error.to_string())
134 }
135}
136
137/// Admin handle bound to a kernel and a configured admin token.
138///
139/// The handle is cheap to clone (`Arc<ChioKernel>` + short strings) and
140/// safe to share across threads. It holds the only reference to the
141/// kernel needed by the emergency endpoints, so substrate adapters can
142/// construct one `EmergencyAdmin` at startup and pass it to every
143/// route registration.
144#[derive(Clone)]
145pub struct EmergencyAdmin {
146 kernel: Arc<ChioKernel>,
147 expected_admin_token: String,
148}
149
150impl std::fmt::Debug for EmergencyAdmin {
151 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
152 f.debug_struct("EmergencyAdmin")
153 .field("admin_token_len", &self.expected_admin_token.len())
154 .finish_non_exhaustive()
155 }
156}
157
158impl EmergencyAdmin {
159 /// Create a new admin handle. `expected_admin_token` must match the
160 /// value of the `X-Admin-Token` header on every incoming admin call.
161 /// The token is compared with `==`; adapters that need constant-time
162 /// comparison can wrap the check in their own middleware before
163 /// delegating to the handler.
164 #[must_use]
165 pub fn new(kernel: Arc<ChioKernel>, expected_admin_token: String) -> Self {
166 Self {
167 kernel,
168 expected_admin_token,
169 }
170 }
171
172 /// Shared kernel reference, primarily for tests and for adapters
173 /// that want to re-use the same `Arc<ChioKernel>` for other routes.
174 #[must_use]
175 pub fn kernel(&self) -> &Arc<ChioKernel> {
176 &self.kernel
177 }
178
179 fn authorize(&self, admin_token: Option<&str>) -> Result<(), EmergencyHandlerError> {
180 match admin_token {
181 Some(token) if token == self.expected_admin_token => Ok(()),
182 _ => Err(EmergencyHandlerError::Unauthorized),
183 }
184 }
185}
186
187/// Handler for `POST /emergency-stop`.
188///
189/// Fail-closed: if the token check passes, the kernel's `emergency_stop`
190/// is invoked immediately. If it returns an error, the flag has still
191/// been set (the kernel flips its atomic before any fallible step) so
192/// the system is left in the safer stopped state even when the caller
193/// sees a 500.
194pub fn handle_emergency_stop(
195 admin: &EmergencyAdmin,
196 admin_token: Option<&str>,
197 body: &[u8],
198) -> Result<EmergencyStopResponse, EmergencyHandlerError> {
199 admin.authorize(admin_token)?;
200
201 let parsed: EmergencyStopRequest = serde_json::from_slice(body).map_err(|error| {
202 EmergencyHandlerError::BadRequest(format!("invalid emergency-stop request body: {error}"))
203 })?;
204
205 admin.kernel.emergency_stop(&parsed.reason)?;
206
207 Ok(EmergencyStopResponse { stopped: true })
208}
209
210/// Handler for `POST /emergency-resume`.
211///
212/// Body is ignored (any bytes, including empty, are accepted) so
213/// adapters can keep wiring identical to `POST /emergency-stop`.
214pub fn handle_emergency_resume(
215 admin: &EmergencyAdmin,
216 admin_token: Option<&str>,
217 _body: &[u8],
218) -> Result<EmergencyResumeResponse, EmergencyHandlerError> {
219 admin.authorize(admin_token)?;
220 admin.kernel.emergency_resume()?;
221 Ok(EmergencyResumeResponse { stopped: false })
222}
223
224/// Handler for `GET /emergency-status`.
225pub fn handle_emergency_status(
226 admin: &EmergencyAdmin,
227 admin_token: Option<&str>,
228) -> Result<EmergencyStatusResponse, EmergencyHandlerError> {
229 admin.authorize(admin_token)?;
230
231 let stopped = admin.kernel.is_emergency_stopped();
232 let since = admin
233 .kernel
234 .emergency_stopped_since()
235 .and_then(|unix_secs| {
236 // i64 is what chrono expects; secs fit comfortably for any
237 // realistic operator timestamp.
238 let secs = i64::try_from(unix_secs).ok()?;
239 chrono::DateTime::<chrono::Utc>::from_timestamp(secs, 0).map(|dt| dt.to_rfc3339())
240 });
241 let reason = admin.kernel.emergency_stop_reason();
242
243 Ok(EmergencyStatusResponse {
244 stopped,
245 since,
246 reason,
247 })
248}
249
// Route and header constants re-exported at module scope under shorter
// aliases, so adapters can write e.g. `emergency::STOP_PATH` or
// `emergency::ADMIN_TOKEN_HEADER`.
252pub use crate::routes::EMERGENCY_ADMIN_TOKEN_HEADER as ADMIN_TOKEN_HEADER;
253pub use crate::routes::EMERGENCY_RESUME_PATH as RESUME_PATH;
254pub use crate::routes::EMERGENCY_STATUS_PATH as STATUS_PATH;
255pub use crate::routes::EMERGENCY_STOP_PATH as STOP_PATH;
256
257// Internal compile-time sanity: the module-level re-exports above must
258// remain in sync with `routes::`. A `const _` guard catches drift if
259// someone renames either set.
260const _: &str = EMERGENCY_STOP_PATH;
261const _: &str = EMERGENCY_RESUME_PATH;
262const _: &str = EMERGENCY_STATUS_PATH;
263const _: &str = EMERGENCY_ADMIN_TOKEN_HEADER;