Skip to main content

chio_http_core/
emergency.rs

//! Phase 1.4 emergency kill-switch HTTP surface.
//!
//! This module is intentionally substrate-agnostic -- `chio-http-core`
//! does not embed an HTTP server. It exposes:
//!
//! - Route constants used by every substrate adapter
//!   (`chio-tower`, `chio-api-protect`, hosted sidecars).
//! - Request/response DTOs that serialize into the wire shapes
//!   documented in `STRUCTURAL-SECURITY-FIXES.md` section 5.4.
//! - Pure handler functions that take parsed inputs and return a
//!   structured response. Each substrate adapter calls the handler
//!   from its own framework route, preserving framework-native
//!   streaming, tracing, and error-mapping behavior.
//!
//! Authentication: the handlers require an `X-Admin-Token` header
//! whose value matches the string configured on [`EmergencyAdmin`].
//! No new middleware layer is introduced. Adapters that already have
//! their own auth middleware can either pass the caller's bearer
//! token through as the admin token (when configured that way) or
//! short-circuit the `expected_admin_token` check.
21
22use std::sync::Arc;
23
24use chio_kernel::{ChioKernel, KernelError};
25use serde::{Deserialize, Serialize};
26
27use crate::routes::{
28    EMERGENCY_ADMIN_TOKEN_HEADER, EMERGENCY_RESUME_PATH, EMERGENCY_STATUS_PATH, EMERGENCY_STOP_PATH,
29};
30
31/// Canonical JSON body for `POST /emergency-stop`.
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct EmergencyStopRequest {
34    /// Operator-supplied rationale for the kill switch. Recorded on
35    /// the kernel and surfaced via `/emergency-status` so runbooks
36    /// can correlate the halt with an incident.
37    pub reason: String,
38}
39
40/// Wire response for `POST /emergency-stop`.
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct EmergencyStopResponse {
43    /// Always `true` for a successful stop.
44    pub stopped: bool,
45}
46
47/// Wire response for `POST /emergency-resume`.
48#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct EmergencyResumeResponse {
50    /// Always `false` for a successful resume.
51    pub stopped: bool,
52}
53
54/// Wire response for `GET /emergency-status`.
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct EmergencyStatusResponse {
57    /// Whether the kill switch is currently engaged.
58    pub stopped: bool,
59
60    /// RFC 3339 / ISO 8601 timestamp of the stop. `None` when the
61    /// kernel has never been stopped or is currently resumed.
62    #[serde(default, skip_serializing_if = "Option::is_none")]
63    pub since: Option<String>,
64
65    /// Operator-supplied reason for the current stop. `None` when
66    /// the kernel is running normally.
67    #[serde(default, skip_serializing_if = "Option::is_none")]
68    pub reason: Option<String>,
69}
70
/// Failure modes of the emergency handlers.
///
/// Every variant maps onto a fixed HTTP status code via
/// [`EmergencyHandlerError::status`]. The type also implements
/// [`std::fmt::Display`] and [`std::error::Error`], so it can be boxed
/// as `dyn Error` or propagated with `?` into generic error types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EmergencyHandlerError {
    /// The `X-Admin-Token` header is missing or does not match the
    /// configured value. Maps to HTTP 401.
    Unauthorized,

    /// The request body could not be parsed as the expected JSON shape.
    /// Carries the parse failure description. Maps to HTTP 400.
    BadRequest(String),

    /// The kernel reported a failure while toggling the kill switch.
    /// Carries the kernel error's display text (see the
    /// `From<KernelError>` conversion). Maps to HTTP 500.
    ///
    /// Per the design notes on [`handle_emergency_stop`], the kernel is
    /// expected to have engaged the stop before any fallible step, so a
    /// stop that surfaces this error should still leave the system
    /// halted; that guarantee is provided by the kernel, not this type.
    Kernel(String),
}

impl std::fmt::Display for EmergencyHandlerError {
    /// Writes the same human-readable text that `message()` returns.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Unauthorized => f.write_str("missing or invalid X-Admin-Token header"),
            Self::BadRequest(reason) | Self::Kernel(reason) => f.write_str(reason),
        }
    }
}

// No underlying `source()` is exposed: the variants only retain the
// rendered message, not the original error value.
impl std::error::Error for EmergencyHandlerError {}
89
90impl EmergencyHandlerError {
91    /// HTTP status code for this error.
92    #[must_use]
93    pub fn status(&self) -> u16 {
94        match self {
95            Self::Unauthorized => 401,
96            Self::BadRequest(_) => 400,
97            Self::Kernel(_) => 500,
98        }
99    }
100
101    /// Stable error code string (snake_case) for machine-readable error
102    /// payloads. Adapters serialize `{ "error": "<code>", "message": ... }`.
103    #[must_use]
104    pub fn code(&self) -> &'static str {
105        match self {
106            Self::Unauthorized => "unauthorized",
107            Self::BadRequest(_) => "bad_request",
108            Self::Kernel(_) => "internal_error",
109        }
110    }
111
112    /// Human-readable message.
113    #[must_use]
114    pub fn message(&self) -> String {
115        match self {
116            Self::Unauthorized => "missing or invalid X-Admin-Token header".to_string(),
117            Self::BadRequest(reason) | Self::Kernel(reason) => reason.clone(),
118        }
119    }
120
121    /// Wire body for this error response.
122    #[must_use]
123    pub fn body(&self) -> serde_json::Value {
124        serde_json::json!({
125            "error": self.code(),
126            "message": self.message(),
127        })
128    }
129}
130
131impl From<KernelError> for EmergencyHandlerError {
132    fn from(error: KernelError) -> Self {
133        Self::Kernel(error.to_string())
134    }
135}
136
137/// Admin handle bound to a kernel and a configured admin token.
138///
139/// The handle is cheap to clone (`Arc<ChioKernel>` + short strings) and
140/// safe to share across threads. It holds the only reference to the
141/// kernel needed by the emergency endpoints, so substrate adapters can
142/// construct one `EmergencyAdmin` at startup and pass it to every
143/// route registration.
144#[derive(Clone)]
145pub struct EmergencyAdmin {
146    kernel: Arc<ChioKernel>,
147    expected_admin_token: String,
148}
149
150impl std::fmt::Debug for EmergencyAdmin {
151    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
152        f.debug_struct("EmergencyAdmin")
153            .field("admin_token_len", &self.expected_admin_token.len())
154            .finish_non_exhaustive()
155    }
156}
157
158impl EmergencyAdmin {
159    /// Create a new admin handle. `expected_admin_token` must match the
160    /// value of the `X-Admin-Token` header on every incoming admin call.
161    /// The token is compared with `==`; adapters that need constant-time
162    /// comparison can wrap the check in their own middleware before
163    /// delegating to the handler.
164    #[must_use]
165    pub fn new(kernel: Arc<ChioKernel>, expected_admin_token: String) -> Self {
166        Self {
167            kernel,
168            expected_admin_token,
169        }
170    }
171
172    /// Shared kernel reference, primarily for tests and for adapters
173    /// that want to re-use the same `Arc<ChioKernel>` for other routes.
174    #[must_use]
175    pub fn kernel(&self) -> &Arc<ChioKernel> {
176        &self.kernel
177    }
178
179    fn authorize(&self, admin_token: Option<&str>) -> Result<(), EmergencyHandlerError> {
180        match admin_token {
181            Some(token) if token == self.expected_admin_token => Ok(()),
182            _ => Err(EmergencyHandlerError::Unauthorized),
183        }
184    }
185}
186
187/// Handler for `POST /emergency-stop`.
188///
189/// Fail-closed: if the token check passes, the kernel's `emergency_stop`
190/// is invoked immediately. If it returns an error, the flag has still
191/// been set (the kernel flips its atomic before any fallible step) so
192/// the system is left in the safer stopped state even when the caller
193/// sees a 500.
194pub fn handle_emergency_stop(
195    admin: &EmergencyAdmin,
196    admin_token: Option<&str>,
197    body: &[u8],
198) -> Result<EmergencyStopResponse, EmergencyHandlerError> {
199    admin.authorize(admin_token)?;
200
201    let parsed: EmergencyStopRequest = serde_json::from_slice(body).map_err(|error| {
202        EmergencyHandlerError::BadRequest(format!("invalid emergency-stop request body: {error}"))
203    })?;
204
205    admin.kernel.emergency_stop(&parsed.reason)?;
206
207    Ok(EmergencyStopResponse { stopped: true })
208}
209
210/// Handler for `POST /emergency-resume`.
211///
212/// Body is ignored (any bytes, including empty, are accepted) so
213/// adapters can keep wiring identical to `POST /emergency-stop`.
214pub fn handle_emergency_resume(
215    admin: &EmergencyAdmin,
216    admin_token: Option<&str>,
217    _body: &[u8],
218) -> Result<EmergencyResumeResponse, EmergencyHandlerError> {
219    admin.authorize(admin_token)?;
220    admin.kernel.emergency_resume()?;
221    Ok(EmergencyResumeResponse { stopped: false })
222}
223
224/// Handler for `GET /emergency-status`.
225pub fn handle_emergency_status(
226    admin: &EmergencyAdmin,
227    admin_token: Option<&str>,
228) -> Result<EmergencyStatusResponse, EmergencyHandlerError> {
229    admin.authorize(admin_token)?;
230
231    let stopped = admin.kernel.is_emergency_stopped();
232    let since = admin
233        .kernel
234        .emergency_stopped_since()
235        .and_then(|unix_secs| {
236            // i64 is what chrono expects; secs fit comfortably for any
237            // realistic operator timestamp.
238            let secs = i64::try_from(unix_secs).ok()?;
239            chrono::DateTime::<chrono::Utc>::from_timestamp(secs, 0).map(|dt| dt.to_rfc3339())
240        });
241    let reason = admin.kernel.emergency_stop_reason();
242
243    Ok(EmergencyStatusResponse {
244        stopped,
245        since,
246        reason,
247    })
248}
249
250// Path constants re-exported at module scope so adapters can write
251// `emergency::EMERGENCY_STOP_PATH`.
252pub use crate::routes::EMERGENCY_ADMIN_TOKEN_HEADER as ADMIN_TOKEN_HEADER;
253pub use crate::routes::EMERGENCY_RESUME_PATH as RESUME_PATH;
254pub use crate::routes::EMERGENCY_STATUS_PATH as STATUS_PATH;
255pub use crate::routes::EMERGENCY_STOP_PATH as STOP_PATH;
256
257// Internal compile-time sanity: the module-level re-exports above must
258// remain in sync with `routes::`. A `const _` guard catches drift if
259// someone renames either set.
260const _: &str = EMERGENCY_STOP_PATH;
261const _: &str = EMERGENCY_RESUME_PATH;
262const _: &str = EMERGENCY_STATUS_PATH;
263const _: &str = EMERGENCY_ADMIN_TOKEN_HEADER;