Skip to main content

cellos_server/
error.rs

1//! RFC 9457 Problem Details for HTTP APIs.
2//!
3//! Every error path in the server returns `application/problem+json` so
4//! that `cellctl` (and the web UI) can render structured diagnostics
5//! without parsing free-form strings. The `type` field is a stable
6//! identifier — clients may switch on it; the `title`/`detail` fields are
7//! human-readable and may change.
8
9use axum::body::{to_bytes, Body};
10use axum::http::{header, HeaderValue, StatusCode};
11use axum::response::{IntoResponse, Response};
12use axum::Json;
13use serde::Serialize;
14
/// Stable error identifier; each variant corresponds to one RFC 9457
/// `type` URI.
///
/// Adding a variant is a non-breaking change; renaming one IS breaking
/// (clients pin on `type`).
///
/// `PartialEq`, `Eq` and `Hash` are derived so that kinds can be compared
/// with `==`, asserted on in tests, and used as map/set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AppErrorKind {
    /// Reported as HTTP 401.
    Unauthorized,
    /// Reported as HTTP 400.
    BadRequest,
    /// Reported as HTTP 404.
    NotFound,
    /// Reported as HTTP 409.
    Conflict,
    /// Reported as HTTP 500.
    Internal,
    /// FUZZ-WAVE-1 MED-1: axum's built-in extractors (Json, Path, Query)
    /// reject malformed input with `text/plain`. We catch those rejections
    /// in the response-mapping middleware and re-emit them as
    /// problem+json under stable `type` URIs. This note applies to
    /// `PayloadTooLarge`, `UnsupportedMediaType` and `MethodNotAllowed`.
    PayloadTooLarge,
    /// See the FUZZ-WAVE-1 MED-1 note on `PayloadTooLarge`.
    UnsupportedMediaType,
    /// See the FUZZ-WAVE-1 MED-1 note on `PayloadTooLarge`.
    MethodNotAllowed,
    /// FUZZ-CRIT-1: the upstream event store (JetStream/NATS) is
    /// unreachable or timing out. Distinct from `Internal` because the
    /// HTTP control plane itself is healthy — only the data tier behind
    /// `/v1/events` is degraded.
    ServiceUnavailable,
    /// Discriminants from ADR-0010 §Enforcement: cellos-server admission
    /// gate rejection reasons. Surfaced via `application/problem+json`
    /// so cellctl can switch on `type` without parsing `detail`. This note
    /// applies to all four `Formation*` variants.
    FormationCycle,
    /// Admission-gate rejection; see `FormationCycle`.
    FormationMultipleCoordinators,
    /// Admission-gate rejection; see `FormationCycle`.
    FormationNoCoordinator,
    /// Admission-gate rejection; see `FormationCycle`.
    FormationAuthorityNotNarrowing,
}
44
45impl AppErrorKind {
46    pub fn status(self) -> StatusCode {
47        match self {
48            AppErrorKind::Unauthorized => StatusCode::UNAUTHORIZED,
49            AppErrorKind::BadRequest
50            | AppErrorKind::FormationCycle
51            | AppErrorKind::FormationMultipleCoordinators
52            | AppErrorKind::FormationNoCoordinator
53            | AppErrorKind::FormationAuthorityNotNarrowing => StatusCode::BAD_REQUEST,
54            AppErrorKind::NotFound => StatusCode::NOT_FOUND,
55            AppErrorKind::MethodNotAllowed => StatusCode::METHOD_NOT_ALLOWED,
56            AppErrorKind::Conflict => StatusCode::CONFLICT,
57            AppErrorKind::PayloadTooLarge => StatusCode::PAYLOAD_TOO_LARGE,
58            AppErrorKind::UnsupportedMediaType => StatusCode::UNSUPPORTED_MEDIA_TYPE,
59            AppErrorKind::Internal => StatusCode::INTERNAL_SERVER_ERROR,
60            AppErrorKind::ServiceUnavailable => StatusCode::SERVICE_UNAVAILABLE,
61        }
62    }
63
64    /// `type` URI identifier per RFC 9457 §3.1. We use relative URI
65    /// references rooted at `/problems/` so the server's deployment URL
66    /// does not affect the stable identifier.
67    pub fn type_uri(self) -> &'static str {
68        match self {
69            AppErrorKind::Unauthorized => "/problems/unauthorized",
70            AppErrorKind::BadRequest => "/problems/bad-request",
71            AppErrorKind::NotFound => "/problems/not-found",
72            AppErrorKind::Conflict => "/problems/conflict",
73            AppErrorKind::Internal => "/problems/internal",
74            AppErrorKind::PayloadTooLarge => "/problems/payload-too-large",
75            AppErrorKind::UnsupportedMediaType => "/problems/unsupported-media-type",
76            AppErrorKind::MethodNotAllowed => "/problems/method-not-allowed",
77            AppErrorKind::ServiceUnavailable => "/problems/service-unavailable",
78            AppErrorKind::FormationCycle => "/problems/formation/cycle",
79            AppErrorKind::FormationMultipleCoordinators => {
80                "/problems/formation/multiple-coordinators"
81            }
82            AppErrorKind::FormationNoCoordinator => "/problems/formation/no-coordinator",
83            AppErrorKind::FormationAuthorityNotNarrowing => {
84                "/problems/formation/authority-not-narrowing"
85            }
86        }
87    }
88
89    pub fn title(self) -> &'static str {
90        match self {
91            AppErrorKind::Unauthorized => "Unauthorized",
92            AppErrorKind::BadRequest => "Bad Request",
93            AppErrorKind::NotFound => "Not Found",
94            AppErrorKind::Conflict => "Conflict",
95            AppErrorKind::Internal => "Internal Server Error",
96            AppErrorKind::PayloadTooLarge => "Payload Too Large",
97            AppErrorKind::UnsupportedMediaType => "Unsupported Media Type",
98            AppErrorKind::MethodNotAllowed => "Method Not Allowed",
99            AppErrorKind::ServiceUnavailable => "Event store unavailable",
100            AppErrorKind::FormationCycle => "Formation rejected: authority cycle",
101            AppErrorKind::FormationMultipleCoordinators => {
102                "Formation rejected: multiple coordinators"
103            }
104            AppErrorKind::FormationNoCoordinator => "Formation rejected: no coordinator",
105            AppErrorKind::FormationAuthorityNotNarrowing => {
106                "Formation rejected: authority does not narrow"
107            }
108        }
109    }
110}
111
112#[derive(Debug, Clone)]
113pub struct AppError {
114    pub kind: AppErrorKind,
115    pub detail: String,
116}
117
118impl AppError {
119    pub fn new(kind: AppErrorKind, detail: impl Into<String>) -> Self {
120        Self {
121            kind,
122            detail: detail.into(),
123        }
124    }
125
126    pub fn bad_request(detail: impl Into<String>) -> Self {
127        Self::new(AppErrorKind::BadRequest, detail)
128    }
129
130    pub fn unauthorized(detail: impl Into<String>) -> Self {
131        Self::new(AppErrorKind::Unauthorized, detail)
132    }
133
134    pub fn not_found(detail: impl Into<String>) -> Self {
135        Self::new(AppErrorKind::NotFound, detail)
136    }
137
138    pub fn internal(detail: impl Into<String>) -> Self {
139        Self::new(AppErrorKind::Internal, detail)
140    }
141
142    pub fn payload_too_large(detail: impl Into<String>) -> Self {
143        Self::new(AppErrorKind::PayloadTooLarge, detail)
144    }
145
146    pub fn unsupported_media_type(detail: impl Into<String>) -> Self {
147        Self::new(AppErrorKind::UnsupportedMediaType, detail)
148    }
149
150    pub fn method_not_allowed(detail: impl Into<String>) -> Self {
151        Self::new(AppErrorKind::MethodNotAllowed, detail)
152    }
153
154    /// Redacted 503 for upstream-data-tier failures. The `detail` text is
155    /// fixed at the type level so callers cannot accidentally splice
156    /// internal stream/subject names into the response body — the
157    /// FUZZ-CRIT-1 leak. Operators get the underlying cause via the WARN
158    /// log emitted at the call site, not via this user-visible body.
159    pub fn service_unavailable() -> Self {
160        Self::new(
161            AppErrorKind::ServiceUnavailable,
162            "Event store is temporarily unreachable; retry later",
163        )
164    }
165}
166
/// The `Content-Type` value for problem documents, as registered by
/// RFC 9457 §3. Single source of truth for every place that stamps or
/// checks this media type.
pub const PROBLEM_JSON_CT: &str = "application/problem+json";
169
170/// Build a problem+json response from a kind + detail string, bypassing
171/// the full `AppError` construction path. Used by fallbacks and the
172/// rejection-normalising middleware where we already know the status.
173pub fn problem_response(kind: AppErrorKind, detail: impl Into<String>) -> Response {
174    AppError::new(kind, detail).into_response()
175}
176
177/// Wire shape of the problem document (RFC 9457 §3.1).
178#[derive(Debug, Serialize)]
179struct ProblemDetails<'a> {
180    #[serde(rename = "type")]
181    type_uri: &'a str,
182    title: &'a str,
183    status: u16,
184    detail: &'a str,
185}
186
187impl IntoResponse for AppError {
188    fn into_response(self) -> Response {
189        let status = self.kind.status();
190        let body = ProblemDetails {
191            type_uri: self.kind.type_uri(),
192            title: self.kind.title(),
193            status: status.as_u16(),
194            detail: &self.detail,
195        };
196        let mut resp = (status, Json(body)).into_response();
197        // RFC 9457 §3 — the media type is `application/problem+json`.
198        resp.headers_mut().insert(
199            axum::http::header::CONTENT_TYPE,
200            axum::http::HeaderValue::from_static("application/problem+json"),
201        );
202        resp
203    }
204}
205
206impl From<anyhow::Error> for AppError {
207    fn from(e: anyhow::Error) -> Self {
208        AppError::internal(format!("{e:#}"))
209    }
210}
211
212impl From<serde_json::Error> for AppError {
213    fn from(e: serde_json::Error) -> Self {
214        AppError::bad_request(format!("invalid json: {e}"))
215    }
216}
217
218/// FUZZ-WAVE-1 MED-1 / MED-2: response-mapping middleware that
219/// guarantees every 4xx leaving the server carries
220/// `Content-Type: application/problem+json` (RFC 9457 §3).
221///
222/// axum's built-in extractors (`Json`, `Path`, `Query`,
223/// `DefaultBodyLimit`) reject malformed input by returning a bare
224/// `text/plain` body with the error string. The application-level
225/// `AppError` path is already problem+json; this layer brings axum's
226/// built-in rejections — plus the 404/405 fallbacks below — into the
227/// same wire shape.
228///
229/// Strategy: inspect the outgoing response. If status is 4xx **and**
230/// the existing Content-Type is **not** `application/problem+json`,
231/// drain the body, pick a kind from the status, and re-emit. Headers
232/// other than Content-Type/Content-Length are preserved verbatim — this
233/// matters for 405 where axum already set `Allow:`.
234///
235/// 2xx, 3xx, and 5xx responses pass through unchanged. The Critical
236/// finding in the wave-1 report (5xx leak) is out of scope for this
237/// fix; this middleware only normalises 4xx content-type.
238pub async fn normalize_problem_response(resp: Response) -> Response {
239    let status = resp.status();
240
241    if !status.is_client_error() {
242        return resp;
243    }
244
245    let is_problem_json = resp
246        .headers()
247        .get(header::CONTENT_TYPE)
248        .and_then(|v| v.to_str().ok())
249        .map(|ct| ct.starts_with(PROBLEM_JSON_CT))
250        .unwrap_or(false);
251
252    if is_problem_json {
253        return resp;
254    }
255
256    // Preserve headers we want to carry across the body rewrite. The
257    // `Allow` header on a 405 is the most important — RFC 9110 §15.5.6
258    // requires it and operators rely on it to discover the valid verbs.
259    let allow_header = resp.headers().get(header::ALLOW).cloned();
260
261    let (parts, body) = resp.into_parts();
262    // 64 KiB is more than enough for an axum rejection string. If a
263    // hostile upstream layer ever attaches a giant body to a 4xx we
264    // drop it on the floor and fall back to a generic detail.
265    let detail_bytes = to_bytes(body, 64 * 1024).await.unwrap_or_default();
266    let detail = std::str::from_utf8(&detail_bytes)
267        .unwrap_or("")
268        .trim()
269        .to_string();
270
271    let kind = match status {
272        StatusCode::BAD_REQUEST => AppErrorKind::BadRequest,
273        StatusCode::UNAUTHORIZED => AppErrorKind::Unauthorized,
274        StatusCode::NOT_FOUND => AppErrorKind::NotFound,
275        StatusCode::METHOD_NOT_ALLOWED => AppErrorKind::MethodNotAllowed,
276        StatusCode::CONFLICT => AppErrorKind::Conflict,
277        StatusCode::PAYLOAD_TOO_LARGE => AppErrorKind::PayloadTooLarge,
278        StatusCode::UNSUPPORTED_MEDIA_TYPE => AppErrorKind::UnsupportedMediaType,
279        // Other 4xx (422, 415, 429, ...) — fall back to a generic
280        // bad-request shape but keep the original status code below.
281        _ => AppErrorKind::BadRequest,
282    };
283
284    // Empty body (e.g. axum 0.7's built-in 404/405) produces a useless
285    // detail. Synthesize a sensible one so adopters see *something*
286    // structured.
287    let detail = if detail.is_empty() {
288        match status {
289            StatusCode::NOT_FOUND => "no route matched the request path".to_string(),
290            StatusCode::METHOD_NOT_ALLOWED => "HTTP method not allowed for this path".to_string(),
291            StatusCode::PAYLOAD_TOO_LARGE => "request body exceeds the per-route cap".to_string(),
292            _ => parts
293                .status
294                .canonical_reason()
295                .unwrap_or("client error")
296                .to_string(),
297        }
298    } else {
299        detail
300    };
301
302    let body = ProblemDetails {
303        type_uri: kind.type_uri(),
304        title: kind.title(),
305        status: status.as_u16(),
306        detail: &detail,
307    };
308    let body_bytes = serde_json::to_vec(&body)
309        .unwrap_or_else(|_| br#"{"type":"/problems/internal","title":"Internal Server Error","status":500,"detail":"failed to serialise problem document"}"#.to_vec());
310
311    let mut new = Response::builder()
312        .status(status)
313        .body(Body::from(body_bytes))
314        .expect("problem+json response build");
315
316    // Copy through every original header except those that no longer
317    // describe the rewritten body.
318    for (name, value) in parts.headers.iter() {
319        if name == header::CONTENT_TYPE || name == header::CONTENT_LENGTH {
320            continue;
321        }
322        new.headers_mut().append(name.clone(), value.clone());
323    }
324    new.headers_mut().insert(
325        header::CONTENT_TYPE,
326        HeaderValue::from_static(PROBLEM_JSON_CT),
327    );
328    // Preserve Allow if axum's method router set it and we didn't catch
329    // it in the loop above (header iteration is the canonical source,
330    // but this is a belt-and-braces guarantee for the 405 contract).
331    if let Some(v) = allow_header {
332        new.headers_mut().insert(header::ALLOW, v);
333    }
334
335    new
336}