api_debug_lab/cases.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! [`Case`] — the input fixture every rule consumes.
4//!
5//! A *case* is a snapshot of one customer-visible API failure: the
6//! request bytes (method, URL, headers, body), an optional response
7//! (status, headers, body), an optional sibling `server.log`, an
8//! optional sibling `secret.txt` for HMAC checks, and a free-form
9//! [`Context`] block carrying anything the rules need that isn't part
10//! of the wire protocol (auth-required flag, expected base URL,
11//! webhook envelope shape, idempotency hash, client deadline, pinned
12//! `now_unix`).
13//!
14//! Cases are laid out on disk one-directory-per-case under
15//! `fixtures/cases/<name>/`. Each directory is self-contained:
16//! `case.json` is the structured data, `server.log` is the bundled
17//! log (text or JSON-lines), `secret.txt` is the HMAC secret. This
18//! shape is deliberate: a real support engineer can drop a customer's
19//! captured artefacts into a directory of the same shape and run the
20//! diagnostic against it via [`Case::load`] or `api-debug-lab corpus`.
21//!
22//! ## Schema
23//!
24//! Every `case.json` is validated against
25//! `fixtures/cases.schema.json` (JSON Schema Draft 2020-12) by
26//! `tests/schema.rs`. The schema is the wire-level contract; this
27//! module is the deserialised mirror of it.
28
29use serde::{Deserialize, Serialize};
30use std::collections::BTreeMap;
31use std::fs;
32use std::path::{Path, PathBuf};
33use thiserror::Error;
34
35/// Errors returned by [`Case::load`].
36///
37/// All variants carry the path the loader was operating on so the CLI
38/// can produce actionable error messages.
39#[derive(Debug, Error)]
40pub enum CaseLoadError {
41 /// Filesystem-level failure reading the `case.json` file.
42 #[error("could not read case file {path}: {source}")]
43 Io {
44 /// Absolute path the loader attempted to read.
45 path: PathBuf,
46 /// Underlying I/O error.
47 source: std::io::Error,
48 },
49
50 /// `case.json` was found but did not deserialise into a [`Case`].
51 #[error("could not parse case file {path}: {source}")]
52 Parse {
53 /// Path that failed to parse.
54 path: PathBuf,
55 /// Underlying serde error (carries line / column).
56 source: serde_json::Error,
57 },
58
59 /// The provided name resolved to neither a file, a directory, nor
60 /// a known fixture under `fixtures/cases/` or `fixtures/cases/_negatives/`.
61 #[error("could not resolve case name {0}: no fixture directory found")]
62 UnknownCase(String),
63}
64
65/// Customer-visible severity tag for a case.
66///
67/// The CLI prints this in the `SEVERITY:` line of the human report;
68/// the JSON renderer serialises it lower-cased. The rule layer does
69/// not consume severity — it is operator metadata, not a signal.
70#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
71#[serde(rename_all = "lowercase")]
72pub enum Severity {
73 /// Cosmetic or informational; no customer impact.
74 Low,
75 /// Customer-impacting but localised; not paging-grade.
76 Medium,
77 /// Production fire; treat as paging-grade.
78 High,
79}
80
81impl Severity {
82 /// Render the severity as the same lowercase token used in JSON.
83 ///
84 /// Useful for the human formatter (`SEVERITY: medium`) and for
85 /// matching against tags in escalation notes.
86 pub fn as_str(self) -> &'static str {
87 match self {
88 Severity::Low => "low",
89 Severity::Medium => "medium",
90 Severity::High => "high",
91 }
92 }
93}
94
95/// HTTP request as captured by the customer or the proxy.
96///
97/// `headers` is a [`BTreeMap`] for deterministic iteration — it ends
98/// up in `curl` reproductions and in snapshot tests, both of which
99/// require byte-stability.
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct Request {
102 /// HTTP method, e.g. `"POST"`.
103 pub method: String,
104 /// Full request URL (scheme + host + path + query).
105 pub url: String,
106 /// Request headers. Header names should be lower-cased on disk so
107 /// that case-insensitive lookups via [`header`] work consistently.
108 #[serde(default)]
109 pub headers: BTreeMap<String, String>,
110 /// Raw request body. Stored as a string so HMAC verification sees
111 /// exactly the bytes the client sent (no JSON re-serialisation).
112 /// `None` means no body was sent.
113 #[serde(default)]
114 pub body: Option<String>,
115}
116
117/// HTTP response as observed by the customer.
118///
119/// Optional on the [`Case`]: a case captured before any response
120/// arrived (e.g., a timeout) has none.
121#[derive(Debug, Clone, Serialize, Deserialize)]
122pub struct Response {
123 /// HTTP status code (100–599).
124 pub status: u16,
125 /// Response headers.
126 #[serde(default)]
127 pub headers: BTreeMap<String, String>,
128 /// Raw response body, if any.
129 #[serde(default)]
130 pub body: Option<String>,
131}
132
133/// Free-form context the rules consume in addition to the wire
134/// request/response.
135///
136/// All fields are optional. Each rule documents which fields it
137/// consults; a rule that does not see what it needs simply does not
138/// fire. This is what lets the same `Case` shape serve multiple rules
139/// without coupling them.
140#[derive(Debug, Clone, Default, Serialize, Deserialize)]
141pub struct Context {
142 /// Whether the endpoint requires client authentication. The
143 /// `auth_missing` rule consults this to decide whether a missing
144 /// `Authorization` header is actually a problem.
145 #[serde(default)]
146 pub auth_required: bool,
147
148 /// Documented API base URL the client *should* be hitting. Used
149 /// by `config_dns_error` for hostname / TLD comparison.
150 #[serde(default)]
151 pub expected_base_url: Option<String>,
152
153 /// Webhook context for cases that involve HMAC signing.
154 #[serde(default)]
155 pub webhook: Option<WebhookCtx>,
156
157 /// Idempotency context for cases that involve `Idempotency-Key`
158 /// reuse.
159 #[serde(default)]
160 pub idempotency: Option<IdempotencyCtx>,
161
162 /// Documented per-request client deadline in milliseconds. Used by
163 /// `timeout_retry` to decide whether the derived elapsed exceeds
164 /// the customer-side budget.
165 #[serde(default)]
166 pub client_deadline_ms: Option<u64>,
167
168 /// Reference "now" (unix seconds) for stale-timestamp checks.
169 /// Pinning this in the fixture is what keeps `webhook_timestamp_stale`
170 /// deterministic across CI runs — the rule never reads the system
171 /// clock.
172 #[serde(default)]
173 pub now_unix: Option<i64>,
174}
175
176/// Selector for how a webhook signature header should be parsed,
177/// and the resulting HMAC signing-input shape.
178///
179/// Each variant maps to a real-world signing scheme used by a major
180/// developer-facing API. Adding a new variant means: extending this
181/// enum, extending `parse_envelope` in `src/rules.rs`, extending the
182/// `signing_input` match in `WebhookSignatureMismatch::evaluate`, and
183/// extending the `envelope_format` enum in `cases.schema.json`.
184#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
185#[serde(rename_all = "snake_case")]
186pub enum EnvelopeFormat {
187 /// Header value is a raw hex digest, optionally prefixed `sha256=`.
188 /// Timestamp comes from a separate `timestamp_header`.
189 /// Signing input: `"{ts}.{body}"`.
190 #[default]
191 Raw,
192 /// Stripe-style envelope: `t=<unix_ts>,v1=<sig>,v0=<sig>,...`.
193 /// Timestamp comes from the envelope's `t=` field; `timestamp_header`
194 /// is ignored.
195 /// Signing input: `"{ts}.{body}"`.
196 StripeV1,
197 /// Slack v0 envelope: `X-Slack-Signature: v0=<hex>`. Timestamp
198 /// comes from a separate `X-Slack-Request-Timestamp` header.
199 /// Signing input: `"v0:{ts}:{body}"`.
200 SlackV0,
201 /// GitHub-style HMAC: `X-Hub-Signature-256: sha256=<hex>`. There
202 /// is no timestamp claim; `webhook_timestamp_stale` cannot fire.
203 /// Signing input: `{body}` (raw body, no prefix).
204 GithubHmac,
205}
206
207/// Webhook context for a case that involves HMAC signing.
208///
209/// `secret_path` resolves relative to the case's `fixture_dir` and is
210/// loaded lazily by [`Case::load_secret`] only when a webhook rule
211/// fires (so non-webhook fixtures need no on-disk secret file).
212#[derive(Debug, Clone, Serialize, Deserialize)]
213pub struct WebhookCtx {
214 /// Path to the signing secret, relative to the fixture directory.
215 /// The file is read verbatim; its trailing newline (if any) is
216 /// stripped by [`Case::load_secret`].
217 pub secret_path: String,
218 /// Name of the header that carries the signature. For
219 /// [`EnvelopeFormat::Raw`] this is the digest header; for
220 /// [`EnvelopeFormat::StripeV1`] it is the envelope header that
221 /// contains both the digest and the timestamp.
222 pub signature_header: String,
223 /// Name of the header that carries the timestamp the sender hashed.
224 /// Ignored when `envelope_format` is `stripe_v1` (the timestamp
225 /// then comes from the envelope's `t=` field).
226 pub timestamp_header: String,
227 /// Maximum acceptable absolute drift (seconds) between
228 /// [`Context::now_unix`] and the timestamp the sender used.
229 pub tolerance_seconds: i64,
230 /// How the signature header should be parsed. Defaults to
231 /// [`EnvelopeFormat::Raw`] so v0.1.0 fixtures continue to validate
232 /// without changes.
233 #[serde(default)]
234 pub envelope_format: EnvelopeFormat,
235}
236
237/// Idempotency context for a case where an `Idempotency-Key` is in
238/// play.
239///
240/// Real APIs (Stripe, many fintech providers) store the SHA-256 of the
241/// body that arrived under a given key. A retry with the same key but
242/// a different body is rejected. The `idempotency_collision` rule
243/// recomputes the digest of the current request body and compares.
244#[derive(Debug, Clone, Serialize, Deserialize)]
245pub struct IdempotencyCtx {
246 /// Name of the idempotency-key header (e.g., `"idempotency-key"`).
247 pub header: String,
248 /// Hex-encoded SHA-256 of the body the server originally stored
249 /// under this idempotency key. Must be exactly 64 hex characters.
250 pub stored_body_sha256: String,
251}
252
253/// One bundled (or user-supplied) failure case.
254///
255/// Construct via [`Case::load`]; do not deserialise directly because
256/// the loader populates `fixture_dir` and `log_path` from the on-disk
257/// layout, which serde alone cannot do.
258#[derive(Debug, Clone, Serialize, Deserialize)]
259pub struct Case {
260 /// Stable identifier; matches the fixture directory name.
261 pub name: String,
262 /// One-sentence description of what the fixture demonstrates.
263 pub description: String,
264 /// Customer-facing severity tag. Not consumed by rules.
265 pub severity: Severity,
266 /// HTTP request bytes captured for this case.
267 pub request: Request,
268 /// HTTP response bytes, if any. `None` means the case captures a
269 /// pre-response failure (typical for timeouts).
270 #[serde(default)]
271 pub response: Option<Response>,
272 /// Free-form context the rules consume.
273 #[serde(default)]
274 pub context: Context,
275 /// Ground-truth label: the rule that should fire as primary on
276 /// this case, or `None` if the case must remain unclassified.
277 /// Used by `tests/calibration.rs` and `tests/calibration_regression.rs`
278 /// as the single source of truth. Optional on disk; cases without
279 /// a label are excluded from the calibration corpus.
280 #[serde(default)]
281 pub expected_rule_id: Option<String>,
282 /// Path to a sibling `server.log`, if present. Populated by
283 /// [`Case::load`]; not part of `case.json` on disk.
284 #[serde(skip)]
285 pub log_path: Option<PathBuf>,
286 /// Directory containing the loaded `case.json`. Populated by
287 /// [`Case::load`]; used to resolve `secret_path` and to walk
288 /// sibling files (`server.log`, `secret.txt`).
289 #[serde(skip)]
290 pub fixture_dir: PathBuf,
291}
292
293impl Case {
294 /// Load a case by name or by path.
295 ///
296 /// The lookup order is:
297 ///
298 /// 1. If `name_or_path` points at an existing file, load that file.
299 /// 2. If it points at an existing directory, load `<dir>/case.json`.
300 /// 3. Otherwise treat it as a name and resolve against
301 /// `<fixtures_root>/cases/<name>/case.json` first, then
302 /// `<fixtures_root>/cases/_negatives/<name>/case.json`.
303 ///
304 /// The third step is what lets `api-debug-lab diagnose
305 /// upstream_401` find a negative fixture without the caller having
306 /// to type the underscore-prefix path.
307 ///
308 /// On success, `fixture_dir` is set to the directory containing
309 /// the loaded `case.json` and `log_path` is set when a sibling
310 /// `server.log` exists.
311 ///
312 /// # Examples
313 ///
314 /// ```no_run
315 /// use api_debug_lab::Case;
316 /// use std::path::Path;
317 ///
318 /// let case = Case::load("auth_missing", Path::new("fixtures"))?;
319 /// assert_eq!(case.name, "auth_missing");
320 /// # Ok::<(), api_debug_lab::CaseLoadError>(())
321 /// ```
322 pub fn load(name_or_path: &str, fixtures_root: &Path) -> Result<Self, CaseLoadError> {
323 let candidate = Path::new(name_or_path);
324 let json_path = if candidate.is_file() {
325 candidate.to_path_buf()
326 } else if candidate.is_dir() {
327 candidate.join("case.json")
328 } else {
329 let dir = fixtures_root.join("cases").join(name_or_path);
330 if dir.is_dir() {
331 dir.join("case.json")
332 } else {
333 let neg_dir = fixtures_root
334 .join("cases")
335 .join("_negatives")
336 .join(name_or_path);
337 if neg_dir.is_dir() {
338 neg_dir.join("case.json")
339 } else {
340 if let Some(case) = crate::embedded::load(name_or_path) {
341 return Ok(case);
342 }
343 return Err(CaseLoadError::UnknownCase(name_or_path.to_string()));
344 }
345 }
346 };
347
348 let raw = fs::read_to_string(&json_path).map_err(|source| CaseLoadError::Io {
349 path: json_path.clone(),
350 source,
351 })?;
352 let mut case: Case = serde_json::from_str(&raw).map_err(|source| CaseLoadError::Parse {
353 path: json_path.clone(),
354 source,
355 })?;
356 // The two on-disk-derived fields are populated here rather than
357 // via serde so that a `Case` constructed in a test (e.g. for
358 // proptest) does not need to fabricate plausible paths.
359 case.fixture_dir = json_path.parent().unwrap_or(Path::new(".")).to_path_buf();
360 let log_candidate = case.fixture_dir.join("server.log");
361 if log_candidate.is_file() {
362 case.log_path = Some(log_candidate);
363 }
364 Ok(case)
365 }
366
367 /// Read the sibling `server.log` if one is present.
368 ///
369 /// Returns `None` for cases that do not bundle a log. Reading is
370 /// lazy: rules that do not consult logs (e.g. `auth_missing`) pay
371 /// no I/O cost.
372 pub fn load_log(&self) -> Option<String> {
373 self.log_path
374 .as_ref()
375 .and_then(|p| fs::read_to_string(p).ok())
376 .or_else(|| crate::embedded::log_for(&self.name, &self.fixture_dir))
377 }
378
379 /// Read the webhook signing secret (`fixture_dir/<secret_path>`).
380 ///
381 /// Returns `None` if the case has no webhook context or if the
382 /// file cannot be read. The trailing newline (if any) is stripped
383 /// so the secret bytes are exactly what the sender used.
384 pub fn load_secret(&self) -> Option<Vec<u8>> {
385 let webhook = self.context.webhook.as_ref()?;
386 let path = self.fixture_dir.join(&webhook.secret_path);
387 fs::read_to_string(path)
388 .ok()
389 .map(|raw| raw.trim_end_matches('\n').as_bytes().to_vec())
390 .or_else(|| crate::embedded::secret_for(&self.name, &self.fixture_dir))
391 }
392}
393
394/// Enumerate the bundled positive fixtures.
395///
396/// Returns the names (one per directory) under
397/// `<fixtures_root>/cases/`, sorted alphabetically and excluding any
398/// directory whose name starts with `_` (the convention for negative
399/// fixtures and other internal-only sets like `_calibration/`).
400///
401/// The `list-cases` subcommand calls this; the `corpus` subcommand
402/// does not — corpus walks the tree directly and includes negatives.
403pub fn list_cases(fixtures_root: &Path) -> Vec<String> {
404 let cases_dir = fixtures_root.join("cases");
405 let Ok(entries) = fs::read_dir(&cases_dir) else {
406 return crate::embedded::positive_names();
407 };
408 let mut names: Vec<String> = entries
409 .filter_map(|e| e.ok())
410 .filter(|e| e.path().is_dir())
411 .filter_map(|e| {
412 let name = e.file_name().to_string_lossy().into_owned();
413 if name.starts_with('_') {
414 None
415 } else if e.path().join("case.json").is_file() {
416 Some(name)
417 } else {
418 None
419 }
420 })
421 .collect();
422 names.sort();
423 names
424}
425
/// Case-insensitive header lookup.
///
/// HTTP header names are case-insensitive on the wire (RFC 9110). The
/// fixtures store them lower-cased by convention, but rules call
/// `header(headers, "Authorization")` and `header(headers, "x-signature")`
/// interchangeably — this helper makes both work.
///
/// Names are compared with ASCII case folding only, directly on the
/// borrowed strings, so no temporary lowercase copies are allocated.
///
/// Returns the value of the first header whose name matches, or `None`
/// when there is no match. "First" means first in the map's key order,
/// which is stable because [`BTreeMap`] is ordered; in practice header
/// names are unique within a case fixture.
pub fn header<'a>(map: &'a BTreeMap<String, String>, name: &str) -> Option<&'a str> {
    map.iter()
        .find(|(key, _)| key.eq_ignore_ascii_case(name))
        .map(|(_, value)| value.as_str())
}