Skip to main content

api_debug_lab/
cases.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! [`Case`] — the input fixture every rule consumes.
4//!
5//! A *case* is a snapshot of one customer-visible API failure: the
6//! request bytes (method, URL, headers, body), an optional response
7//! (status, headers, body), an optional sibling `server.log`, an
8//! optional sibling `secret.txt` for HMAC checks, and a free-form
9//! [`Context`] block carrying anything the rules need that isn't part
10//! of the wire protocol (auth-required flag, expected base URL,
11//! webhook envelope shape, idempotency hash, client deadline, pinned
12//! `now_unix`).
13//!
14//! Cases are laid out on disk one-directory-per-case under
15//! `fixtures/cases/<name>/`. Each directory is self-contained:
16//! `case.json` is the structured data, `server.log` is the bundled
17//! log (text or JSON-lines), `secret.txt` is the HMAC secret. This
18//! shape is deliberate: a real support engineer can drop a customer's
19//! captured artefacts into a directory of the same shape and run the
20//! diagnostic against it via [`Case::load`] or `api-debug-lab corpus`.
21//!
22//! ## Schema
23//!
24//! Every `case.json` is validated against
25//! `fixtures/cases.schema.json` (JSON Schema Draft 2020-12) by
26//! `tests/schema.rs`. The schema is the wire-level contract; this
27//! module is the deserialised mirror of it.
28
29use serde::{Deserialize, Serialize};
30use std::collections::BTreeMap;
31use std::fs;
32use std::path::{Path, PathBuf};
33use thiserror::Error;
34
35/// Errors returned by [`Case::load`].
36///
37/// All variants carry the path the loader was operating on so the CLI
38/// can produce actionable error messages.
39#[derive(Debug, Error)]
40pub enum CaseLoadError {
41    /// Filesystem-level failure reading the `case.json` file.
42    #[error("could not read case file {path}: {source}")]
43    Io {
44        /// Absolute path the loader attempted to read.
45        path: PathBuf,
46        /// Underlying I/O error.
47        source: std::io::Error,
48    },
49
50    /// `case.json` was found but did not deserialise into a [`Case`].
51    #[error("could not parse case file {path}: {source}")]
52    Parse {
53        /// Path that failed to parse.
54        path: PathBuf,
55        /// Underlying serde error (carries line / column).
56        source: serde_json::Error,
57    },
58
59    /// The provided name resolved to neither a file, a directory, nor
60    /// a known fixture under `fixtures/cases/` or `fixtures/cases/_negatives/`.
61    #[error("could not resolve case name {0}: no fixture directory found")]
62    UnknownCase(String),
63}
64
65/// Customer-visible severity tag for a case.
66///
67/// The CLI prints this in the `SEVERITY:` line of the human report;
68/// the JSON renderer serialises it lower-cased. The rule layer does
69/// not consume severity — it is operator metadata, not a signal.
70#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
71#[serde(rename_all = "lowercase")]
72pub enum Severity {
73    /// Cosmetic or informational; no customer impact.
74    Low,
75    /// Customer-impacting but localised; not paging-grade.
76    Medium,
77    /// Production fire; treat as paging-grade.
78    High,
79}
80
81impl Severity {
82    /// Render the severity as the same lowercase token used in JSON.
83    ///
84    /// Useful for the human formatter (`SEVERITY: medium`) and for
85    /// matching against tags in escalation notes.
86    pub fn as_str(self) -> &'static str {
87        match self {
88            Severity::Low => "low",
89            Severity::Medium => "medium",
90            Severity::High => "high",
91        }
92    }
93}
94
95/// HTTP request as captured by the customer or the proxy.
96///
97/// `headers` is a [`BTreeMap`] for deterministic iteration — it ends
98/// up in `curl` reproductions and in snapshot tests, both of which
99/// require byte-stability.
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct Request {
102    /// HTTP method, e.g. `"POST"`.
103    pub method: String,
104    /// Full request URL (scheme + host + path + query).
105    pub url: String,
106    /// Request headers. Header names should be lower-cased on disk so
107    /// that case-insensitive lookups via [`header`] work consistently.
108    #[serde(default)]
109    pub headers: BTreeMap<String, String>,
110    /// Raw request body. Stored as a string so HMAC verification sees
111    /// exactly the bytes the client sent (no JSON re-serialisation).
112    /// `None` means no body was sent.
113    #[serde(default)]
114    pub body: Option<String>,
115}
116
117/// HTTP response as observed by the customer.
118///
119/// Optional on the [`Case`]: a case captured before any response
120/// arrived (e.g., a timeout) has none.
121#[derive(Debug, Clone, Serialize, Deserialize)]
122pub struct Response {
123    /// HTTP status code (100–599).
124    pub status: u16,
125    /// Response headers.
126    #[serde(default)]
127    pub headers: BTreeMap<String, String>,
128    /// Raw response body, if any.
129    #[serde(default)]
130    pub body: Option<String>,
131}
132
133/// Free-form context the rules consume in addition to the wire
134/// request/response.
135///
136/// All fields are optional. Each rule documents which fields it
137/// consults; a rule that does not see what it needs simply does not
138/// fire. This is what lets the same `Case` shape serve multiple rules
139/// without coupling them.
140#[derive(Debug, Clone, Default, Serialize, Deserialize)]
141pub struct Context {
142    /// Whether the endpoint requires client authentication. The
143    /// `auth_missing` rule consults this to decide whether a missing
144    /// `Authorization` header is actually a problem.
145    #[serde(default)]
146    pub auth_required: bool,
147
148    /// Documented API base URL the client *should* be hitting. Used
149    /// by `config_dns_error` for hostname / TLD comparison.
150    #[serde(default)]
151    pub expected_base_url: Option<String>,
152
153    /// Webhook context for cases that involve HMAC signing.
154    #[serde(default)]
155    pub webhook: Option<WebhookCtx>,
156
157    /// Idempotency context for cases that involve `Idempotency-Key`
158    /// reuse.
159    #[serde(default)]
160    pub idempotency: Option<IdempotencyCtx>,
161
162    /// Documented per-request client deadline in milliseconds. Used by
163    /// `timeout_retry` to decide whether the derived elapsed exceeds
164    /// the customer-side budget.
165    #[serde(default)]
166    pub client_deadline_ms: Option<u64>,
167
168    /// Reference "now" (unix seconds) for stale-timestamp checks.
169    /// Pinning this in the fixture is what keeps `webhook_timestamp_stale`
170    /// deterministic across CI runs — the rule never reads the system
171    /// clock.
172    #[serde(default)]
173    pub now_unix: Option<i64>,
174}
175
176/// Selector for how a webhook signature header should be parsed,
177/// and the resulting HMAC signing-input shape.
178///
179/// Each variant maps to a real-world signing scheme used by a major
180/// developer-facing API. Adding a new variant means: extending this
181/// enum, extending `parse_envelope` in `src/rules.rs`, extending the
182/// `signing_input` match in `WebhookSignatureMismatch::evaluate`, and
183/// extending the `envelope_format` enum in `cases.schema.json`.
184#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
185#[serde(rename_all = "snake_case")]
186pub enum EnvelopeFormat {
187    /// Header value is a raw hex digest, optionally prefixed `sha256=`.
188    /// Timestamp comes from a separate `timestamp_header`.
189    /// Signing input: `"{ts}.{body}"`.
190    #[default]
191    Raw,
192    /// Stripe-style envelope: `t=<unix_ts>,v1=<sig>,v0=<sig>,...`.
193    /// Timestamp comes from the envelope's `t=` field; `timestamp_header`
194    /// is ignored.
195    /// Signing input: `"{ts}.{body}"`.
196    StripeV1,
197    /// Slack v0 envelope: `X-Slack-Signature: v0=<hex>`. Timestamp
198    /// comes from a separate `X-Slack-Request-Timestamp` header.
199    /// Signing input: `"v0:{ts}:{body}"`.
200    SlackV0,
201    /// GitHub-style HMAC: `X-Hub-Signature-256: sha256=<hex>`. There
202    /// is no timestamp claim; `webhook_timestamp_stale` cannot fire.
203    /// Signing input: `{body}` (raw body, no prefix).
204    GithubHmac,
205}
206
207/// Webhook context for a case that involves HMAC signing.
208///
209/// `secret_path` resolves relative to the case's `fixture_dir` and is
210/// loaded lazily by [`Case::load_secret`] only when a webhook rule
211/// fires (so non-webhook fixtures need no on-disk secret file).
212#[derive(Debug, Clone, Serialize, Deserialize)]
213pub struct WebhookCtx {
214    /// Path to the signing secret, relative to the fixture directory.
215    /// The file is read verbatim; its trailing newline (if any) is
216    /// stripped by [`Case::load_secret`].
217    pub secret_path: String,
218    /// Name of the header that carries the signature. For
219    /// [`EnvelopeFormat::Raw`] this is the digest header; for
220    /// [`EnvelopeFormat::StripeV1`] it is the envelope header that
221    /// contains both the digest and the timestamp.
222    pub signature_header: String,
223    /// Name of the header that carries the timestamp the sender hashed.
224    /// Ignored when `envelope_format` is `stripe_v1` (the timestamp
225    /// then comes from the envelope's `t=` field).
226    pub timestamp_header: String,
227    /// Maximum acceptable absolute drift (seconds) between
228    /// [`Context::now_unix`] and the timestamp the sender used.
229    pub tolerance_seconds: i64,
230    /// How the signature header should be parsed. Defaults to
231    /// [`EnvelopeFormat::Raw`] so v0.1.0 fixtures continue to validate
232    /// without changes.
233    #[serde(default)]
234    pub envelope_format: EnvelopeFormat,
235}
236
237/// Idempotency context for a case where an `Idempotency-Key` is in
238/// play.
239///
240/// Real APIs (Stripe, many fintech providers) store the SHA-256 of the
241/// body that arrived under a given key. A retry with the same key but
242/// a different body is rejected. The `idempotency_collision` rule
243/// recomputes the digest of the current request body and compares.
244#[derive(Debug, Clone, Serialize, Deserialize)]
245pub struct IdempotencyCtx {
246    /// Name of the idempotency-key header (e.g., `"idempotency-key"`).
247    pub header: String,
248    /// Hex-encoded SHA-256 of the body the server originally stored
249    /// under this idempotency key. Must be exactly 64 hex characters.
250    pub stored_body_sha256: String,
251}
252
253/// One bundled (or user-supplied) failure case.
254///
255/// Construct via [`Case::load`]; do not deserialise directly because
256/// the loader populates `fixture_dir` and `log_path` from the on-disk
257/// layout, which serde alone cannot do.
258#[derive(Debug, Clone, Serialize, Deserialize)]
259pub struct Case {
260    /// Stable identifier; matches the fixture directory name.
261    pub name: String,
262    /// One-sentence description of what the fixture demonstrates.
263    pub description: String,
264    /// Customer-facing severity tag. Not consumed by rules.
265    pub severity: Severity,
266    /// HTTP request bytes captured for this case.
267    pub request: Request,
268    /// HTTP response bytes, if any. `None` means the case captures a
269    /// pre-response failure (typical for timeouts).
270    #[serde(default)]
271    pub response: Option<Response>,
272    /// Free-form context the rules consume.
273    #[serde(default)]
274    pub context: Context,
275    /// Ground-truth label: the rule that should fire as primary on
276    /// this case, or `None` if the case must remain unclassified.
277    /// Used by `tests/calibration.rs` and `tests/calibration_regression.rs`
278    /// as the single source of truth. Optional on disk; cases without
279    /// a label are excluded from the calibration corpus.
280    #[serde(default)]
281    pub expected_rule_id: Option<String>,
282    /// Path to a sibling `server.log`, if present. Populated by
283    /// [`Case::load`]; not part of `case.json` on disk.
284    #[serde(skip)]
285    pub log_path: Option<PathBuf>,
286    /// Directory containing the loaded `case.json`. Populated by
287    /// [`Case::load`]; used to resolve `secret_path` and to walk
288    /// sibling files (`server.log`, `secret.txt`).
289    #[serde(skip)]
290    pub fixture_dir: PathBuf,
291}
292
293impl Case {
294    /// Load a case by name or by path.
295    ///
296    /// The lookup order is:
297    ///
298    /// 1. If `name_or_path` points at an existing file, load that file.
299    /// 2. If it points at an existing directory, load `<dir>/case.json`.
300    /// 3. Otherwise treat it as a name and resolve against
301    ///    `<fixtures_root>/cases/<name>/case.json` first, then
302    ///    `<fixtures_root>/cases/_negatives/<name>/case.json`.
303    ///
304    /// The third step is what lets `api-debug-lab diagnose
305    /// upstream_401` find a negative fixture without the caller having
306    /// to type the underscore-prefix path.
307    ///
308    /// On success, `fixture_dir` is set to the directory containing
309    /// the loaded `case.json` and `log_path` is set when a sibling
310    /// `server.log` exists.
311    ///
312    /// # Examples
313    ///
314    /// ```no_run
315    /// use api_debug_lab::Case;
316    /// use std::path::Path;
317    ///
318    /// let case = Case::load("auth_missing", Path::new("fixtures"))?;
319    /// assert_eq!(case.name, "auth_missing");
320    /// # Ok::<(), api_debug_lab::CaseLoadError>(())
321    /// ```
322    pub fn load(name_or_path: &str, fixtures_root: &Path) -> Result<Self, CaseLoadError> {
323        let candidate = Path::new(name_or_path);
324        let json_path = if candidate.is_file() {
325            candidate.to_path_buf()
326        } else if candidate.is_dir() {
327            candidate.join("case.json")
328        } else {
329            let dir = fixtures_root.join("cases").join(name_or_path);
330            if dir.is_dir() {
331                dir.join("case.json")
332            } else {
333                let neg_dir = fixtures_root
334                    .join("cases")
335                    .join("_negatives")
336                    .join(name_or_path);
337                if neg_dir.is_dir() {
338                    neg_dir.join("case.json")
339                } else {
340                    if let Some(case) = crate::embedded::load(name_or_path) {
341                        return Ok(case);
342                    }
343                    return Err(CaseLoadError::UnknownCase(name_or_path.to_string()));
344                }
345            }
346        };
347
348        let raw = fs::read_to_string(&json_path).map_err(|source| CaseLoadError::Io {
349            path: json_path.clone(),
350            source,
351        })?;
352        let mut case: Case = serde_json::from_str(&raw).map_err(|source| CaseLoadError::Parse {
353            path: json_path.clone(),
354            source,
355        })?;
356        // The two on-disk-derived fields are populated here rather than
357        // via serde so that a `Case` constructed in a test (e.g. for
358        // proptest) does not need to fabricate plausible paths.
359        case.fixture_dir = json_path.parent().unwrap_or(Path::new(".")).to_path_buf();
360        let log_candidate = case.fixture_dir.join("server.log");
361        if log_candidate.is_file() {
362            case.log_path = Some(log_candidate);
363        }
364        Ok(case)
365    }
366
367    /// Read the sibling `server.log` if one is present.
368    ///
369    /// Returns `None` for cases that do not bundle a log. Reading is
370    /// lazy: rules that do not consult logs (e.g. `auth_missing`) pay
371    /// no I/O cost.
372    pub fn load_log(&self) -> Option<String> {
373        self.log_path
374            .as_ref()
375            .and_then(|p| fs::read_to_string(p).ok())
376            .or_else(|| crate::embedded::log_for(&self.name, &self.fixture_dir))
377    }
378
379    /// Read the webhook signing secret (`fixture_dir/<secret_path>`).
380    ///
381    /// Returns `None` if the case has no webhook context or if the
382    /// file cannot be read. The trailing newline (if any) is stripped
383    /// so the secret bytes are exactly what the sender used.
384    pub fn load_secret(&self) -> Option<Vec<u8>> {
385        let webhook = self.context.webhook.as_ref()?;
386        let path = self.fixture_dir.join(&webhook.secret_path);
387        fs::read_to_string(path)
388            .ok()
389            .map(|raw| raw.trim_end_matches('\n').as_bytes().to_vec())
390            .or_else(|| crate::embedded::secret_for(&self.name, &self.fixture_dir))
391    }
392}
393
394/// Enumerate the bundled positive fixtures.
395///
396/// Returns the names (one per directory) under
397/// `<fixtures_root>/cases/`, sorted alphabetically and excluding any
398/// directory whose name starts with `_` (the convention for negative
399/// fixtures and other internal-only sets like `_calibration/`).
400///
401/// The `list-cases` subcommand calls this; the `corpus` subcommand
402/// does not — corpus walks the tree directly and includes negatives.
403pub fn list_cases(fixtures_root: &Path) -> Vec<String> {
404    let cases_dir = fixtures_root.join("cases");
405    let Ok(entries) = fs::read_dir(&cases_dir) else {
406        return crate::embedded::positive_names();
407    };
408    let mut names: Vec<String> = entries
409        .filter_map(|e| e.ok())
410        .filter(|e| e.path().is_dir())
411        .filter_map(|e| {
412            let name = e.file_name().to_string_lossy().into_owned();
413            if name.starts_with('_') {
414                None
415            } else if e.path().join("case.json").is_file() {
416                Some(name)
417            } else {
418                None
419            }
420        })
421        .collect();
422    names.sort();
423    names
424}
425
/// Case-insensitive header lookup.
///
/// HTTP header names are case-insensitive on the wire (RFC 9110). The
/// fixtures store them lower-cased by convention, but rules call
/// `header(headers, "Authorization")` and `header(headers, "x-signature")`
/// interchangeably — this helper makes both work.
///
/// Names are compared with ASCII case folding only, and without
/// allocating. The value of the first matching entry in the map's key
/// order is returned, or `None` when no name matches. The order is
/// stable because [`BTreeMap`] is ordered, but in practice header
/// names are unique within a case fixture.
pub fn header<'a>(map: &'a BTreeMap<String, String>, name: &str) -> Option<&'a str> {
    map.iter()
        .find(|(key, _)| key.eq_ignore_ascii_case(name))
        .map(|(_, value)| value.as_str())
}