Skip to main content

api_debug_lab/
rules.rs

// SPDX-License-Identifier: Apache-2.0

//! The rule layer.
//!
//! Eight rules cover seven failure modes; the webhook case is split
//! into a signature-mismatch rule and a timestamp-staleness rule so
//! they can fire independently and arbitration ranks them by
//! confidence. Public surface:
//!
//! - The [`Rule`] trait. One implementation per failure mode.
//! - [`all_rules`] — returns the registered rules in evaluation order.
//! - [`diagnose`] — runs every rule, sorts by descending confidence,
//!   returns a [`Report`].
//! - [`diagnose_traced`] — same as [`diagnose`] but also returns a
//!   [`RuleTrace`] per rule (wall-clock timing + outcome). Used by
//!   the CLI's `--trace` flag and by `benches/diagnose.rs`.
//!
//! ## Adding a rule
//!
//! 1. Add a private struct that implements [`Rule`].
//! 2. Register it in [`all_rules`].
//! 3. Add a positive fixture (`fixtures/cases/<name>/case.json`) and
//!    a paired negative under `_negatives/` that looks similar but
//!    must not classify.
//! 4. Add an `expected_rule_id` label to every calibration fixture
//!    that should exercise the rule.
//! 5. Document the rule's confidence rubric in
//!    `docs/confidence_model.md`.
//! 6. Run `cargo test` and `cargo insta review` to accept the new
//!    snapshots.
//!
//! ## Confidence model
//!
//! Confidence values are not arbitrary; the rubric in
//! `docs/confidence_model.md` lays out the bands (dispositive,
//! strong, moderate, inadmissible) and `tests/calibration.rs`
//! enforces them via Brier score over the labelled corpus.
39use std::collections::BTreeMap;
40
41use crate::cases::{header, Case, EnvelopeFormat};
42use crate::evidence::Evidence;
43use crate::report::{self, Diagnosis, Report};
44use hmac::{Hmac, Mac};
45use sha2::{Digest, Sha256};
46use url::Url;
47
/// A single log line, parsed lazily as JSON if it begins with `{`,
/// otherwise treated as whitespace-separated `key=value` text.
///
/// Both formats expose the same query API ([`LogLine::field`],
/// [`LogLine::contains_ci`]) so the rules above this layer do not
/// branch on log shape. The JSON path is taken only when the line
/// starts with `{`; for text logs no `serde_json` work happens.
///
/// The struct borrows the original line; the optional JSON value is
/// the one allocation incurred per JSONL line. For the small fixtures
/// in this lab (≤ 8 lines per log) the cost is negligible.
struct LogLine<'a> {
    /// The raw input line (borrowed from the loaded log string).
    raw: &'a str,
    /// `Some(parsed)` when the line was valid JSON; `None` when the
    /// line was treated as text. Only the top-level object shape is
    /// queried by the impl below — nested values are never searched.
    json: Option<serde_json::Value>,
}
66
67impl<'a> LogLine<'a> {
68    /// Construct a `LogLine` from a raw line. The leading `{`
69    /// triggers a single `serde_json` parse attempt; if it fails or
70    /// the line does not start with `{`, the line is treated as
71    /// text. Both paths converge on the same query API below.
72    fn parse(raw: &'a str) -> Self {
73        let json = if raw.trim_start().starts_with('{') {
74            serde_json::from_str(raw).ok()
75        } else {
76            None
77        };
78        Self { raw, json }
79    }
80
81    /// Return the original (un-parsed) line bytes. Useful for
82    /// echoing into evidence verbatim and for timestamp parsing
83    /// (RFC3339 is the same shape in both formats).
84    fn raw(&self) -> &'a str {
85        self.raw
86    }
87
88    /// Read a field value. JSON path takes precedence; fallback is
89    /// whitespace-separated `key=value` text. Returns the value as
90    /// an owned string so callers do not have to track the borrow
91    /// across format branches.
92    fn field(&self, key: &str) -> Option<String> {
93        if let Some(v) = self.json.as_ref().and_then(|j| j.get(key)) {
94            return Some(match v {
95                serde_json::Value::String(s) => s.clone(),
96                serde_json::Value::Number(n) => n.to_string(),
97                serde_json::Value::Bool(b) => b.to_string(),
98                _ => v.to_string(),
99            });
100        }
101        let prefix = format!("{key}=");
102        self.raw
103            .split_whitespace()
104            .find_map(|t| t.strip_prefix(&prefix))
105            .map(|s| s.trim_matches('"').to_string())
106    }
107
108    /// Case-insensitive substring match.
109    ///
110    /// For JSON lines this matches against any string value or any
111    /// key in the top-level object — the value form catches
112    /// `status:"upstream_timeout"` and the key form catches the
113    /// presence of an `error_message` key. For text lines it falls
114    /// back to a lowercased substring check on the raw bytes.
115    ///
116    /// `needle_lc` must already be lower-case; the caller is
117    /// responsible for that.
118    fn contains_ci(&self, needle_lc: &str) -> bool {
119        if let Some(v) = &self.json {
120            if let Some(obj) = v.as_object() {
121                for value in obj.values() {
122                    if let Some(s) = value.as_str() {
123                        if s.to_ascii_lowercase().contains(needle_lc) {
124                            return true;
125                        }
126                    }
127                }
128            }
129            if let Some(obj) = v.as_object() {
130                for key in obj.keys() {
131                    if key.to_ascii_lowercase().contains(needle_lc) {
132                        return true;
133                    }
134                }
135            }
136            return false;
137        }
138        self.raw.to_ascii_lowercase().contains(needle_lc)
139    }
140}
141
/// Parse the leading RFC3339-ish timestamp (`YYYY-MM-DDTHH:MM:SS.sssZ`)
/// out of a log line and return milliseconds since the Unix epoch
/// (1970-01-01T00:00:00Z).
///
/// This is *not* a full RFC3339 parser; it does not handle timezone
/// offsets other than `Z` and does not handle leap seconds (a `:60`
/// seconds field is rejected). It does handle dates in the proleptic
/// Gregorian calendar via Howard Hinnant's `days_from_civil`
/// algorithm, so timestamps that span midnight UTC produce the right
/// elapsed difference. Returns `None` if the line does not start
/// with a recognisable timestamp or if any component is out of range
/// (month, day, hour, minute, or second).
///
/// The returned value is `i64` (signed) so timestamps before 1970
/// remain representable; the `timeout_retry` rule converts to `u64`
/// after subtracting one timestamp from another.
fn parse_timestamp_ms(line: &str) -> Option<i64> {
    // Anchor on the first 'T' and the first 'Z' after it. For lines
    // that start with a timestamp this is exact; lines with a textual
    // prefix fail the numeric parses below and yield None.
    let t_idx = line.find('T')?;
    let date_str = &line[..t_idx];
    let after_t = line.get(t_idx + 1..)?;
    let z_idx = after_t.find('Z')?;
    let time_str = &after_t[..z_idx];

    // Parse "YYYY-MM-DD".
    let mut date_iter = date_str.split('-');
    let year: i64 = date_iter.next()?.parse().ok()?;
    let month: u32 = date_iter.next()?.parse().ok()?;
    let day: u32 = date_iter.next()?.parse().ok()?;
    if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
        return None;
    }

    // Parse "HH:MM:SS[.frac]".
    let mut time_iter = time_str.split(':');
    let h: i64 = time_iter.next()?.parse().ok()?;
    let m: i64 = time_iter.next()?.parse().ok()?;
    let s_part = time_iter.next()?;
    let (sec_str, frac_str) = s_part.split_once('.').unwrap_or((s_part, "0"));
    let s: i64 = sec_str.parse().ok()?;
    // Previously unvalidated: "25:00:00" silently rolled into the
    // next day and produced a plausible-looking millisecond count.
    // Reject impossible times instead (leap-second ":60" included,
    // per the doc comment above).
    if !(0..24).contains(&h) || !(0..60).contains(&m) || !(0..60).contains(&s) {
        return None;
    }
    // Right-pad the fraction so ".5" reads as 500 ms, then truncate
    // to millisecond precision.
    let mut frac_padded = frac_str.to_string();
    while frac_padded.len() < 3 {
        frac_padded.push('0');
    }
    let millis: i64 = frac_padded.get(..3)?.parse().ok()?;

    let days = days_from_civil(year, month, day);
    let ms_of_day = ((h * 60 + m) * 60 + s) * 1000 + millis;
    Some(days * 86_400_000 + ms_of_day)
}

/// Days since the Unix epoch (1970-01-01) for a proleptic Gregorian
/// date. Howard Hinnant's `days_from_civil` algorithm — exact, no
/// table lookups, handles dates from year ±5,879,000 without
/// overflow.
///
/// See: <http://howardhinnant.github.io/date_algorithms.html>
fn days_from_civil(y: i64, m: u32, d: u32) -> i64 {
    // Shift to a March-based year so the leap day falls at year end.
    let y = if m <= 2 { y - 1 } else { y };
    let era = y.div_euclid(400);
    let yoe = (y - era * 400) as u32; // [0, 399]
    let mp = if m > 2 { m - 3 } else { m + 9 }; // March-based [0, 11]
    let doy = (153 * mp + 2) / 5 + d - 1; // [0, 365]
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // [0, 146096]
    era * 146_097 + doe as i64 - 719_468
}
208
/// Parsed view of a webhook's signature-bearing header for the
/// envelope formats this lab supports.
struct WebhookEnvelope {
    /// Unix timestamp claimed by the envelope (or read from the
    /// configured timestamp header for the `raw` format). `None`
    /// when the format carries no timestamp (GitHub) or the value
    /// fails to parse as an integer.
    timestamp: Option<i64>,
    /// Candidate signatures to compare the recomputed HMAC against.
    /// For `raw` this has one element; for `stripe_v1` it can have
    /// `v1=` and `v0=` entries — and may be empty when the header
    /// carried no such pairs (callers must handle that case).
    signatures: Vec<String>,
    /// Best human label for the provided signature, used in evidence.
    label: String,
}
222
223/// Parse the signature-bearing header into the canonical
224/// [`WebhookEnvelope`] shape used by both webhook rules.
225///
226/// For [`EnvelopeFormat::Raw`]: the header value is a single hex
227/// digest (optionally `sha256=`-prefixed); the timestamp comes from
228/// `ts_header_value` (which is read from a separate header).
229///
230/// For [`EnvelopeFormat::StripeV1`]: the header is parsed as
231/// `t=<unix_ts>,v1=<sig>,v0=<sig>,...`. Multiple `v1`/`v0`
232/// entries are all collected so that key rotation (multiple active
233/// secrets, both signed) can pass if any matches.
234fn parse_envelope(
235    format: EnvelopeFormat,
236    sig_header_value: &str,
237    ts_header_value: &str,
238) -> WebhookEnvelope {
239    match format {
240        EnvelopeFormat::Raw => {
241            let normalised = sig_header_value
242                .trim()
243                .trim_start_matches("sha256=")
244                .to_string();
245            let timestamp: Option<i64> = ts_header_value.trim().parse().ok();
246            WebhookEnvelope {
247                timestamp,
248                signatures: vec![normalised],
249                label: sig_header_value.to_string(),
250            }
251        }
252        EnvelopeFormat::StripeV1 => {
253            let mut timestamp: Option<i64> = None;
254            let mut signatures: Vec<String> = Vec::new();
255            for part in sig_header_value.split(',') {
256                let part = part.trim();
257                if let Some((k, v)) = part.split_once('=') {
258                    match k {
259                        "t" => timestamp = v.trim().parse().ok(),
260                        "v1" | "v0" => signatures.push(v.trim().to_string()),
261                        _ => {}
262                    }
263                }
264            }
265            WebhookEnvelope {
266                timestamp,
267                signatures,
268                label: sig_header_value.to_string(),
269            }
270        }
271        EnvelopeFormat::SlackV0 => {
272            // Slack header value: "v0=<hex>" (single signature).
273            let normalised = sig_header_value
274                .trim()
275                .trim_start_matches("v0=")
276                .to_string();
277            let timestamp: Option<i64> = ts_header_value.trim().parse().ok();
278            WebhookEnvelope {
279                timestamp,
280                signatures: vec![normalised],
281                label: sig_header_value.to_string(),
282            }
283        }
284        EnvelopeFormat::GithubHmac => {
285            // GitHub does not send a timestamp; `ts_header_value` is
286            // ignored, and `webhook_timestamp_stale` cannot fire on
287            // this envelope by construction.
288            let normalised = sig_header_value
289                .trim()
290                .trim_start_matches("sha256=")
291                .to_string();
292            WebhookEnvelope {
293                timestamp: None,
294                signatures: vec![normalised],
295                label: sig_header_value.to_string(),
296            }
297        }
298    }
299}
300
/// One diagnostic rule.
///
/// A rule looks at a [`Case`] and either fires (returns a
/// [`Diagnosis`]) or stays silent (returns `None`). Rules are pure:
/// they do not mutate the case, do not read environment variables,
/// and do not perform network I/O. Local file reads (logs, secrets)
/// go through [`Case::load_log`] and [`Case::load_secret`].
///
/// Implementations in this file are zero-sized unit structs,
/// registered once in the `RULES` slice behind [`all_rules`].
///
/// `Send + Sync` is required so that the registered rules can sit
/// behind `&'static dyn Rule` references and be safely shared across
/// threads if a future caller wants to parallelise the sweep — the
/// current orchestrator runs them sequentially because the per-case
/// latency is single-digit microseconds.
pub trait Rule: Send + Sync {
    /// Stable identifier used in reports, logs, snapshot tests, and
    /// the calibration corpus. Must be unique within [`all_rules`]
    /// and match the corresponding fixture's `expected_rule_id`.
    fn id(&self) -> &str;

    /// Evaluate this rule against the case. Return `Some(diagnosis)`
    /// to fire, `None` to stay silent.
    ///
    /// Implementations should return early with `None` whenever a
    /// required signal is absent (no auth context, no webhook
    /// secret, no log file, etc.). The orchestrator does not penalise
    /// silent rules; only firing rules contribute to the report.
    fn evaluate(&self, case: &Case) -> Option<Diagnosis>;
}
329
/// The eight bundled rules, stored as a `'static` slice over
/// zero-sized rule structs. `all_rules()` returns this slice
/// directly — no heap allocation per call.
///
/// Order is not significant for correctness — the orchestrator sorts
/// by confidence, and equal-confidence ranking ties are broken
/// alphabetically on `rule_id`, not by position here — but it does
/// control the order of trace entries. The current order roughly
/// matches request-lifecycle phase (auth → payload parse → rate
/// limit → webhook verify → upstream timeout → config → idempotency).
static RULES: &[&dyn Rule] = &[
    &AuthMissing,
    &BadJsonPayload,
    &RateLimited,
    &WebhookSignatureMismatch,
    &WebhookTimestampStale,
    &TimeoutRetry,
    &ConfigDnsError,
    &IdempotencyCollision,
];
349
/// Return the registered rules in evaluation order.
///
/// Rules are zero-sized structs, so the returned slice is a pointer
/// into static memory and `all_rules()` is essentially free.
pub fn all_rules() -> &'static [&'static dyn Rule] {
    RULES
}
357
/// Per-rule trace entry produced by [`diagnose_traced`].
///
/// Entries are emitted in rule-registration order (the order of
/// [`all_rules`]), one per rule, whether or not the rule fired.
#[derive(Debug, Clone)]
pub struct RuleTrace {
    /// Stable identifier of the rule (matches `Rule::id`).
    pub rule_id: String,
    /// Wall-clock duration of `Rule::evaluate` for this case.
    pub duration: std::time::Duration,
    /// Confidence emitted, or `None` if the rule did not fire.
    pub confidence: Option<f32>,
}
368
369/// Run every rule, sort firing diagnoses by descending confidence, and return
370/// a [`Report`] with the top hit as `primary` and the rest in `also_considered`.
371///
372/// Tie-breaking is alphabetical on `rule_id` so output is byte-stable.
373///
374/// # Examples
375///
376/// ```no_run
377/// use api_debug_lab::{diagnose, Case};
378/// use std::path::Path;
379///
380/// let case = Case::load("auth_missing", Path::new("fixtures"))?;
381/// let report = diagnose(&case);
382/// assert_eq!(report.primary.unwrap().rule_id, "auth_missing");
383/// # Ok::<(), api_debug_lab::CaseLoadError>(())
384/// ```
385pub fn diagnose(case: &Case) -> Report {
386    let (report, _trace) = diagnose_traced(case);
387    report
388}
389
390/// Same as [`diagnose`] but also returns a per-rule trace recording each
391/// rule's wall-clock evaluation time and whether it fired.
392///
393/// Useful for `--trace` output and for benchmarking. Trace entries are
394/// returned in the order rules ran (the order from [`all_rules`]), not
395/// the order they appear in the report.
396///
397/// # Examples
398///
399/// ```no_run
400/// use api_debug_lab::{diagnose_traced, Case};
401/// use std::path::Path;
402///
403/// let case = Case::load("auth_missing", Path::new("fixtures"))?;
404/// let (report, traces) = diagnose_traced(&case);
405/// assert_eq!(traces.len(), 8); // one trace per rule
406/// assert!(report.primary.is_some());
407/// # Ok::<(), api_debug_lab::CaseLoadError>(())
408/// ```
409pub fn diagnose_traced(case: &Case) -> (Report, Vec<RuleTrace>) {
410    let rules = all_rules();
411    let mut hits: Vec<Diagnosis> = Vec::with_capacity(rules.len());
412    let mut traces: Vec<RuleTrace> = Vec::with_capacity(rules.len());
413    for rule in rules {
414        let start = std::time::Instant::now();
415        let outcome = rule.evaluate(case);
416        let duration = start.elapsed();
417        let confidence = outcome.as_ref().map(|d| d.confidence);
418        traces.push(RuleTrace {
419            rule_id: rule.id().to_string(),
420            duration,
421            confidence,
422        });
423        if let Some(d) = outcome {
424            hits.push(d);
425        }
426    }
427    hits.sort_by(|a, b| {
428        b.confidence
429            .partial_cmp(&a.confidence)
430            .unwrap_or(std::cmp::Ordering::Equal)
431            .then_with(|| a.rule_id.cmp(&b.rule_id))
432    });
433    let mut iter = hits.into_iter();
434    let primary = iter.next();
435    let also_considered: Vec<Diagnosis> = iter.collect();
436    let report = Report {
437        case_name: case.name.clone(),
438        severity: case.severity,
439        primary,
440        also_considered,
441        reproduction: report::reproduction(case),
442    };
443    (report, traces)
444}
445
// ---------------------------------------------------------------------------
// Rule 1 — auth_missing
//
// Fires when an `auth_required` route received a request with no
// `Authorization` header. Confidence is 0.95 when the response also
// returned 401 (three independent signals agree); 0.60 when the
// request was captured before any response (signals 1 + 2 only).
// ---------------------------------------------------------------------------
454
455struct AuthMissing;
456
457impl Rule for AuthMissing {
458    fn id(&self) -> &str {
459        "auth_missing"
460    }
461    fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
462        // Required-precondition gates first: no need to inspect a route
463        // that does not require auth or that already carries a token.
464        if !case.context.auth_required {
465            return None;
466        }
467        if header(&case.request.headers, "authorization").is_some() {
468            return None;
469        }
470        let status = case.response.as_ref().map(|r| r.status).unwrap_or(0);
471        let mut evidence = vec![
472            Evidence::with(
473                "Authorization header absent in request",
474                "request.headers.authorization",
475            ),
476            Evidence::with(
477                format!(
478                    "Endpoint {} {} flagged auth_required=true",
479                    case.request.method, case.request.url
480                ),
481                "case.context.auth_required",
482            ),
483        ];
484        let confidence = if status == 401 {
485            evidence.push(Evidence::with(
486                "Response status 401 Unauthorized",
487                "response.status",
488            ));
489            0.95
490        } else {
491            0.60
492        };
493        Some(Diagnosis {
494            rule_id: self.id().into(),
495            likely_cause: "Missing Authorization header".into(),
496            confidence,
497            evidence,
498            next_steps: vec![
499                "Add an Authorization: Bearer <token> header to the request.".into(),
500                "Confirm the token has not expired.".into(),
501                "Verify the token's scope covers the requested operation.".into(),
502            ],
503            escalation: "Customer request failed because the Authorization header was \
504                         absent. The API rejected the request before payload processing. \
505                         Ask the customer to retry with a valid bearer token and confirm \
506                         the token's scope."
507                .into(),
508        })
509    }
510}
511
// ---------------------------------------------------------------------------
// Rule 2 — bad_json_payload
//
// Actually parses the request body with `serde_json` and reports the
// real parse error and byte offset, rather than guessing from the
// status code. The negative fixture `valid_json_schema_fail` proves
// this rule does not fire when the body parses but fails downstream
// schema validation — a different remediation entirely.
// ---------------------------------------------------------------------------
521
522struct BadJsonPayload;
523
524impl Rule for BadJsonPayload {
525    fn id(&self) -> &str {
526        "bad_json_payload"
527    }
528    fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
529        // Need a body and a JSON content type — without these the rule
530        // has no input to parse.
531        let body = case.request.body.as_deref()?;
532        let ct = header(&case.request.headers, "content-type").unwrap_or("");
533        if !ct.contains("application/json") {
534            return None;
535        }
536        // Record the actual parser error including byte offset so the
537        // evidence is provably about the bytes the customer sent, not
538        // a heuristic about status codes.
539        let parse_err = match serde_json::from_str::<serde_json::Value>(body) {
540            Ok(_) => return None,
541            Err(e) => e,
542        };
543        let status = case.response.as_ref().map(|r| r.status).unwrap_or(0);
544        let mut evidence = vec![
545            Evidence::with(
546                format!(
547                    "serde_json parse error at line {} column {}: {}",
548                    parse_err.line(),
549                    parse_err.column(),
550                    parse_err
551                ),
552                "request.body",
553            ),
554            Evidence::with(
555                format!("Content-Type was {ct}; body could not be parsed"),
556                "request.headers.content-type",
557            ),
558        ];
559        let confidence = if matches!(status, 400 | 422) {
560            evidence.push(Evidence::with(
561                format!("Response status {status} confirms server rejected payload"),
562                "response.status",
563            ));
564            0.95
565        } else {
566            0.70
567        };
568        Some(Diagnosis {
569            rule_id: self.id().into(),
570            likely_cause: "Invalid JSON payload".into(),
571            confidence,
572            evidence,
573            next_steps: vec![
574                "Validate the payload against the documented request schema.".into(),
575                "Re-emit the body using a JSON serialiser (avoid hand-built strings).".into(),
576                "If the issue persists, log the raw request bytes before send.".into(),
577            ],
578            escalation: "The request body could not be parsed as JSON. The server \
579                         rejected the request before any business logic ran. Ask the \
580                         customer to share the exact bytes they sent and the producer \
581                         that built them."
582                .into(),
583        })
584    }
585}
586
// ---------------------------------------------------------------------------
// Rule 3 — rate_limited
//
// 429 alone is not enough to be useful in a ticket — the customer
// already knows. The rule's job is to extract the rate-limit math
// from headers (`Retry-After`, `X-RateLimit-Remaining`,
// `X-RateLimit-Reset`) so the next-step guidance is a number, not
// a vibe.
// ---------------------------------------------------------------------------
596
597struct RateLimited;
598
599impl Rule for RateLimited {
600    fn id(&self) -> &str {
601        "rate_limited"
602    }
603    fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
604        let resp = case.response.as_ref()?;
605        // The negative fixture `non_429_high_traffic` carries
606        // rate-limit headers on a 200 response; this gate is what
607        // keeps the rule from over-firing on those.
608        if resp.status != 429 {
609            return None;
610        }
611        let mut evidence = vec![Evidence::with(
612            "Response status 429 Too Many Requests",
613            "response.status",
614        )];
615        let mut confidence: f32 = 0.70;
616        if let Some(remaining) = header(&resp.headers, "x-ratelimit-remaining") {
617            evidence.push(Evidence::with(
618                format!("X-RateLimit-Remaining: {remaining}"),
619                "response.headers.x-ratelimit-remaining",
620            ));
621            if remaining.trim() == "0" {
622                confidence = confidence.max(0.95);
623            } else {
624                confidence = confidence.max(0.85);
625            }
626        }
627        if let Some(retry_after) = header(&resp.headers, "retry-after") {
628            evidence.push(Evidence::with(
629                format!("Retry-After: {retry_after} seconds"),
630                "response.headers.retry-after",
631            ));
632            confidence = confidence.max(0.95);
633        }
634        if let Some(reset) = header(&resp.headers, "x-ratelimit-reset") {
635            evidence.push(Evidence::with(
636                format!("X-RateLimit-Reset (epoch): {reset}"),
637                "response.headers.x-ratelimit-reset",
638            ));
639        }
640        Some(Diagnosis {
641            rule_id: self.id().into(),
642            likely_cause: "Rate limit exceeded".into(),
643            confidence,
644            evidence,
645            next_steps: vec![
646                "Honour the Retry-After header before resending.".into(),
647                "Implement client-side exponential backoff with jitter.".into(),
648                "Reduce request frequency or request a higher quota.".into(),
649            ],
650            escalation: "Customer is hitting the documented rate limit. Confirm whether \
651                         the spike is intentional (campaign / migration) or a runaway \
652                         loop, and whether a temporary quota bump is appropriate."
653                .into(),
654        })
655    }
656}
657
// ---------------------------------------------------------------------------
// Rule 4 — webhook_signature_mismatch
//
// The dispositive rule of the lab. It actually recomputes the HMAC
// over the envelope's documented signing input using the bundled
// secret and compares against the provided signature(s). For Stripe
// v1 envelopes both `v1=` and `v0=` candidates are checked — a single
// match counts as pass. Confidence is a flat 0.92: HMAC mismatch is
// mathematical proof of *some* divergence (secret, body, or timestamp
// prefix); there is no weaker form of "the digests don't match".
// ---------------------------------------------------------------------------
669
670struct WebhookSignatureMismatch;
671
672impl Rule for WebhookSignatureMismatch {
673    fn id(&self) -> &str {
674        "webhook_signature_mismatch"
675    }
676    fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
677        let webhook = case.context.webhook.as_ref()?;
678        let secret = case.load_secret()?;
679        let provided_raw = header(&case.request.headers, &webhook.signature_header)?;
680        let ts_raw = header(&case.request.headers, &webhook.timestamp_header).unwrap_or("");
681        // `parse_envelope` returns one signature for raw envelopes and
682        // potentially several for stripe_v1 (v1, v0, ...). All are
683        // treated as pass-if-any-matches.
684        let env = parse_envelope(webhook.envelope_format, provided_raw, ts_raw);
685        if env.signatures.is_empty() {
686            return None;
687        }
688        // The signing-input shape is per-envelope. Each variant maps
689        // to the documented scheme of a real-world API (see
690        // `EnvelopeFormat` in `src/cases.rs`).
691        let timestamp = env.timestamp.map(|t| t.to_string()).unwrap_or_default();
692        let body = case.request.body.as_deref().unwrap_or("");
693        let signing_input = match webhook.envelope_format {
694            EnvelopeFormat::Raw | EnvelopeFormat::StripeV1 => {
695                format!("{timestamp}.{body}")
696            }
697            EnvelopeFormat::SlackV0 => format!("v0:{timestamp}:{body}"),
698            EnvelopeFormat::GithubHmac => body.to_string(),
699        };
700        let mut mac = <Hmac<Sha256> as Mac>::new_from_slice(&secret).ok()?;
701        mac.update(signing_input.as_bytes());
702        let expected = hex::encode(mac.finalize().into_bytes());
703        if env
704            .signatures
705            .iter()
706            .any(|s| s.eq_ignore_ascii_case(&expected))
707        {
708            return None;
709        }
710        let envelope_label = match webhook.envelope_format {
711            EnvelopeFormat::Raw => "raw",
712            EnvelopeFormat::StripeV1 => "stripe_v1",
713            EnvelopeFormat::SlackV0 => "slack_v0",
714            EnvelopeFormat::GithubHmac => "github_hmac",
715        };
716        let evidence = vec![
717            Evidence::with(
718                format!(
719                    "Provided {} ({envelope_label}): {}",
720                    webhook.signature_header, env.label
721                ),
722                format!(
723                    "request.headers.{}",
724                    webhook.signature_header.to_lowercase()
725                ),
726            ),
727            Evidence::with(
728                format!("Expected (HMAC-SHA256 over '{{timestamp}}.{{body}}'): {expected}"),
729                "computed",
730            ),
731            Evidence::with(
732                format!("Signing input length: {} bytes", signing_input.len()),
733                "computed",
734            ),
735        ];
736        // HMAC mismatch is dispositive: either secret, body, or timestamp prefix
737        // differs. Rated higher than timestamp drift, which is inferential.
738        Some(Diagnosis {
739            rule_id: self.id().into(),
740            likely_cause: "Webhook signature does not match recomputed HMAC".into(),
741            confidence: 0.92,
742            evidence,
743            next_steps: vec![
744                "Confirm the active signing secret matches the one used by the sender.".into(),
745                "Verify the receiver hashes the raw request body (not a re-serialised copy)."
746                    .into(),
747                "Inspect any proxy / middleware that may rewrite the body before validation."
748                    .into(),
749            ],
750            escalation: "Recomputed HMAC differs from the provided signature. The most \
751                         common causes are a rotated-but-not-deployed secret, a body \
752                         being re-serialised (whitespace / key order changes), or a \
753                         proxy mutating the request. Confirm with the customer which \
754                         secret revision is active on their side."
755                .into(),
756        })
757    }
758}
759
760// ---------------------------------------------------------------------------
761// Rule 5 — webhook_timestamp_stale
762//
763// Computes the absolute drift between the timestamp the sender used
764// (from the configured timestamp_header for raw envelopes, or from
765// the envelope's `t=` field for Stripe v1) and the reference
766// `now_unix` pinned in the case. Fires when drift > tolerance_seconds.
767//
768// Confidence tiers:
769//   * drift > 10× tolerance  → 0.90 (systemic, almost certainly not skew)
770//   * drift >  1× tolerance  → 0.85 (could be benign clock skew)
771//
772// Rated lower than `webhook_signature_mismatch` because clock skew has
773// benign causes (NTP hiccup) while HMAC mismatch does not.
774// ---------------------------------------------------------------------------
775
/// Marker type for the `webhook_timestamp_stale` rule; the detection
/// logic lives in its [`Rule`] impl.
struct WebhookTimestampStale;
777
778impl Rule for WebhookTimestampStale {
779    fn id(&self) -> &str {
780        "webhook_timestamp_stale"
781    }
782    fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
783        let webhook = case.context.webhook.as_ref()?;
784        let now = case.context.now_unix?;
785        let provided_sig = header(&case.request.headers, &webhook.signature_header).unwrap_or("");
786        let ts_raw = header(&case.request.headers, &webhook.timestamp_header).unwrap_or("");
787        let env = parse_envelope(webhook.envelope_format, provided_sig, ts_raw);
788        let ts = env.timestamp?;
789        let drift = now - ts;
790        if drift.abs() <= webhook.tolerance_seconds {
791            return None;
792        }
793        let direction = if drift >= 0 { "behind" } else { "ahead of" };
794        let (source_label, source_pointer) = match webhook.envelope_format {
795            EnvelopeFormat::Raw | EnvelopeFormat::SlackV0 => (
796                webhook.timestamp_header.clone(),
797                format!(
798                    "request.headers.{}",
799                    webhook.timestamp_header.to_lowercase()
800                ),
801            ),
802            EnvelopeFormat::StripeV1 => (
803                format!("{} (stripe_v1 t=)", webhook.signature_header),
804                format!(
805                    "request.headers.{}",
806                    webhook.signature_header.to_lowercase()
807                ),
808            ),
809            // Unreachable: GithubHmac has no timestamp; the early
810            // return at `let ts = env.timestamp?` prevents this match
811            // from running. Keeping a reasonable label for safety.
812            EnvelopeFormat::GithubHmac => (
813                "github_hmac (no timestamp)".to_string(),
814                format!(
815                    "request.headers.{}",
816                    webhook.signature_header.to_lowercase()
817                ),
818            ),
819        };
820        let evidence = vec![
821            Evidence::with(
822                format!(
823                    "{}: {} ({} {} reference now)",
824                    source_label,
825                    ts,
826                    drift.abs(),
827                    direction
828                ),
829                source_pointer,
830            ),
831            Evidence::with(
832                format!(
833                    "Tolerance is {} seconds; observed drift {} seconds",
834                    webhook.tolerance_seconds,
835                    drift.abs()
836                ),
837                "case.context.webhook.tolerance_seconds",
838            ),
839        ];
840        let confidence = if drift.abs() > webhook.tolerance_seconds * 10 {
841            0.90
842        } else {
843            0.85
844        };
845        Some(Diagnosis {
846            rule_id: self.id().into(),
847            likely_cause: "Webhook timestamp outside tolerance window".into(),
848            confidence,
849            evidence,
850            next_steps: vec![
851                "Check NTP / clock skew between sender and receiver.".into(),
852                "Confirm the timestamp header reflects the time the payload was signed, \
853                 not the time it was forwarded."
854                    .into(),
855                "If retries are stored on disk before delivery, refresh the signature \
856                 immediately before the actual send."
857                    .into(),
858            ],
859            escalation: "Webhook timestamp is outside the configured tolerance. This \
860                         often indicates clock skew, queued retries that re-send a \
861                         long-stored payload, or a misconfigured replay window."
862                .into(),
863        })
864    }
865}
866
867// ---------------------------------------------------------------------------
868// Rule 6 — timeout_retry
869//
870// Walks the bundled `server.log` (text or JSON-lines, auto-detected
871// per line) looking for timeout-bearing entries. Groups them by
872// `request_id` so interleaved streams do not pool together (the
873// negative `single_timeout_no_retry` and the positive
874// `timeout_retry_partial_outage` both exercise this).
875//
876// Elapsed is *derived* from RFC3339 timestamp prefixes via
877// `parse_timestamp_ms`, not read from a logged convenience field.
878// This makes the rule survive logs that lack `total_elapsed_ms` and
879// mirrors how a real support engineer would compute the number.
880//
881// Confidence tiers:
882//   * derived elapsed > documented client_deadline_ms → 0.90
883//   * max attempt observed ≥ 3 (retry exhaustion)     → 0.85
884//   * client deadline documented but not exceeded     → 0.85
885//   * just ≥ 2 timeouts, no deadline, attempt < 3     → 0.65
886// ---------------------------------------------------------------------------
887
/// Marker type for the `timeout_retry` rule; the detection logic lives
/// in its [`Rule`] impl.
struct TimeoutRetry;
889
890impl Rule for TimeoutRetry {
891    fn id(&self) -> &str {
892        "timeout_retry"
893    }
894    fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
895        let log = case.load_log()?;
896
897        // Group timeouts by request_id so interleaved request streams do not
898        // pollute each other's attempt counts. The rule fires for the worst
899        // offender (most attempts; ties broken by total elapsed).
900        struct Stream<'a> {
901            request_id: String,
902            timeouts: Vec<(u32, LogLine<'a>)>,
903            max_attempt: u32,
904            elapsed_ms: Option<u64>,
905        }
906
907        let mut streams: BTreeMap<String, Stream<'_>> = BTreeMap::new();
908        let mut unknown_id_timeouts: Vec<(u32, LogLine<'_>)> = Vec::new();
909
910        for (idx, raw) in log.lines().enumerate() {
911            let line = LogLine::parse(raw);
912            if !(line.contains_ci("timeout") || line.contains_ci("timed out")) {
913                continue;
914            }
915            let line_no = (idx as u32) + 1;
916            match line.field("request_id") {
917                Some(rid) => {
918                    let entry = streams.entry(rid.clone()).or_insert_with(|| Stream {
919                        request_id: rid,
920                        timeouts: Vec::new(),
921                        max_attempt: 0,
922                        elapsed_ms: None,
923                    });
924                    if let Some(a) = line.field("attempt").and_then(|s| s.parse::<u32>().ok()) {
925                        entry.max_attempt = entry.max_attempt.max(a);
926                    }
927                    entry.timeouts.push((line_no, line));
928                }
929                None => unknown_id_timeouts.push((line_no, line)),
930            }
931        }
932
933        let total_timeouts: usize =
934            streams.values().map(|s| s.timeouts.len()).sum::<usize>() + unknown_id_timeouts.len();
935        if total_timeouts < 2 {
936            return None;
937        }
938
939        // Derive elapsed_ms per stream as the span from the first to the last
940        // log line bearing this request_id (not just timeout-bearing lines —
941        // the final retries-exhausted error usually has a different reason
942        // string but is part of the same customer-facing duration). This
943        // replaces a hand-logged convenience field with a measurement.
944        for stream in streams.values_mut() {
945            let mut min_ms: Option<i64> = None;
946            let mut max_ms: Option<i64> = None;
947            for raw in log.lines() {
948                let line = LogLine::parse(raw);
949                if line.field("request_id").as_deref() != Some(stream.request_id.as_str()) {
950                    continue;
951                }
952                if let Some(ms) = parse_timestamp_ms(raw) {
953                    min_ms = Some(min_ms.map_or(ms, |m| m.min(ms)));
954                    max_ms = Some(max_ms.map_or(ms, |m| m.max(ms)));
955                }
956            }
957            // Subtraction is safe — `b > a` guarantees non-negative;
958            // the cast to u64 stays in range because elapsed_ms can
959            // never exceed `i64::MAX` on any plausible log span.
960            if let (Some(a), Some(b)) = (min_ms, max_ms) {
961                if b > a {
962                    stream.elapsed_ms = Some((b - a) as u64);
963                }
964            }
965        }
966
967        // Pick the worst offender: most timeouts, then highest max_attempt.
968        let primary_stream = streams
969            .values()
970            .max_by_key(|s| (s.timeouts.len(), s.max_attempt));
971
972        let mut evidence: Vec<Evidence> = Vec::new();
973        if let Some(s) = primary_stream {
974            evidence.push(Evidence::with(
975                format!(
976                    "request_id={} accounts for {} timeout entries (max attempt={})",
977                    s.request_id,
978                    s.timeouts.len(),
979                    s.max_attempt
980                ),
981                "server.log",
982            ));
983            for (line_no, line) in s.timeouts.iter().take(4) {
984                evidence.push(Evidence::at_line(
985                    format!("timeout entry: {}", truncate(line.raw(), 160)),
986                    "server.log",
987                    *line_no,
988                ));
989            }
990            if let Some(elapsed) = s.elapsed_ms {
991                evidence.push(Evidence::with(
992                    format!(
993                        "elapsed (derived from log timestamps): {} ms across {} attempts",
994                        elapsed,
995                        s.timeouts.len()
996                    ),
997                    "computed",
998                ));
999            }
1000        }
1001        // Count distinct request_ids in the *whole* log, not just streams
1002        // with timeouts. If more than one is present, the rule has actively
1003        // resisted pooling timeouts across unrelated requests.
1004        let all_request_ids: std::collections::BTreeSet<String> = log
1005            .lines()
1006            .filter_map(|raw| LogLine::parse(raw).field("request_id"))
1007            .collect();
1008        if all_request_ids.len() > 1 {
1009            evidence.push(Evidence::with(
1010                format!(
1011                    "log contains {} distinct request_ids; rule grouped timeouts by request_id rather than pooling",
1012                    all_request_ids.len()
1013                ),
1014                "server.log",
1015            ));
1016        }
1017
1018        let mut confidence: f32 = 0.65;
1019        if let Some(s) = primary_stream {
1020            if s.max_attempt >= 3 {
1021                confidence = confidence.max(0.85);
1022                evidence.push(Evidence::with(
1023                    format!(
1024                        "max attempt observed: {} (suggests retry exhaustion)",
1025                        s.max_attempt
1026                    ),
1027                    "server.log",
1028                ));
1029            }
1030            if let (Some(elapsed), Some(deadline)) = (s.elapsed_ms, case.context.client_deadline_ms)
1031            {
1032                if elapsed > deadline {
1033                    confidence = confidence.max(0.90);
1034                    evidence.push(Evidence::with(
1035                        format!(
1036                            "derived elapsed {} ms exceeds documented client deadline {} ms",
1037                            elapsed, deadline
1038                        ),
1039                        "computed",
1040                    ));
1041                }
1042            }
1043        }
1044        if let Some(deadline_ms) = case.context.client_deadline_ms {
1045            evidence.push(Evidence::with(
1046                format!("documented client deadline: {deadline_ms} ms"),
1047                "case.context.client_deadline_ms",
1048            ));
1049            confidence = confidence.max(0.85);
1050        }
1051
1052        Some(Diagnosis {
1053            rule_id: self.id().into(),
1054            likely_cause: "Upstream timeout with retries exhausted".into(),
1055            confidence,
1056            evidence,
1057            next_steps: vec![
1058                "Inspect upstream latency for the affected endpoint.".into(),
1059                "Verify retry policy (max attempts, backoff, jitter).".into(),
1060                "If the deadline is shorter than upstream p99, raise it or reduce work.".into(),
1061            ],
1062            escalation: "Client retried the request multiple times before failing. \
1063                         Confirm whether upstream latency spiked, whether the retry \
1064                         budget is appropriate for the documented client deadline, and \
1065                         whether idempotency keys protect against duplicate side \
1066                         effects on retry."
1067                .into(),
1068        })
1069    }
1070}
1071
1072// ---------------------------------------------------------------------------
1073// Rule 7 — config_dns_error
1074//
1075// Compares the request URL's host (and scheme) against the
1076// documented `expected_base_url`. The rule is interesting because of
1077// the near-miss detector: if the two hosts differ in exactly one
1078// dot-delimited label by Hamming distance ≤ 2, the rule reports
1079// "near-miss label: X differs from documented Y by ≤2 chars
1080// (typo?)" with confidence 0.90. This is what catches
1081// `acme-co.exemple` vs `acme-co.example`.
1082//
1083// Confidence tiers:
1084//   * one-label near-miss (typo) → 0.90
1085//   * scheme mismatch            → 0.80
1086//   * host mismatch with no near-miss → 0.75
1087// ---------------------------------------------------------------------------
1088
/// Marker type for the `config_dns_error` rule; the detection logic
/// lives in its [`Rule`] impl.
struct ConfigDnsError;
1090
1091impl Rule for ConfigDnsError {
1092    fn id(&self) -> &str {
1093        "config_dns_error"
1094    }
1095    fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
1096        let expected_base = case.context.expected_base_url.as_ref()?;
1097        let expected = Url::parse(expected_base).ok()?;
1098        let actual = Url::parse(&case.request.url).ok()?;
1099        let exp_host = expected.host_str()?;
1100        let act_host = actual.host_str()?;
1101        if act_host == exp_host && actual.scheme() == expected.scheme() {
1102            return None;
1103        }
1104        let mut evidence = vec![
1105            Evidence::with(format!("Request host: {act_host}"), "request.url"),
1106            Evidence::with(
1107                format!("Documented base host: {exp_host}"),
1108                "case.context.expected_base_url",
1109            ),
1110        ];
1111        let mut confidence: f32 = 0.75;
1112        if actual.scheme() != expected.scheme() {
1113            evidence.push(Evidence::with(
1114                format!(
1115                    "Scheme differs: request={}, expected={}",
1116                    actual.scheme(),
1117                    expected.scheme()
1118                ),
1119                "request.url",
1120            ));
1121            confidence = confidence.max(0.80);
1122        }
1123        if let Some(hint) = near_miss_hint(act_host, exp_host) {
1124            evidence.push(Evidence::with(hint, "computed"));
1125            confidence = confidence.max(0.90);
1126        }
1127        Some(Diagnosis {
1128            rule_id: self.id().into(),
1129            likely_cause: "API base URL or hostname does not match documented endpoint".into(),
1130            confidence,
1131            evidence,
1132            next_steps: vec![
1133                "Confirm the API base URL in the customer's environment configuration.".into(),
1134                "Run `dig` / `nslookup` against the documented host to rule out DNS issues.".into(),
1135                "Check for environment variable overrides (staging vs production).".into(),
1136            ],
1137            escalation: "Customer is targeting a host that does not match the documented \
1138                         API base. The most common causes are a stale base-URL config, a \
1139                         staging endpoint left in production, or a typo in a TLD or \
1140                         subdomain. Verify the deploying revision before assuming a DNS \
1141                         outage."
1142                .into(),
1143        })
1144    }
1145}
1146
1147// ---------------------------------------------------------------------------
1148// Rule 8 — idempotency_collision
1149//
1150// Recomputes SHA-256 of the current request body and compares
1151// against `context.idempotency.stored_body_sha256` — the hash the
1152// server stored under this `Idempotency-Key` on the first send. A
1153// mismatch means the customer reused the key with a different body,
1154// which Stripe and many real APIs reject with 422.
1155//
1156// Confidence tiers:
1157//   * 422 + hash mismatch          → 0.93 (Stripe-shape)
1158//   * other 4xx + hash mismatch    → 0.80
1159//   * non-error response, mismatch → 0.70
1160// ---------------------------------------------------------------------------
1161
/// Marker type for the `idempotency_collision` rule; the detection
/// logic lives in its [`Rule`] impl.
struct IdempotencyCollision;
1163
1164impl Rule for IdempotencyCollision {
1165    fn id(&self) -> &str {
1166        "idempotency_collision"
1167    }
1168    fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
1169        let idem = case.context.idempotency.as_ref()?;
1170        let key = header(&case.request.headers, &idem.header)?;
1171        let body = case.request.body.as_deref().unwrap_or("");
1172        let mut hasher = Sha256::new();
1173        hasher.update(body.as_bytes());
1174        let actual = hex::encode(hasher.finalize());
1175        if actual.eq_ignore_ascii_case(&idem.stored_body_sha256) {
1176            return None;
1177        }
1178        let status = case.response.as_ref().map(|r| r.status).unwrap_or(0);
1179        let mut evidence = vec![
1180            Evidence::with(
1181                format!("Idempotency-Key: {key}"),
1182                format!("request.headers.{}", idem.header.to_lowercase()),
1183            ),
1184            Evidence::with(
1185                format!("Stored body SHA-256: {}", idem.stored_body_sha256),
1186                "case.context.idempotency.stored_body_sha256",
1187            ),
1188            Evidence::with(format!("Current body SHA-256: {actual}"), "computed"),
1189            Evidence::with(
1190                format!("Current body length: {} bytes", body.len()),
1191                "request.body",
1192            ),
1193        ];
1194        let confidence = if status == 422 {
1195            evidence.push(Evidence::with(
1196                "Response status 422 confirms server rejected duplicate-key with different body",
1197                "response.status",
1198            ));
1199            0.93
1200        } else if (400..500).contains(&status) {
1201            0.80
1202        } else {
1203            0.70
1204        };
1205        Some(Diagnosis {
1206            rule_id: self.id().into(),
1207            likely_cause: "Idempotency-Key reused with a different request body".into(),
1208            confidence,
1209            evidence,
1210            next_steps: vec![
1211                "Generate a fresh Idempotency-Key for any logically new request.".into(),
1212                "If retrying, send byte-identical body bytes used on the first attempt.".into(),
1213                "Check whether a serialiser or middleware is adding fields between attempts."
1214                    .into(),
1215            ],
1216            escalation: "Customer reused an Idempotency-Key with a different body, so the \
1217                         server returned its stored-body-mismatch error. Confirm whether \
1218                         their retry logic captures the body before its first send and \
1219                         replays the same bytes, or whether a logging / proxy layer is \
1220                         re-serialising between attempts."
1221                .into(),
1222        })
1223    }
1224}
1225
/// Heuristic that flags hostname near-misses worth reporting as
/// "this looks like a typo, not a configuration drift."
///
/// Two hosts with the same number of dot-delimited labels that
/// differ in exactly one label, where the differing labels are the
/// same length and within Hamming distance 2, are reported as a
/// near-miss. This catches `acme-co.exemple` vs `acme-co.example`
/// without firing on `staging-api.acme-co.example` vs
/// `api.acme-co.example` (different label count).
///
/// As a fallback, when the suffix-most labels differ outright (TLD
/// mismatch like `.local` vs `.example`), a TLD-differs hint is
/// emitted instead. The TLDs are compared as whole labels: an earlier
/// `ends_with` check wrongly treated a host ending in e.g.
/// `fooexample` as matching a documented `example` TLD and stayed
/// silent.
fn near_miss_hint(actual: &str, expected: &str) -> Option<String> {
    // Labels suffix-first so positional comparison lines up TLD with
    // TLD regardless of host depth (depths are equal in this branch).
    let a_labels: Vec<&str> = actual.rsplit('.').collect();
    let e_labels: Vec<&str> = expected.rsplit('.').collect();
    if a_labels.len() == e_labels.len() {
        let mut differing = a_labels.iter().zip(e_labels.iter()).filter(|(a, e)| a != e);
        // Exactly one differing label: next() yields it, a second
        // next() must come up empty.
        if let (Some((a, e)), None) = (differing.next(), differing.next()) {
            // Same length + Hamming ≤ 2 captures realistic typos
            // ("exemple" / "example" — one letter swap) without
            // matching unrelated short labels like "api" vs "abc".
            // The distance is computed inline so the whole check is
            // self-contained.
            let close_enough = a.len() == e.len()
                && a.chars().zip(e.chars()).filter(|(x, y)| x != y).count() <= 2;
            if close_enough {
                return Some(format!(
                    "near-miss label: '{a}' differs from documented '{e}' by ≤2 chars (typo?)"
                ));
            }
        }
    }
    // Fallback: compare the suffix-most labels as whole labels. A bare
    // suffix check (`ends_with`) would miss genuinely different TLDs
    // that merely share a suffix.
    if actual.rsplit('.').next() != expected.rsplit('.').next() {
        return Some(format!(
            "TLD differs: request '{actual}' vs documented '{expected}'"
        ));
    }
    None
}
1270
/// Number of positions at which two equal-length strings disagree.
///
/// The chars are walked pairwise and each mismatch is tallied. The
/// `zip` silently stops at the shorter input, so this does not handle
/// unequal-length strings — callers gate on length first.
fn hamming(a: &str, b: &str) -> usize {
    let mut mismatches = 0;
    for (x, y) in a.chars().zip(b.chars()) {
        if x != y {
            mismatches += 1;
        }
    }
    mismatches
}
1280
/// Cap a string at roughly `max` bytes, appending an ellipsis if
/// truncated.
///
/// Used in evidence messages so a noisy log line never blows up the
/// human report. The cut point is backed off to the nearest UTF-8
/// character boundary at or below `max`: the previous byte-slice
/// (`&s[..max]`) panicked whenever a multibyte character straddled
/// the limit, and log lines are not guaranteed to be ASCII-clean.
fn truncate(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }
    // Walk back until the cut lands on a char boundary; index 0 is
    // always a boundary, so this terminates.
    let mut cut = max;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}\u{2026}", &s[..cut])
}
1293
1294// ---------------------------------------------------------------------------
1295// Unit tests for private helpers.
1296//
1297// Mutation testing (`cargo mutants --file src/rules.rs`) flagged the
1298// helpers below as the largest coverage gaps in the suite — their
1299// arithmetic / boundary mutants survive when nothing tests the
1300// helpers directly. These tests exist to kill those mutants.
1301// ---------------------------------------------------------------------------
1302
#[cfg(test)]
mod private_helper_tests {
    use super::*;

    // days_from_civil: exact reference values from Hinnant's table.
    // Each row verifies one mutation class (year boundary, leap year,
    // March-based month mapping, era boundary).

    #[test]
    fn days_from_civil_unix_epoch() {
        assert_eq!(days_from_civil(1970, 1, 1), 0);
    }

    #[test]
    fn days_from_civil_one_day_after_epoch() {
        assert_eq!(days_from_civil(1970, 1, 2), 1);
    }

    #[test]
    fn days_from_civil_one_year_after_epoch() {
        assert_eq!(days_from_civil(1971, 1, 1), 365);
    }

    #[test]
    fn days_from_civil_2000_leap_day() {
        // 2000 is a century divisible by 400 → leap year.
        // Days from 1970-01-01 to 2000-02-29:
        //   30 years × 365 + 7 leap days (1972, 76, 80, 84, 88, 92, 96;
        //   2000's own Feb 29 is not yet counted)
        //   = 10950 + 7 = 10957 to 2000-01-01
        //   + 31 (Jan) + 29 (Feb 1..29) - 1 (zero-indexed Feb 29) = 10957 + 59 = 11016
        // Verified via Python: datetime.date(2000, 2, 29).toordinal() - datetime.date(1970, 1, 1).toordinal()
        assert_eq!(days_from_civil(2000, 2, 29), 11016);
    }

    #[test]
    fn days_from_civil_2100_not_leap() {
        // 2100 is divisible by 100 but not 400 → not a leap year.
        // March 1, 2100 should be exactly 365 + ... days (no extra leap).
        // We just check March 1 lands one day after Feb 28.
        let feb28 = days_from_civil(2100, 2, 28);
        let mar01 = days_from_civil(2100, 3, 1);
        assert_eq!(mar01 - feb28, 1, "2100 must not be a leap year");
    }

    #[test]
    fn days_from_civil_2400_leap() {
        // 2400 is divisible by 400 → leap year.
        let feb29 = days_from_civil(2400, 2, 29);
        let mar01 = days_from_civil(2400, 3, 1);
        assert_eq!(mar01 - feb29, 1, "2400 must be a leap year");
    }

    #[test]
    fn days_from_civil_pre_epoch() {
        // 1969-12-31 is one day before the epoch → -1.
        assert_eq!(days_from_civil(1969, 12, 31), -1);
    }

    // parse_timestamp_ms: exercises both the date parser (above) and
    // the time-of-day parser. Cross-day spans must produce monotonic
    // milliseconds.

    #[test]
    fn parse_timestamp_ms_unix_epoch_zero() {
        assert_eq!(parse_timestamp_ms("1970-01-01T00:00:00.000Z"), Some(0));
    }

    #[test]
    fn parse_timestamp_ms_one_second_after_epoch() {
        assert_eq!(parse_timestamp_ms("1970-01-01T00:00:01.000Z"), Some(1000));
    }

    #[test]
    fn parse_timestamp_ms_pads_short_fractions() {
        // ".5" must mean 500 ms, not 5 ms.
        assert_eq!(parse_timestamp_ms("1970-01-01T00:00:00.5Z"), Some(500));
    }

    #[test]
    fn parse_timestamp_ms_returns_none_on_garbage() {
        assert_eq!(parse_timestamp_ms("not a timestamp"), None);
        assert_eq!(parse_timestamp_ms(""), None);
    }

    #[test]
    fn parse_timestamp_ms_rejects_invalid_month() {
        // Month 13 is rejected by the upfront range guard.
        assert_eq!(parse_timestamp_ms("1970-13-01T00:00:00.000Z"), None);
    }

    #[test]
    fn parse_timestamp_ms_cross_midnight_is_monotone() {
        let before = parse_timestamp_ms("2026-12-31T23:59:59.500Z").unwrap();
        let after = parse_timestamp_ms("2027-01-01T00:00:01.500Z").unwrap();
        assert_eq!(after - before, 2000, "cross-midnight span must be 2 s");
    }

    // hamming: exhaustive small-input coverage.

    #[test]
    fn hamming_identical_strings() {
        assert_eq!(hamming("abc", "abc"), 0);
    }

    #[test]
    fn hamming_one_char_diff() {
        assert_eq!(hamming("abc", "abd"), 1);
    }

    #[test]
    fn hamming_all_diff() {
        assert_eq!(hamming("abc", "xyz"), 3);
    }

    #[test]
    fn hamming_empty_strings() {
        assert_eq!(hamming("", ""), 0);
    }

    // near_miss_hint: covers the typo branch, the TLD-differs branch,
    // and the no-hint branch.

    #[test]
    fn near_miss_hint_typo_label() {
        let hint = near_miss_hint("api.acme.exemple", "api.acme.example");
        let h = hint.expect("typo near-miss must produce a hint");
        assert!(h.contains("near-miss"), "{h}");
    }

    #[test]
    fn near_miss_hint_completely_different_tld() {
        let hint = near_miss_hint("api.acme.local", "api.acme.example");
        let h = hint.expect("TLD-differs must produce a hint");
        assert!(h.contains("TLD differs"), "{h}");
    }

    #[test]
    fn near_miss_hint_label_count_differs() {
        // staging.api.acme.example has 4 labels; api.acme.example has 3.
        // No near-miss heuristic applies; depending on TLD comparison
        // this can still emit a TLD-differs hint, but only when the
        // suffix-most label genuinely differs. For matching TLDs the
        // result is `None`.
        let hint = near_miss_hint("staging.api.acme.example", "api.acme.example");
        // Either None or a hint that names a real divergence.
        if let Some(h) = hint {
            assert!(h.contains("TLD") || h.contains("near-miss"), "{h}");
        }
    }

    // truncate: boundary on `max`.

    #[test]
    fn truncate_under_limit_passes_through() {
        assert_eq!(truncate("hi", 10), "hi");
    }

    #[test]
    fn truncate_at_limit_passes_through() {
        assert_eq!(truncate("hello", 5), "hello");
    }

    #[test]
    fn truncate_over_limit_appends_ellipsis() {
        assert_eq!(truncate("hello!", 5), "hello\u{2026}");
    }

    // parse_envelope: per-format dispatch.

    #[test]
    fn parse_envelope_raw_strips_sha256_prefix() {
        let env = parse_envelope(EnvelopeFormat::Raw, "sha256=deadbeef", "1700000000");
        assert_eq!(env.signatures, vec!["deadbeef".to_string()]);
        assert_eq!(env.timestamp, Some(1_700_000_000));
    }

    #[test]
    fn parse_envelope_stripe_v1_collects_v1_and_v0() {
        let env = parse_envelope(
            EnvelopeFormat::StripeV1,
            "t=1700000000,v1=aaaa,v0=bbbb",
            "ignored",
        );
        assert_eq!(env.signatures, vec!["aaaa".to_string(), "bbbb".to_string()]);
        assert_eq!(env.timestamp, Some(1_700_000_000));
    }

    #[test]
    fn parse_envelope_slack_v0_strips_prefix() {
        let env = parse_envelope(EnvelopeFormat::SlackV0, "v0=cafef00d", "1700000000");
        assert_eq!(env.signatures, vec!["cafef00d".to_string()]);
        assert_eq!(env.timestamp, Some(1_700_000_000));
    }

    #[test]
    fn parse_envelope_github_hmac_has_no_timestamp() {
        let env = parse_envelope(
            EnvelopeFormat::GithubHmac,
            "sha256=feedface",
            "this should be ignored",
        );
        assert_eq!(env.signatures, vec!["feedface".to_string()]);
        assert_eq!(env.timestamp, None, "GitHub envelope claims no timestamp");
    }
}
1507}