1use std::collections::BTreeMap;
40
41use crate::cases::{header, Case, EnvelopeFormat};
42use crate::evidence::Evidence;
43use crate::report::{self, Diagnosis, Report};
44use hmac::{Hmac, Mac};
45use sha2::{Digest, Sha256};
46use url::Url;
47
/// A single log line, lazily interpreted either as a JSON object or as
/// free-form `key=value` text.
struct LogLine<'a> {
    // Original line text, borrowed from the loaded log.
    raw: &'a str,
    // Parsed JSON when the line looks like a JSON object (starts with '{');
    // `None` for plain-text lines or lines that fail to parse.
    json: Option<serde_json::Value>,
}
66
67impl<'a> LogLine<'a> {
68 fn parse(raw: &'a str) -> Self {
73 let json = if raw.trim_start().starts_with('{') {
74 serde_json::from_str(raw).ok()
75 } else {
76 None
77 };
78 Self { raw, json }
79 }
80
81 fn raw(&self) -> &'a str {
85 self.raw
86 }
87
88 fn field(&self, key: &str) -> Option<String> {
93 if let Some(v) = self.json.as_ref().and_then(|j| j.get(key)) {
94 return Some(match v {
95 serde_json::Value::String(s) => s.clone(),
96 serde_json::Value::Number(n) => n.to_string(),
97 serde_json::Value::Bool(b) => b.to_string(),
98 _ => v.to_string(),
99 });
100 }
101 let prefix = format!("{key}=");
102 self.raw
103 .split_whitespace()
104 .find_map(|t| t.strip_prefix(&prefix))
105 .map(|s| s.trim_matches('"').to_string())
106 }
107
108 fn contains_ci(&self, needle_lc: &str) -> bool {
119 if let Some(v) = &self.json {
120 if let Some(obj) = v.as_object() {
121 for value in obj.values() {
122 if let Some(s) = value.as_str() {
123 if s.to_ascii_lowercase().contains(needle_lc) {
124 return true;
125 }
126 }
127 }
128 }
129 if let Some(obj) = v.as_object() {
130 for key in obj.keys() {
131 if key.to_ascii_lowercase().contains(needle_lc) {
132 return true;
133 }
134 }
135 }
136 return false;
137 }
138 self.raw.to_ascii_lowercase().contains(needle_lc)
139 }
140}
141
/// Parses a UTC timestamp of the form `YYYY-MM-DDTHH:MM:SS[.fff]Z` at the
/// start of `line`, returning milliseconds since the Unix epoch, or `None`
/// when the line does not begin with a well-formed timestamp.
///
/// Fractional seconds shorter than three digits are right-padded with
/// zeros ("0.5" -> 500 ms); longer fractions are truncated to millisecond
/// precision.
fn parse_timestamp_ms(line: &str) -> Option<i64> {
    let t_idx = line.find('T')?;
    let date_str = &line[..t_idx];
    let after_t = line.get(t_idx + 1..)?;
    let z_idx = after_t.find('Z')?;
    let time_str = &after_t[..z_idx];

    let mut date_iter = date_str.split('-');
    let year: i64 = date_iter.next()?.parse().ok()?;
    let month: u32 = date_iter.next()?.parse().ok()?;
    let day: u32 = date_iter.next()?.parse().ok()?;
    if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
        return None;
    }

    let mut iter = time_str.split(':');
    let h: i64 = iter.next()?.parse().ok()?;
    let m: i64 = iter.next()?.parse().ok()?;
    let s_part = iter.next()?;
    let (sec_str, frac_str) = match s_part.split_once('.') {
        Some((s, f)) => (s, f),
        None => (s_part, "0"),
    };
    let s: i64 = sec_str.parse().ok()?;
    // Reject out-of-range time components. The original accepted e.g. hour
    // 99 and silently produced a nonsense epoch value. Second 60 is kept
    // legal to tolerate leap seconds.
    if !(0..24).contains(&h) || !(0..60).contains(&m) || !(0..=60).contains(&s) {
        return None;
    }
    let mut frac_padded = frac_str.to_string();
    while frac_padded.len() < 3 {
        frac_padded.push('0');
    }
    let millis: i64 = frac_padded.get(..3)?.parse().ok()?;

    let days = days_from_civil(year, month, day);
    let ms_of_day = ((h * 60 + m) * 60 + s) * 1000 + millis;
    Some(days * 86_400_000 + ms_of_day)
}

/// Days since 1970-01-01 for a proleptic-Gregorian civil date (Howard
/// Hinnant's `days_from_civil` algorithm). Negative for dates before the
/// epoch. Reformatted — the original crammed the whole body onto one line.
fn days_from_civil(y: i64, m: u32, d: u32) -> i64 {
    // Shift the year so the leap day becomes the last day of the
    // (March-based) adjusted year.
    let y = if m <= 2 { y - 1 } else { y };
    let era = y.div_euclid(400);
    // Year-of-era [0, 399], March-based month index, day-of-year, day-of-era.
    let yoe = (y - era * 400) as u32;
    let mp = if m > 2 { m - 3 } else { m + 9 };
    let doy = (153 * mp + 2) / 5 + d - 1;
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    era * 146_097 + doe as i64 - 719_468
}
208
/// Provider-agnostic view of a webhook signature envelope, produced by
/// `parse_envelope`.
struct WebhookEnvelope {
    // Unix timestamp carried by the envelope (header or `t=` part), when any.
    timestamp: Option<i64>,
    // Candidate hex signatures, prefix-stripped; may be empty.
    signatures: Vec<String>,
    // The raw signature-header value, preserved verbatim for evidence output.
    label: String,
}
222
223fn parse_envelope(
235 format: EnvelopeFormat,
236 sig_header_value: &str,
237 ts_header_value: &str,
238) -> WebhookEnvelope {
239 match format {
240 EnvelopeFormat::Raw => {
241 let normalised = sig_header_value
242 .trim()
243 .trim_start_matches("sha256=")
244 .to_string();
245 let timestamp: Option<i64> = ts_header_value.trim().parse().ok();
246 WebhookEnvelope {
247 timestamp,
248 signatures: vec![normalised],
249 label: sig_header_value.to_string(),
250 }
251 }
252 EnvelopeFormat::StripeV1 => {
253 let mut timestamp: Option<i64> = None;
254 let mut signatures: Vec<String> = Vec::new();
255 for part in sig_header_value.split(',') {
256 let part = part.trim();
257 if let Some((k, v)) = part.split_once('=') {
258 match k {
259 "t" => timestamp = v.trim().parse().ok(),
260 "v1" | "v0" => signatures.push(v.trim().to_string()),
261 _ => {}
262 }
263 }
264 }
265 WebhookEnvelope {
266 timestamp,
267 signatures,
268 label: sig_header_value.to_string(),
269 }
270 }
271 EnvelopeFormat::SlackV0 => {
272 let normalised = sig_header_value
274 .trim()
275 .trim_start_matches("v0=")
276 .to_string();
277 let timestamp: Option<i64> = ts_header_value.trim().parse().ok();
278 WebhookEnvelope {
279 timestamp,
280 signatures: vec![normalised],
281 label: sig_header_value.to_string(),
282 }
283 }
284 EnvelopeFormat::GithubHmac => {
285 let normalised = sig_header_value
289 .trim()
290 .trim_start_matches("sha256=")
291 .to_string();
292 WebhookEnvelope {
293 timestamp: None,
294 signatures: vec![normalised],
295 label: sig_header_value.to_string(),
296 }
297 }
298 }
299}
300
/// A single diagnostic heuristic. Implementations inspect a [`Case`] and,
/// when their signature pattern matches, return a [`Diagnosis`] carrying
/// evidence, next steps, and a confidence score.
pub trait Rule: Send + Sync {
    /// Stable machine-readable identifier (snake_case); used as the sort
    /// tie-breaker in `diagnose_traced` and reported in [`RuleTrace`].
    fn id(&self) -> &str;

    /// Returns `Some(diagnosis)` when the rule matches `case`, `None`
    /// otherwise.
    fn evaluate(&self, case: &Case) -> Option<Diagnosis>;
}
329
// Registry of every known rule. Order here only determines trace order;
// ranking in the final report is by confidence (see `diagnose_traced`).
static RULES: &[&dyn Rule] = &[
    &AuthMissing,
    &BadJsonPayload,
    &RateLimited,
    &WebhookSignatureMismatch,
    &WebhookTimestampStale,
    &TimeoutRetry,
    &ConfigDnsError,
    &IdempotencyCollision,
];
349
/// All registered diagnostic rules, in registry order.
pub fn all_rules() -> &'static [&'static dyn Rule] {
    RULES
}
357
/// Per-rule execution record produced by [`diagnose_traced`].
#[derive(Debug, Clone)]
pub struct RuleTrace {
    /// Identifier of the rule that ran (see [`Rule::id`]).
    pub rule_id: String,
    /// Wall-clock time spent inside `Rule::evaluate`.
    pub duration: std::time::Duration,
    /// Confidence of the produced diagnosis; `None` when the rule did not match.
    pub confidence: Option<f32>,
}
368
369pub fn diagnose(case: &Case) -> Report {
386 let (report, _trace) = diagnose_traced(case);
387 report
388}
389
390pub fn diagnose_traced(case: &Case) -> (Report, Vec<RuleTrace>) {
410 let rules = all_rules();
411 let mut hits: Vec<Diagnosis> = Vec::with_capacity(rules.len());
412 let mut traces: Vec<RuleTrace> = Vec::with_capacity(rules.len());
413 for rule in rules {
414 let start = std::time::Instant::now();
415 let outcome = rule.evaluate(case);
416 let duration = start.elapsed();
417 let confidence = outcome.as_ref().map(|d| d.confidence);
418 traces.push(RuleTrace {
419 rule_id: rule.id().to_string(),
420 duration,
421 confidence,
422 });
423 if let Some(d) = outcome {
424 hits.push(d);
425 }
426 }
427 hits.sort_by(|a, b| {
428 b.confidence
429 .partial_cmp(&a.confidence)
430 .unwrap_or(std::cmp::Ordering::Equal)
431 .then_with(|| a.rule_id.cmp(&b.rule_id))
432 });
433 let mut iter = hits.into_iter();
434 let primary = iter.next();
435 let also_considered: Vec<Diagnosis> = iter.collect();
436 let report = Report {
437 case_name: case.name.clone(),
438 severity: case.severity,
439 primary,
440 also_considered,
441 reproduction: report::reproduction(case),
442 };
443 (report, traces)
444}
445
446struct AuthMissing;
456
457impl Rule for AuthMissing {
458 fn id(&self) -> &str {
459 "auth_missing"
460 }
461 fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
462 if !case.context.auth_required {
465 return None;
466 }
467 if header(&case.request.headers, "authorization").is_some() {
468 return None;
469 }
470 let status = case.response.as_ref().map(|r| r.status).unwrap_or(0);
471 let mut evidence = vec![
472 Evidence::with(
473 "Authorization header absent in request",
474 "request.headers.authorization",
475 ),
476 Evidence::with(
477 format!(
478 "Endpoint {} {} flagged auth_required=true",
479 case.request.method, case.request.url
480 ),
481 "case.context.auth_required",
482 ),
483 ];
484 let confidence = if status == 401 {
485 evidence.push(Evidence::with(
486 "Response status 401 Unauthorized",
487 "response.status",
488 ));
489 0.95
490 } else {
491 0.60
492 };
493 Some(Diagnosis {
494 rule_id: self.id().into(),
495 likely_cause: "Missing Authorization header".into(),
496 confidence,
497 evidence,
498 next_steps: vec![
499 "Add an Authorization: Bearer <token> header to the request.".into(),
500 "Confirm the token has not expired.".into(),
501 "Verify the token's scope covers the requested operation.".into(),
502 ],
503 escalation: "Customer request failed because the Authorization header was \
504 absent. The API rejected the request before payload processing. \
505 Ask the customer to retry with a valid bearer token and confirm \
506 the token's scope."
507 .into(),
508 })
509 }
510}
511
512struct BadJsonPayload;
523
524impl Rule for BadJsonPayload {
525 fn id(&self) -> &str {
526 "bad_json_payload"
527 }
528 fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
529 let body = case.request.body.as_deref()?;
532 let ct = header(&case.request.headers, "content-type").unwrap_or("");
533 if !ct.contains("application/json") {
534 return None;
535 }
536 let parse_err = match serde_json::from_str::<serde_json::Value>(body) {
540 Ok(_) => return None,
541 Err(e) => e,
542 };
543 let status = case.response.as_ref().map(|r| r.status).unwrap_or(0);
544 let mut evidence = vec![
545 Evidence::with(
546 format!(
547 "serde_json parse error at line {} column {}: {}",
548 parse_err.line(),
549 parse_err.column(),
550 parse_err
551 ),
552 "request.body",
553 ),
554 Evidence::with(
555 format!("Content-Type was {ct}; body could not be parsed"),
556 "request.headers.content-type",
557 ),
558 ];
559 let confidence = if matches!(status, 400 | 422) {
560 evidence.push(Evidence::with(
561 format!("Response status {status} confirms server rejected payload"),
562 "response.status",
563 ));
564 0.95
565 } else {
566 0.70
567 };
568 Some(Diagnosis {
569 rule_id: self.id().into(),
570 likely_cause: "Invalid JSON payload".into(),
571 confidence,
572 evidence,
573 next_steps: vec![
574 "Validate the payload against the documented request schema.".into(),
575 "Re-emit the body using a JSON serialiser (avoid hand-built strings).".into(),
576 "If the issue persists, log the raw request bytes before send.".into(),
577 ],
578 escalation: "The request body could not be parsed as JSON. The server \
579 rejected the request before any business logic ran. Ask the \
580 customer to share the exact bytes they sent and the producer \
581 that built them."
582 .into(),
583 })
584 }
585}
586
587struct RateLimited;
598
599impl Rule for RateLimited {
600 fn id(&self) -> &str {
601 "rate_limited"
602 }
603 fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
604 let resp = case.response.as_ref()?;
605 if resp.status != 429 {
609 return None;
610 }
611 let mut evidence = vec![Evidence::with(
612 "Response status 429 Too Many Requests",
613 "response.status",
614 )];
615 let mut confidence: f32 = 0.70;
616 if let Some(remaining) = header(&resp.headers, "x-ratelimit-remaining") {
617 evidence.push(Evidence::with(
618 format!("X-RateLimit-Remaining: {remaining}"),
619 "response.headers.x-ratelimit-remaining",
620 ));
621 if remaining.trim() == "0" {
622 confidence = confidence.max(0.95);
623 } else {
624 confidence = confidence.max(0.85);
625 }
626 }
627 if let Some(retry_after) = header(&resp.headers, "retry-after") {
628 evidence.push(Evidence::with(
629 format!("Retry-After: {retry_after} seconds"),
630 "response.headers.retry-after",
631 ));
632 confidence = confidence.max(0.95);
633 }
634 if let Some(reset) = header(&resp.headers, "x-ratelimit-reset") {
635 evidence.push(Evidence::with(
636 format!("X-RateLimit-Reset (epoch): {reset}"),
637 "response.headers.x-ratelimit-reset",
638 ));
639 }
640 Some(Diagnosis {
641 rule_id: self.id().into(),
642 likely_cause: "Rate limit exceeded".into(),
643 confidence,
644 evidence,
645 next_steps: vec![
646 "Honour the Retry-After header before resending.".into(),
647 "Implement client-side exponential backoff with jitter.".into(),
648 "Reduce request frequency or request a higher quota.".into(),
649 ],
650 escalation: "Customer is hitting the documented rate limit. Confirm whether \
651 the spike is intentional (campaign / migration) or a runaway \
652 loop, and whether a temporary quota bump is appropriate."
653 .into(),
654 })
655 }
656}
657
658struct WebhookSignatureMismatch;
671
672impl Rule for WebhookSignatureMismatch {
673 fn id(&self) -> &str {
674 "webhook_signature_mismatch"
675 }
676 fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
677 let webhook = case.context.webhook.as_ref()?;
678 let secret = case.load_secret()?;
679 let provided_raw = header(&case.request.headers, &webhook.signature_header)?;
680 let ts_raw = header(&case.request.headers, &webhook.timestamp_header).unwrap_or("");
681 let env = parse_envelope(webhook.envelope_format, provided_raw, ts_raw);
685 if env.signatures.is_empty() {
686 return None;
687 }
688 let timestamp = env.timestamp.map(|t| t.to_string()).unwrap_or_default();
692 let body = case.request.body.as_deref().unwrap_or("");
693 let signing_input = match webhook.envelope_format {
694 EnvelopeFormat::Raw | EnvelopeFormat::StripeV1 => {
695 format!("{timestamp}.{body}")
696 }
697 EnvelopeFormat::SlackV0 => format!("v0:{timestamp}:{body}"),
698 EnvelopeFormat::GithubHmac => body.to_string(),
699 };
700 let mut mac = <Hmac<Sha256> as Mac>::new_from_slice(&secret).ok()?;
701 mac.update(signing_input.as_bytes());
702 let expected = hex::encode(mac.finalize().into_bytes());
703 if env
704 .signatures
705 .iter()
706 .any(|s| s.eq_ignore_ascii_case(&expected))
707 {
708 return None;
709 }
710 let envelope_label = match webhook.envelope_format {
711 EnvelopeFormat::Raw => "raw",
712 EnvelopeFormat::StripeV1 => "stripe_v1",
713 EnvelopeFormat::SlackV0 => "slack_v0",
714 EnvelopeFormat::GithubHmac => "github_hmac",
715 };
716 let evidence = vec![
717 Evidence::with(
718 format!(
719 "Provided {} ({envelope_label}): {}",
720 webhook.signature_header, env.label
721 ),
722 format!(
723 "request.headers.{}",
724 webhook.signature_header.to_lowercase()
725 ),
726 ),
727 Evidence::with(
728 format!("Expected (HMAC-SHA256 over '{{timestamp}}.{{body}}'): {expected}"),
729 "computed",
730 ),
731 Evidence::with(
732 format!("Signing input length: {} bytes", signing_input.len()),
733 "computed",
734 ),
735 ];
736 Some(Diagnosis {
739 rule_id: self.id().into(),
740 likely_cause: "Webhook signature does not match recomputed HMAC".into(),
741 confidence: 0.92,
742 evidence,
743 next_steps: vec![
744 "Confirm the active signing secret matches the one used by the sender.".into(),
745 "Verify the receiver hashes the raw request body (not a re-serialised copy)."
746 .into(),
747 "Inspect any proxy / middleware that may rewrite the body before validation."
748 .into(),
749 ],
750 escalation: "Recomputed HMAC differs from the provided signature. The most \
751 common causes are a rotated-but-not-deployed secret, a body \
752 being re-serialised (whitespace / key order changes), or a \
753 proxy mutating the request. Confirm with the customer which \
754 secret revision is active on their side."
755 .into(),
756 })
757 }
758}
759
760struct WebhookTimestampStale;
777
778impl Rule for WebhookTimestampStale {
779 fn id(&self) -> &str {
780 "webhook_timestamp_stale"
781 }
782 fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
783 let webhook = case.context.webhook.as_ref()?;
784 let now = case.context.now_unix?;
785 let provided_sig = header(&case.request.headers, &webhook.signature_header).unwrap_or("");
786 let ts_raw = header(&case.request.headers, &webhook.timestamp_header).unwrap_or("");
787 let env = parse_envelope(webhook.envelope_format, provided_sig, ts_raw);
788 let ts = env.timestamp?;
789 let drift = now - ts;
790 if drift.abs() <= webhook.tolerance_seconds {
791 return None;
792 }
793 let direction = if drift >= 0 { "behind" } else { "ahead of" };
794 let (source_label, source_pointer) = match webhook.envelope_format {
795 EnvelopeFormat::Raw | EnvelopeFormat::SlackV0 => (
796 webhook.timestamp_header.clone(),
797 format!(
798 "request.headers.{}",
799 webhook.timestamp_header.to_lowercase()
800 ),
801 ),
802 EnvelopeFormat::StripeV1 => (
803 format!("{} (stripe_v1 t=)", webhook.signature_header),
804 format!(
805 "request.headers.{}",
806 webhook.signature_header.to_lowercase()
807 ),
808 ),
809 EnvelopeFormat::GithubHmac => (
813 "github_hmac (no timestamp)".to_string(),
814 format!(
815 "request.headers.{}",
816 webhook.signature_header.to_lowercase()
817 ),
818 ),
819 };
820 let evidence = vec![
821 Evidence::with(
822 format!(
823 "{}: {} ({} {} reference now)",
824 source_label,
825 ts,
826 drift.abs(),
827 direction
828 ),
829 source_pointer,
830 ),
831 Evidence::with(
832 format!(
833 "Tolerance is {} seconds; observed drift {} seconds",
834 webhook.tolerance_seconds,
835 drift.abs()
836 ),
837 "case.context.webhook.tolerance_seconds",
838 ),
839 ];
840 let confidence = if drift.abs() > webhook.tolerance_seconds * 10 {
841 0.90
842 } else {
843 0.85
844 };
845 Some(Diagnosis {
846 rule_id: self.id().into(),
847 likely_cause: "Webhook timestamp outside tolerance window".into(),
848 confidence,
849 evidence,
850 next_steps: vec![
851 "Check NTP / clock skew between sender and receiver.".into(),
852 "Confirm the timestamp header reflects the time the payload was signed, \
853 not the time it was forwarded."
854 .into(),
855 "If retries are stored on disk before delivery, refresh the signature \
856 immediately before the actual send."
857 .into(),
858 ],
859 escalation: "Webhook timestamp is outside the configured tolerance. This \
860 often indicates clock skew, queued retries that re-send a \
861 long-stored payload, or a misconfigured replay window."
862 .into(),
863 })
864 }
865}
866
/// Detects repeated upstream timeouts in the case's log, grouped per
/// request_id so one noisy request does not pollute another's retry count.
struct TimeoutRetry;

impl Rule for TimeoutRetry {
    fn id(&self) -> &str {
        "timeout_retry"
    }
    // Two-pass scan: first group timeout lines by request_id (tracking the
    // highest `attempt` seen), then derive per-stream elapsed time from the
    // min/max log timestamps of ALL lines for that request_id.
    fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
        let log = case.load_log()?;

        // Per-request_id accumulator for timeout lines.
        struct Stream<'a> {
            request_id: String,
            // (1-based line number, parsed line) for each timeout entry.
            timeouts: Vec<(u32, LogLine<'a>)>,
            // Highest `attempt=` field observed on this stream's timeout lines.
            max_attempt: u32,
            // Filled in by the second pass, when timestamps are derivable.
            elapsed_ms: Option<u64>,
        }

        // BTreeMap keeps iteration deterministic across runs.
        let mut streams: BTreeMap<String, Stream<'_>> = BTreeMap::new();
        let mut unknown_id_timeouts: Vec<(u32, LogLine<'_>)> = Vec::new();

        // Pass 1: collect timeout lines, bucketed by request_id.
        for (idx, raw) in log.lines().enumerate() {
            let line = LogLine::parse(raw);
            if !(line.contains_ci("timeout") || line.contains_ci("timed out")) {
                continue;
            }
            let line_no = (idx as u32) + 1;
            match line.field("request_id") {
                Some(rid) => {
                    let entry = streams.entry(rid.clone()).or_insert_with(|| Stream {
                        request_id: rid,
                        timeouts: Vec::new(),
                        max_attempt: 0,
                        elapsed_ms: None,
                    });
                    if let Some(a) = line.field("attempt").and_then(|s| s.parse::<u32>().ok()) {
                        entry.max_attempt = entry.max_attempt.max(a);
                    }
                    entry.timeouts.push((line_no, line));
                }
                None => unknown_id_timeouts.push((line_no, line)),
            }
        }

        // A single timeout is not a retry pattern; require at least two.
        let total_timeouts: usize =
            streams.values().map(|s| s.timeouts.len()).sum::<usize>() + unknown_id_timeouts.len();
        if total_timeouts < 2 {
            return None;
        }

        // Pass 2: for each stream, derive elapsed wall time from the
        // earliest/latest parseable timestamps on any line with that
        // request_id (not just the timeout lines).
        for stream in streams.values_mut() {
            let mut min_ms: Option<i64> = None;
            let mut max_ms: Option<i64> = None;
            for raw in log.lines() {
                let line = LogLine::parse(raw);
                if line.field("request_id").as_deref() != Some(stream.request_id.as_str()) {
                    continue;
                }
                if let Some(ms) = parse_timestamp_ms(raw) {
                    min_ms = Some(min_ms.map_or(ms, |m| m.min(ms)));
                    max_ms = Some(max_ms.map_or(ms, |m| m.max(ms)));
                }
            }
            if let (Some(a), Some(b)) = (min_ms, max_ms) {
                if b > a {
                    stream.elapsed_ms = Some((b - a) as u64);
                }
            }
        }

        // The stream with the most timeouts (then highest attempt) is the
        // one worth reporting on.
        let primary_stream = streams
            .values()
            .max_by_key(|s| (s.timeouts.len(), s.max_attempt));

        let mut evidence: Vec<Evidence> = Vec::new();
        if let Some(s) = primary_stream {
            evidence.push(Evidence::with(
                format!(
                    "request_id={} accounts for {} timeout entries (max attempt={})",
                    s.request_id,
                    s.timeouts.len(),
                    s.max_attempt
                ),
                "server.log",
            ));
            // Cap quoted entries at 4 to keep the report readable.
            for (line_no, line) in s.timeouts.iter().take(4) {
                evidence.push(Evidence::at_line(
                    format!("timeout entry: {}", truncate(line.raw(), 160)),
                    "server.log",
                    *line_no,
                ));
            }
            if let Some(elapsed) = s.elapsed_ms {
                evidence.push(Evidence::with(
                    format!(
                        "elapsed (derived from log timestamps): {} ms across {} attempts",
                        elapsed,
                        s.timeouts.len()
                    ),
                    "computed",
                ));
            }
        }
        // Note in the evidence when multiple request_ids were present, so a
        // reader knows the counts above are per-stream, not pooled.
        let all_request_ids: std::collections::BTreeSet<String> = log
            .lines()
            .filter_map(|raw| LogLine::parse(raw).field("request_id"))
            .collect();
        if all_request_ids.len() > 1 {
            evidence.push(Evidence::with(
                format!(
                    "log contains {} distinct request_ids; rule grouped timeouts by request_id rather than pooling",
                    all_request_ids.len()
                ),
                "server.log",
            ));
        }

        // Confidence ladder: base 0.65, bumped by retry exhaustion,
        // deadline overrun, and the mere presence of a documented deadline.
        let mut confidence: f32 = 0.65;
        if let Some(s) = primary_stream {
            if s.max_attempt >= 3 {
                confidence = confidence.max(0.85);
                evidence.push(Evidence::with(
                    format!(
                        "max attempt observed: {} (suggests retry exhaustion)",
                        s.max_attempt
                    ),
                    "server.log",
                ));
            }
            if let (Some(elapsed), Some(deadline)) = (s.elapsed_ms, case.context.client_deadline_ms)
            {
                if elapsed > deadline {
                    confidence = confidence.max(0.90);
                    evidence.push(Evidence::with(
                        format!(
                            "derived elapsed {} ms exceeds documented client deadline {} ms",
                            elapsed, deadline
                        ),
                        "computed",
                    ));
                }
            }
        }
        if let Some(deadline_ms) = case.context.client_deadline_ms {
            evidence.push(Evidence::with(
                format!("documented client deadline: {deadline_ms} ms"),
                "case.context.client_deadline_ms",
            ));
            confidence = confidence.max(0.85);
        }

        Some(Diagnosis {
            rule_id: self.id().into(),
            likely_cause: "Upstream timeout with retries exhausted".into(),
            confidence,
            evidence,
            next_steps: vec![
                "Inspect upstream latency for the affected endpoint.".into(),
                "Verify retry policy (max attempts, backoff, jitter).".into(),
                "If the deadline is shorter than upstream p99, raise it or reduce work.".into(),
            ],
            escalation: "Client retried the request multiple times before failing. \
                         Confirm whether upstream latency spiked, whether the retry \
                         budget is appropriate for the documented client deadline, and \
                         whether idempotency keys protect against duplicate side \
                         effects on retry."
                .into(),
        })
    }
}
1071
1072struct ConfigDnsError;
1090
1091impl Rule for ConfigDnsError {
1092 fn id(&self) -> &str {
1093 "config_dns_error"
1094 }
1095 fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
1096 let expected_base = case.context.expected_base_url.as_ref()?;
1097 let expected = Url::parse(expected_base).ok()?;
1098 let actual = Url::parse(&case.request.url).ok()?;
1099 let exp_host = expected.host_str()?;
1100 let act_host = actual.host_str()?;
1101 if act_host == exp_host && actual.scheme() == expected.scheme() {
1102 return None;
1103 }
1104 let mut evidence = vec![
1105 Evidence::with(format!("Request host: {act_host}"), "request.url"),
1106 Evidence::with(
1107 format!("Documented base host: {exp_host}"),
1108 "case.context.expected_base_url",
1109 ),
1110 ];
1111 let mut confidence: f32 = 0.75;
1112 if actual.scheme() != expected.scheme() {
1113 evidence.push(Evidence::with(
1114 format!(
1115 "Scheme differs: request={}, expected={}",
1116 actual.scheme(),
1117 expected.scheme()
1118 ),
1119 "request.url",
1120 ));
1121 confidence = confidence.max(0.80);
1122 }
1123 if let Some(hint) = near_miss_hint(act_host, exp_host) {
1124 evidence.push(Evidence::with(hint, "computed"));
1125 confidence = confidence.max(0.90);
1126 }
1127 Some(Diagnosis {
1128 rule_id: self.id().into(),
1129 likely_cause: "API base URL or hostname does not match documented endpoint".into(),
1130 confidence,
1131 evidence,
1132 next_steps: vec![
1133 "Confirm the API base URL in the customer's environment configuration.".into(),
1134 "Run `dig` / `nslookup` against the documented host to rule out DNS issues.".into(),
1135 "Check for environment variable overrides (staging vs production).".into(),
1136 ],
1137 escalation: "Customer is targeting a host that does not match the documented \
1138 API base. The most common causes are a stale base-URL config, a \
1139 staging endpoint left in production, or a typo in a TLD or \
1140 subdomain. Verify the deploying revision before assuming a DNS \
1141 outage."
1142 .into(),
1143 })
1144 }
1145}
1146
1147struct IdempotencyCollision;
1163
1164impl Rule for IdempotencyCollision {
1165 fn id(&self) -> &str {
1166 "idempotency_collision"
1167 }
1168 fn evaluate(&self, case: &Case) -> Option<Diagnosis> {
1169 let idem = case.context.idempotency.as_ref()?;
1170 let key = header(&case.request.headers, &idem.header)?;
1171 let body = case.request.body.as_deref().unwrap_or("");
1172 let mut hasher = Sha256::new();
1173 hasher.update(body.as_bytes());
1174 let actual = hex::encode(hasher.finalize());
1175 if actual.eq_ignore_ascii_case(&idem.stored_body_sha256) {
1176 return None;
1177 }
1178 let status = case.response.as_ref().map(|r| r.status).unwrap_or(0);
1179 let mut evidence = vec![
1180 Evidence::with(
1181 format!("Idempotency-Key: {key}"),
1182 format!("request.headers.{}", idem.header.to_lowercase()),
1183 ),
1184 Evidence::with(
1185 format!("Stored body SHA-256: {}", idem.stored_body_sha256),
1186 "case.context.idempotency.stored_body_sha256",
1187 ),
1188 Evidence::with(format!("Current body SHA-256: {actual}"), "computed"),
1189 Evidence::with(
1190 format!("Current body length: {} bytes", body.len()),
1191 "request.body",
1192 ),
1193 ];
1194 let confidence = if status == 422 {
1195 evidence.push(Evidence::with(
1196 "Response status 422 confirms server rejected duplicate-key with different body",
1197 "response.status",
1198 ));
1199 0.93
1200 } else if (400..500).contains(&status) {
1201 0.80
1202 } else {
1203 0.70
1204 };
1205 Some(Diagnosis {
1206 rule_id: self.id().into(),
1207 likely_cause: "Idempotency-Key reused with a different request body".into(),
1208 confidence,
1209 evidence,
1210 next_steps: vec![
1211 "Generate a fresh Idempotency-Key for any logically new request.".into(),
1212 "If retrying, send byte-identical body bytes used on the first attempt.".into(),
1213 "Check whether a serialiser or middleware is adding fields between attempts."
1214 .into(),
1215 ],
1216 escalation: "Customer reused an Idempotency-Key with a different body, so the \
1217 server returned its stored-body-mismatch error. Confirm whether \
1218 their retry logic captures the body before its first send and \
1219 replays the same bytes, or whether a logging / proxy layer is \
1220 re-serialising between attempts."
1221 .into(),
1222 })
1223 }
1224}
1225
/// Produces a human-readable hint when `actual` looks like a near-miss of
/// the documented host `expected`.
///
/// Two cases are reported:
/// * exactly one dot-separated label differs, by at most two characters at
///   equal length (a likely typo);
/// * the final label (TLD) differs outright.
///
/// Returns `None` when the hosts differ in some less suggestive way.
fn near_miss_hint(actual: &str, expected: &str) -> Option<String> {
    let a_parts: Vec<&str> = actual.rsplit('.').collect();
    let e_parts: Vec<&str> = expected.rsplit('.').collect();
    if a_parts.len() == e_parts.len() {
        let mut diffs = 0usize;
        let mut diff_label: Option<(&str, &str)> = None;
        for (a, e) in a_parts.iter().zip(e_parts.iter()) {
            if a != e {
                diffs += 1;
                diff_label = Some((a, e));
            }
        }
        if diffs == 1 {
            let (a, e) = diff_label?;
            // Positional character-difference count (only meaningful at
            // equal length).
            let close_enough = a.len() == e.len()
                && a.chars().zip(e.chars()).filter(|(x, y)| x != y).count() <= 2;
            if close_enough {
                return Some(format!(
                    "near-miss label: '{a}' differs from documented '{e}' by ≤2 chars (typo?)"
                ));
            }
        }
    }
    // Compare the last label exactly. The original used `ends_with`, which
    // wrongly treated e.g. 'bigexample' as matching the documented TLD
    // 'example' and suppressed the hint.
    let actual_tld = actual.rsplit('.').next().unwrap_or("");
    let expected_tld = expected.rsplit('.').next().unwrap_or("");
    if actual_tld != expected_tld {
        return Some(format!(
            "TLD differs: request '{actual}' vs documented '{expected}'"
        ));
    }
    None
}
1270
/// Number of positions at which `a` and `b` hold different characters,
/// comparing only up to the shorter string's character count.
fn hamming(a: &str, b: &str) -> usize {
    let mut mismatches = 0;
    for (x, y) in a.chars().zip(b.chars()) {
        if x != y {
            mismatches += 1;
        }
    }
    mismatches
}
1280
/// Returns `s` unchanged when it fits in `max` bytes, otherwise the longest
/// prefix of at most `max` bytes followed by a single ellipsis ('…').
///
/// The cut is backed off to the nearest UTF-8 character boundary; the
/// original sliced at byte `max` directly and panicked when that byte
/// landed inside a multi-byte character (log lines are arbitrary text).
fn truncate(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }
    // `max < s.len()` here, so `cut` is a valid index; walking backwards
    // always terminates because index 0 is a boundary.
    let mut cut = max;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}\u{2026}", &s[..cut])
}
1293
#[cfg(test)]
mod private_helper_tests {
    //! Unit tests for the file-private helpers: calendar math, timestamp
    //! parsing, hostname heuristics, truncation, and envelope parsing.
    use super::*;

    // --- days_from_civil: days relative to the Unix epoch ----------------

    #[test]
    fn days_from_civil_unix_epoch() {
        assert_eq!(days_from_civil(1970, 1, 1), 0);
    }

    #[test]
    fn days_from_civil_one_day_after_epoch() {
        assert_eq!(days_from_civil(1970, 1, 2), 1);
    }

    #[test]
    fn days_from_civil_one_year_after_epoch() {
        assert_eq!(days_from_civil(1971, 1, 1), 365);
    }

    #[test]
    fn days_from_civil_2000_leap_day() {
        // 30 years (incl. 7 leap days) to 2000-01-01, plus Jan (31) + Feb (28).
        assert_eq!(days_from_civil(2000, 2, 29), 11016);
    }

    #[test]
    fn days_from_civil_2100_not_leap() {
        // Century years not divisible by 400 are common years.
        let feb28 = days_from_civil(2100, 2, 28);
        let mar01 = days_from_civil(2100, 3, 1);
        assert_eq!(mar01 - feb28, 1, "2100 must not be a leap year");
    }

    #[test]
    fn days_from_civil_2400_leap() {
        let feb29 = days_from_civil(2400, 2, 29);
        let mar01 = days_from_civil(2400, 3, 1);
        assert_eq!(mar01 - feb29, 1, "2400 must be a leap year");
    }

    #[test]
    fn days_from_civil_pre_epoch() {
        // Dates before 1970 yield negative day counts.
        assert_eq!(days_from_civil(1969, 12, 31), -1);
    }

    // --- parse_timestamp_ms ----------------------------------------------

    #[test]
    fn parse_timestamp_ms_unix_epoch_zero() {
        assert_eq!(parse_timestamp_ms("1970-01-01T00:00:00.000Z"), Some(0));
    }

    #[test]
    fn parse_timestamp_ms_one_second_after_epoch() {
        assert_eq!(parse_timestamp_ms("1970-01-01T00:00:01.000Z"), Some(1000));
    }

    #[test]
    fn parse_timestamp_ms_pads_short_fractions() {
        // ".5" means 500 ms, not 5 ms.
        assert_eq!(parse_timestamp_ms("1970-01-01T00:00:00.5Z"), Some(500));
    }

    #[test]
    fn parse_timestamp_ms_returns_none_on_garbage() {
        assert_eq!(parse_timestamp_ms("not a timestamp"), None);
        assert_eq!(parse_timestamp_ms(""), None);
    }

    #[test]
    fn parse_timestamp_ms_rejects_invalid_month() {
        assert_eq!(parse_timestamp_ms("1970-13-01T00:00:00.000Z"), None);
    }

    #[test]
    fn parse_timestamp_ms_cross_midnight_is_monotone() {
        let before = parse_timestamp_ms("2026-12-31T23:59:59.500Z").unwrap();
        let after = parse_timestamp_ms("2027-01-01T00:00:01.500Z").unwrap();
        assert_eq!(after - before, 2000, "cross-midnight span must be 2 s");
    }

    // --- hamming ----------------------------------------------------------

    #[test]
    fn hamming_identical_strings() {
        assert_eq!(hamming("abc", "abc"), 0);
    }

    #[test]
    fn hamming_one_char_diff() {
        assert_eq!(hamming("abc", "abd"), 1);
    }

    #[test]
    fn hamming_all_diff() {
        assert_eq!(hamming("abc", "xyz"), 3);
    }

    #[test]
    fn hamming_empty_strings() {
        assert_eq!(hamming("", ""), 0);
    }

    // --- near_miss_hint ---------------------------------------------------

    #[test]
    fn near_miss_hint_typo_label() {
        let hint = near_miss_hint("api.acme.exemple", "api.acme.example");
        let h = hint.expect("typo near-miss must produce a hint");
        assert!(h.contains("near-miss"), "{h}");
    }

    #[test]
    fn near_miss_hint_completely_different_tld() {
        let hint = near_miss_hint("api.acme.local", "api.acme.example");
        let h = hint.expect("TLD-differs must produce a hint");
        assert!(h.contains("TLD differs"), "{h}");
    }

    #[test]
    fn near_miss_hint_label_count_differs() {
        // Different label counts: a hint is allowed but not required.
        let hint = near_miss_hint("staging.api.acme.example", "api.acme.example");
        if let Some(h) = hint {
            assert!(h.contains("TLD") || h.contains("near-miss"), "{h}");
        }
    }

    // --- truncate ---------------------------------------------------------

    #[test]
    fn truncate_under_limit_passes_through() {
        assert_eq!(truncate("hi", 10), "hi");
    }

    #[test]
    fn truncate_at_limit_passes_through() {
        assert_eq!(truncate("hello", 5), "hello");
    }

    #[test]
    fn truncate_over_limit_appends_ellipsis() {
        assert_eq!(truncate("hello!", 5), "hello\u{2026}");
    }

    // --- parse_envelope ---------------------------------------------------

    #[test]
    fn parse_envelope_raw_strips_sha256_prefix() {
        let env = parse_envelope(EnvelopeFormat::Raw, "sha256=deadbeef", "1700000000");
        assert_eq!(env.signatures, vec!["deadbeef".to_string()]);
        assert_eq!(env.timestamp, Some(1_700_000_000));
    }

    #[test]
    fn parse_envelope_stripe_v1_collects_v1_and_v0() {
        let env = parse_envelope(
            EnvelopeFormat::StripeV1,
            "t=1700000000,v1=aaaa,v0=bbbb",
            "ignored",
        );
        assert_eq!(env.signatures, vec!["aaaa".to_string(), "bbbb".to_string()]);
        assert_eq!(env.timestamp, Some(1_700_000_000));
    }

    #[test]
    fn parse_envelope_slack_v0_strips_prefix() {
        let env = parse_envelope(EnvelopeFormat::SlackV0, "v0=cafef00d", "1700000000");
        assert_eq!(env.signatures, vec!["cafef00d".to_string()]);
        assert_eq!(env.timestamp, Some(1_700_000_000));
    }

    #[test]
    fn parse_envelope_github_hmac_has_no_timestamp() {
        let env = parse_envelope(
            EnvelopeFormat::GithubHmac,
            "sha256=feedface",
            "this should be ignored",
        );
        assert_eq!(env.signatures, vec!["feedface".to_string()]);
        assert_eq!(env.timestamp, None, "GitHub envelope claims no timestamp");
    }
}