use crate::check::{CheckOutcome, MatchKind, UncertainReason};
use crate::escalation::TransportTier;
use crate::site::ProtectionKind;
use std::collections::HashMap;
/// Default evidence-to-observation ratio a site must reach to be reported.
///
/// NOTE(review): not referenced elsewhere in this file; presumably the value
/// callers pass as `threshold_ratio` to [`analyze_escalation_history`] —
/// confirm at the call sites.
pub const DEFAULT_THRESHOLD_RATIO: f32 = 0.6;
/// Default minimum number of observations a site needs before it is judged.
///
/// NOTE(review): not referenced elsewhere in this file; presumably the value
/// callers pass as `min_scans` to [`analyze_escalation_history`] — confirm at
/// the call sites.
pub const DEFAULT_MIN_SCANS: u32 = 3;
/// One site whose history shows escalation evidence, as reported by
/// [`analyze_escalation_history`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EscalationFinding {
/// Site name, taken from the `site` field of the tallied outcomes.
pub site: String,
/// Number of outcomes tallied for this site (one per outcome, so a scan that
/// lists the site twice contributes two).
pub scans_seen: u32,
/// Number of those outcomes that were classified as escalation evidence of
/// any kind.
pub escalation_evidence: u32,
/// The evidence kind seen most often for this site; a tie is reported as
/// [`EvidenceKind::CloudflareChallenge`].
pub dominant_reason: EvidenceKind,
/// Protection derived from `dominant_reason` via
/// [`EvidenceKind::suggested_protection`].
pub suggested_protection: ProtectionKind,
}
impl EscalationFinding {
    /// Fraction of this site's tallied outcomes that carried escalation
    /// evidence, i.e. `escalation_evidence / scans_seen`.
    ///
    /// Returns `0.0` when `scans_seen` is zero, so the division is never
    /// attempted on an empty tally.
    ///
    /// The division is carried out in `f64`, which represents every `u32`
    /// exactly, and only the final quotient is narrowed to `f32`. Clamping the
    /// operands to a narrower integer first would distort the ratio once
    /// either count exceeds that integer's range.
    #[must_use]
    // Narrowing the quotient to `f32` is intentional: it only drops precision
    // that the `f32` return type cannot carry anyway.
    #[allow(clippy::cast_possible_truncation)]
    pub fn ratio(&self) -> f32 {
        if self.scans_seen == 0 {
            return 0.0;
        }
        let exact = f64::from(self.escalation_evidence) / f64::from(self.scans_seen);
        exact as f32
    }
}
/// Category of escalation evidence that a single outcome can contribute to a
/// site's tally.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum EvidenceKind {
/// The outcome was served over the browser transport after at least one
/// escalation, or it was `Uncertain` with a Cloudflare-challenge reason.
CloudflareChallenge,
/// The outcome was `Uncertain` with a rate-limited reason.
RateLimited,
}
impl EvidenceKind {
/// Protection kind to suggest for a site dominated by this evidence.
///
/// Every variant currently maps to [`ProtectionKind::Cloudflare`].
#[must_use]
pub const fn suggested_protection(self) -> ProtectionKind {
match self {
// Variants are listed explicitly (no `_` arm) so that adding a variant
// fails to compile here until it is given a mapping.
Self::CloudflareChallenge | Self::RateLimited => ProtectionKind::Cloudflare,
}
}
}
/// Running per-site counters accumulated by [`analyze_escalation_history`].
#[derive(Default, Debug)]
struct SiteTally {
/// Outcomes observed for the site, with or without evidence.
scans_seen: u32,
/// Outcomes classified as [`EvidenceKind::CloudflareChallenge`].
cloudflare_evidence: u32,
/// Outcomes classified as [`EvidenceKind::RateLimited`].
ratelimit_evidence: u32,
}
impl SiteTally {
    /// Number of outcomes that carried escalation evidence of either kind.
    fn total_evidence(&self) -> u32 {
        let Self {
            cloudflare_evidence,
            ratelimit_evidence,
            ..
        } = self;
        *cloudflare_evidence + *ratelimit_evidence
    }

    /// The evidence kind seen most often, or `None` when no evidence was
    /// recorded at all.
    ///
    /// Rate limiting only wins when it is strictly more frequent; a tie is
    /// reported as a Cloudflare challenge.
    fn dominant(&self) -> Option<EvidenceKind> {
        let has_evidence = self.total_evidence() != 0;
        has_evidence.then(|| {
            if self.ratelimit_evidence > self.cloudflare_evidence {
                EvidenceKind::RateLimited
            } else {
                EvidenceKind::CloudflareChallenge
            }
        })
    }
}
/// Decides whether a single outcome counts as escalation evidence, and of
/// which kind.
///
/// * An outcome served over the browser transport after at least one
///   escalation is Cloudflare-challenge evidence, whatever its match kind.
/// * Otherwise only `Uncertain` outcomes count, and only when their reason is
///   a Cloudflare challenge or rate limiting.
/// * Everything else yields `None`.
fn classify(outcome: &CheckOutcome) -> Option<EvidenceKind> {
    let via_escalated_browser =
        matches!(outcome.transport, Some(TransportTier::Browser)) && outcome.escalations >= 1;
    if via_escalated_browser {
        return Some(EvidenceKind::CloudflareChallenge);
    }

    if outcome.kind != MatchKind::Uncertain {
        return None;
    }

    // A missing reason and any other reason both mean "no evidence".
    match outcome.reason.as_ref() {
        Some(UncertainReason::CloudflareChallenge) => Some(EvidenceKind::CloudflareChallenge),
        Some(UncertainReason::RateLimited) => Some(EvidenceKind::RateLimited),
        _ => None,
    }
}
/// Scans a history of check outcomes and reports the sites that show
/// escalation evidence often enough to be worth flagging.
///
/// Every outcome in every scan counts as one observation for its `site`, and
/// [`classify`] decides whether that observation is escalation evidence.
///
/// # Parameters
///
/// * `scans` – the outcome slices to aggregate, one slice per scan.
/// * `threshold_ratio` – minimum `evidence / observations` ratio a site must
///   reach to be reported. The check is `ratio < threshold_ratio`, so a `NaN`
///   threshold filters nothing out.
/// * `min_scans` – minimum number of observations a site needs before it is
///   considered at all.
///
/// # Returns
///
/// One [`EscalationFinding`] per site that has at least `min_scans`
/// observations, at least one piece of evidence, and a ratio of at least
/// `threshold_ratio`. Findings are ordered by ratio, highest first, with ties
/// broken by site name in ascending order.
pub fn analyze_escalation_history<'a>(
    scans: impl IntoIterator<Item = &'a [CheckOutcome]>,
    threshold_ratio: f32,
    min_scans: u32,
) -> Vec<EscalationFinding> {
    // Key the tallies by the borrowed site name: the outcomes outlive this
    // call, so an owned `String` is only needed for sites that are reported.
    let mut tallies: HashMap<&'a str, SiteTally> = HashMap::new();
    for outcome in scans.into_iter().flatten() {
        let tally = tallies.entry(outcome.site.as_str()).or_default();
        tally.scans_seen += 1;
        match classify(outcome) {
            Some(EvidenceKind::CloudflareChallenge) => tally.cloudflare_evidence += 1,
            Some(EvidenceKind::RateLimited) => tally.ratelimit_evidence += 1,
            None => {}
        }
    }

    let mut findings: Vec<EscalationFinding> = tallies
        .into_iter()
        .filter_map(|(site, tally)| {
            if tally.scans_seen < min_scans {
                return None;
            }
            // `None` here means the site never produced any evidence.
            let dominant = tally.dominant()?;
            let evidence = tally.total_evidence();

            // Divide in `f64`, which holds every `u32` exactly, so counts
            // above `u16::MAX` are not clamped before the division. The
            // quotient is then narrowed to `f32` on purpose so the comparison
            // with the `f32` threshold keeps its boundary behaviour.
            // `scans_seen` is at least 1 for every tally in the map.
            #[allow(clippy::cast_possible_truncation)]
            let ratio = (f64::from(evidence) / f64::from(tally.scans_seen)) as f32;
            if ratio < threshold_ratio {
                return None;
            }

            Some(EscalationFinding {
                site: site.to_owned(),
                scans_seen: tally.scans_seen,
                escalation_evidence: evidence,
                dominant_reason: dominant,
                suggested_protection: dominant.suggested_protection(),
            })
        })
        .collect();

    // Site names are unique (they were map keys), so the comparator never
    // reports two distinct findings as equal and an unstable sort is
    // deterministic despite the map's arbitrary iteration order.
    findings.sort_unstable_by(|a, b| {
        b.ratio()
            .total_cmp(&a.ratio())
            .then_with(|| a.site.cmp(&b.site))
    });
    findings
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::check::CheckOutcome;

    /// Builds an outcome with no transport recorded and no escalations.
    fn outcome(site: &str, kind: MatchKind, reason: Option<UncertainReason>) -> CheckOutcome {
        CheckOutcome {
            site: site.to_owned(),
            url: format!("https://{site}.example/foo"),
            kind,
            reason,
            elapsed_ms: 100,
            evidence: Vec::new(),
            enrichment: std::collections::BTreeMap::new(),
            transport: None,
            escalations: 0,
        }
    }

    /// A `Found` outcome that needed one escalation to the browser transport.
    fn outcome_browser_escalated(site: &str) -> CheckOutcome {
        CheckOutcome {
            elapsed_ms: 200,
            transport: Some(TransportTier::Browser),
            escalations: 1,
            ..outcome(site, MatchKind::Found, None)
        }
    }

    /// An `Uncertain` outcome blocked by a Cloudflare challenge.
    fn outcome_http_uncertain_cf(site: &str) -> CheckOutcome {
        let reason = Some(UncertainReason::CloudflareChallenge);
        outcome(site, MatchKind::Uncertain, reason)
    }

    /// An `Uncertain` outcome caused by rate limiting.
    fn outcome_http_uncertain_rl(site: &str) -> CheckOutcome {
        let reason = Some(UncertainReason::RateLimited);
        outcome(site, MatchKind::Uncertain, reason)
    }

    /// A plain `Found` outcome with no escalation involved.
    fn outcome_http_found(site: &str) -> CheckOutcome {
        outcome(site, MatchKind::Found, None)
    }

    /// Produces `times` scans, each built by calling `scan`.
    fn repeated(times: usize, scan: impl Fn() -> Vec<CheckOutcome>) -> Vec<Vec<CheckOutcome>> {
        std::iter::repeat_with(scan).take(times).collect()
    }

    /// Runs the analysis over `scans`, treating each inner `Vec` as one scan.
    fn analyze(
        scans: &[Vec<CheckOutcome>],
        threshold_ratio: f32,
        min_scans: u32,
    ) -> Vec<EscalationFinding> {
        analyze_escalation_history(scans.iter().map(Vec::as_slice), threshold_ratio, min_scans)
    }

    #[test]
    fn consistent_escalation_produces_finding() {
        let scans = repeated(5, || vec![outcome_browser_escalated("CDNed")]);

        let findings = analyze(&scans, 0.6, 3);

        assert_eq!(findings.len(), 1);
        let finding = &findings[0];
        assert_eq!(finding.site, "CDNed");
        assert_eq!(finding.scans_seen, 5);
        assert_eq!(finding.escalation_evidence, 5);
        assert!((finding.ratio() - 1.0).abs() < f32::EPSILON);
        assert_eq!(finding.suggested_protection, ProtectionKind::Cloudflare);
    }

    #[test]
    fn http_only_site_does_not_get_flagged() {
        let scans = repeated(10, || vec![outcome_http_found("GitHub")]);

        let findings = analyze(&scans, 0.6, 3);

        assert!(findings.is_empty(), "{findings:?}");
    }

    #[test]
    fn intermittent_escalation_below_threshold_skipped() {
        // Two escalated scans followed by eight clean ones: ratio 0.2.
        let scans: Vec<Vec<CheckOutcome>> = (0..10)
            .map(|index| {
                if index < 2 {
                    vec![outcome_browser_escalated("FlakyEdge")]
                } else {
                    vec![outcome_http_found("FlakyEdge")]
                }
            })
            .collect();

        let findings = analyze(&scans, 0.6, 3);

        assert!(findings.is_empty(), "{findings:?}");
    }

    #[test]
    fn too_few_scans_skipped_even_at_full_ratio() {
        let scans = repeated(2, || vec![outcome_browser_escalated("RareSite")]);

        let findings = analyze(&scans, 0.6, 3);

        assert!(findings.is_empty(), "{findings:?}");
    }

    #[test]
    fn http_uncertain_with_should_escalate_reason_counts_too() {
        let scans = repeated(4, || vec![outcome_http_uncertain_cf("WalledOff")]);

        let findings = analyze(&scans, 0.6, 3);

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].site, "WalledOff");
        assert_eq!(
            findings[0].dominant_reason,
            EvidenceKind::CloudflareChallenge
        );
    }

    #[test]
    fn dominant_reason_picks_higher_count() {
        // Four Cloudflare challenges, then a single rate-limit.
        let mut scans = repeated(4, || vec![outcome_http_uncertain_cf("Mixed")]);
        scans.push(vec![outcome_http_uncertain_rl("Mixed")]);

        let findings = analyze(&scans, 0.6, 3);

        assert_eq!(findings.len(), 1);
        assert_eq!(
            findings[0].dominant_reason,
            EvidenceKind::CloudflareChallenge
        );
    }

    #[test]
    fn findings_sorted_by_ratio_then_name() {
        // Aardvark escalates in all five scans; Beaver only in the first three.
        let scans: Vec<Vec<CheckOutcome>> = (0..5)
            .map(|index| {
                let beaver = if index < 3 {
                    outcome_browser_escalated("Beaver")
                } else {
                    outcome_http_found("Beaver")
                };
                vec![outcome_browser_escalated("Aardvark"), beaver]
            })
            .collect();

        let findings = analyze(&scans, 0.5, 3);

        assert_eq!(findings.len(), 2);
        assert_eq!(findings[0].site, "Aardvark");
        assert!(findings[0].ratio() > findings[1].ratio());
    }

    #[test]
    fn empty_input_returns_empty() {
        let no_scans = std::iter::empty::<&[CheckOutcome]>();

        let findings: Vec<EscalationFinding> = analyze_escalation_history(no_scans, 0.5, 1);

        assert!(findings.is_empty());
    }
}