1use crate::check::{CheckOutcome, MatchKind, UncertainReason};
22use crate::escalation::TransportTier;
23use crate::site::ProtectionKind;
24use std::collections::HashMap;
25
/// Default evidence ratio threshold (`0.6`).
///
/// NOTE(review): presumably the value callers pass as `threshold_ratio` to
/// [`analyze_escalation_history`]; nothing in this file reads it — confirm at the call sites.
pub const DEFAULT_THRESHOLD_RATIO: f32 = 0.6;

/// Default minimum number of observations (`3`).
///
/// NOTE(review): presumably the value callers pass as `min_scans` to
/// [`analyze_escalation_history`]; nothing in this file reads it — confirm at the call sites.
pub const DEFAULT_MIN_SCANS: u32 = 3;
35
36#[derive(Debug, Clone, PartialEq, Eq)]
40pub struct EscalationFinding {
41 pub site: String,
43 pub scans_seen: u32,
45 pub escalation_evidence: u32,
51 pub dominant_reason: EvidenceKind,
54 pub suggested_protection: ProtectionKind,
57}
58
59impl EscalationFinding {
60 #[must_use]
63 pub fn ratio(&self) -> f32 {
64 if self.scans_seen == 0 {
65 0.0
66 } else {
67 f32::from(u16::try_from(self.escalation_evidence).unwrap_or(u16::MAX))
68 / f32::from(u16::try_from(self.scans_seen).unwrap_or(u16::MAX))
69 }
70 }
71}
72
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
79#[non_exhaustive]
80pub enum EvidenceKind {
81 CloudflareChallenge,
84 RateLimited,
87}
88
89impl EvidenceKind {
90 #[must_use]
96 pub const fn suggested_protection(self) -> ProtectionKind {
97 match self {
98 Self::CloudflareChallenge | Self::RateLimited => ProtectionKind::Cloudflare,
99 }
100 }
101}
102
103#[derive(Default, Debug)]
105struct SiteTally {
106 scans_seen: u32,
107 cloudflare_evidence: u32,
108 ratelimit_evidence: u32,
109}
110
111impl SiteTally {
112 fn total_evidence(&self) -> u32 {
113 self.cloudflare_evidence + self.ratelimit_evidence
114 }
115
116 fn dominant(&self) -> Option<EvidenceKind> {
117 if self.total_evidence() == 0 {
118 return None;
119 }
120 if self.cloudflare_evidence >= self.ratelimit_evidence {
121 Some(EvidenceKind::CloudflareChallenge)
122 } else {
123 Some(EvidenceKind::RateLimited)
124 }
125 }
126}
127
128fn classify(outcome: &CheckOutcome) -> Option<EvidenceKind> {
137 if matches!(outcome.transport, Some(TransportTier::Browser)) && outcome.escalations >= 1 {
138 return Some(EvidenceKind::CloudflareChallenge);
144 }
145 if outcome.kind == MatchKind::Uncertain {
146 match outcome.reason.as_ref()? {
147 UncertainReason::CloudflareChallenge => return Some(EvidenceKind::CloudflareChallenge),
148 UncertainReason::RateLimited => return Some(EvidenceKind::RateLimited),
149 _ => {}
150 }
151 }
152 None
153}
154
155pub fn analyze_escalation_history<'a>(
164 scans: impl IntoIterator<Item = &'a [CheckOutcome]>,
165 threshold_ratio: f32,
166 min_scans: u32,
167) -> Vec<EscalationFinding> {
168 let mut tallies: HashMap<String, SiteTally> = HashMap::new();
169 for outcomes in scans {
170 for outcome in outcomes {
171 let entry = tallies.entry(outcome.site.clone()).or_default();
172 entry.scans_seen += 1;
173 match classify(outcome) {
174 Some(EvidenceKind::CloudflareChallenge) => entry.cloudflare_evidence += 1,
175 Some(EvidenceKind::RateLimited) => entry.ratelimit_evidence += 1,
176 None => {}
177 }
178 }
179 }
180
181 let mut findings: Vec<EscalationFinding> = tallies
182 .into_iter()
183 .filter_map(|(site, tally)| {
184 if tally.scans_seen < min_scans {
185 return None;
186 }
187 let dominant = tally.dominant()?;
188 let evidence = tally.total_evidence();
189 let ratio = f32::from(u16::try_from(evidence).unwrap_or(u16::MAX))
190 / f32::from(u16::try_from(tally.scans_seen).unwrap_or(u16::MAX));
191 if ratio < threshold_ratio {
192 return None;
193 }
194 Some(EscalationFinding {
195 site,
196 scans_seen: tally.scans_seen,
197 escalation_evidence: evidence,
198 dominant_reason: dominant,
199 suggested_protection: dominant.suggested_protection(),
200 })
201 })
202 .collect();
203 findings.sort_by(|a, b| {
204 b.ratio()
205 .partial_cmp(&a.ratio())
206 .unwrap_or(std::cmp::Ordering::Equal)
207 .then_with(|| a.site.cmp(&b.site))
208 });
209 findings
210}
211
#[cfg(test)]
mod tests {
    use super::*;
    use crate::check::CheckOutcome;

    /// Builds a plain outcome: no transport recorded and no escalations.
    fn outcome(site: &str, kind: MatchKind, reason: Option<UncertainReason>) -> CheckOutcome {
        CheckOutcome {
            site: site.to_owned(),
            url: format!("https://{site}.example/foo"),
            kind,
            reason,
            elapsed_ms: 100,
            evidence: Vec::new(),
            enrichment: std::collections::BTreeMap::new(),
            transport: None,
            escalations: 0,
        }
    }

    /// A `Found` outcome that was served by the browser tier after one escalation.
    fn outcome_browser_escalated(site: &str) -> CheckOutcome {
        CheckOutcome {
            elapsed_ms: 200,
            transport: Some(TransportTier::Browser),
            escalations: 1,
            ..outcome(site, MatchKind::Found, None)
        }
    }

    /// An `Uncertain` outcome whose reason is a Cloudflare challenge.
    fn outcome_http_uncertain_cf(site: &str) -> CheckOutcome {
        outcome(
            site,
            MatchKind::Uncertain,
            Some(UncertainReason::CloudflareChallenge),
        )
    }

    /// An `Uncertain` outcome whose reason is rate limiting.
    fn outcome_http_uncertain_rl(site: &str) -> CheckOutcome {
        outcome(
            site,
            MatchKind::Uncertain,
            Some(UncertainReason::RateLimited),
        )
    }

    /// A `Found` outcome without any escalation signal.
    fn outcome_http_found(site: &str) -> CheckOutcome {
        outcome(site, MatchKind::Found, None)
    }

    /// Runs the analysis over owned scans, borrowing each one as a slice.
    fn analyze(
        scans: &[Vec<CheckOutcome>],
        threshold_ratio: f32,
        min_scans: u32,
    ) -> Vec<EscalationFinding> {
        analyze_escalation_history(scans.iter().map(Vec::as_slice), threshold_ratio, min_scans)
    }

    #[test]
    fn consistent_escalation_produces_finding() {
        let scans: Vec<Vec<CheckOutcome>> = (0..5)
            .map(|_| vec![outcome_browser_escalated("CDNed")])
            .collect();
        let findings = analyze(&scans, 0.6, 3);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].site, "CDNed");
        assert_eq!(findings[0].scans_seen, 5);
        assert_eq!(findings[0].escalation_evidence, 5);
        assert!((findings[0].ratio() - 1.0).abs() < f32::EPSILON);
        assert_eq!(findings[0].suggested_protection, ProtectionKind::Cloudflare);
    }

    #[test]
    fn http_only_site_does_not_get_flagged() {
        let scans: Vec<Vec<CheckOutcome>> = (0..10)
            .map(|_| vec![outcome_http_found("GitHub")])
            .collect();
        let findings = analyze(&scans, 0.6, 3);
        assert!(findings.is_empty(), "{findings:?}");
    }

    #[test]
    fn intermittent_escalation_below_threshold_skipped() {
        // 2 escalated out of 10 observations: ratio 0.2, below the 0.6 threshold.
        let mut scans: Vec<Vec<CheckOutcome>> = Vec::new();
        for _ in 0..2 {
            scans.push(vec![outcome_browser_escalated("FlakyEdge")]);
        }
        for _ in 0..8 {
            scans.push(vec![outcome_http_found("FlakyEdge")]);
        }
        let findings = analyze(&scans, 0.6, 3);
        assert!(findings.is_empty(), "{findings:?}");
    }

    #[test]
    fn too_few_scans_skipped_even_at_full_ratio() {
        let scans: Vec<Vec<CheckOutcome>> = (0..2)
            .map(|_| vec![outcome_browser_escalated("RareSite")])
            .collect();
        let findings = analyze(&scans, 0.6, 3);
        assert!(findings.is_empty(), "{findings:?}");
    }

    #[test]
    fn http_uncertain_with_should_escalate_reason_counts_too() {
        let scans: Vec<Vec<CheckOutcome>> = (0..4)
            .map(|_| vec![outcome_http_uncertain_cf("WalledOff")])
            .collect();
        let findings = analyze(&scans, 0.6, 3);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].site, "WalledOff");
        assert_eq!(
            findings[0].dominant_reason,
            EvidenceKind::CloudflareChallenge
        );
    }

    #[test]
    fn dominant_reason_picks_higher_count() {
        // 4 Cloudflare challenges against 1 rate limit.
        let mut scans: Vec<Vec<CheckOutcome>> = Vec::new();
        for _ in 0..4 {
            scans.push(vec![outcome_http_uncertain_cf("Mixed")]);
        }
        scans.push(vec![outcome_http_uncertain_rl("Mixed")]);
        let findings = analyze(&scans, 0.6, 3);
        assert_eq!(findings.len(), 1);
        assert_eq!(
            findings[0].dominant_reason,
            EvidenceKind::CloudflareChallenge
        );
    }

    #[test]
    fn findings_sorted_by_ratio_then_name() {
        // Aardvark escalates in all 5 scans, Beaver only in the first 3.
        let mut scans: Vec<Vec<CheckOutcome>> = Vec::new();
        for _ in 0..5 {
            scans.push(vec![
                outcome_browser_escalated("Aardvark"),
                outcome_browser_escalated("Beaver"),
            ]);
        }
        scans[3] = vec![
            outcome_browser_escalated("Aardvark"),
            outcome_http_found("Beaver"),
        ];
        scans[4] = vec![
            outcome_browser_escalated("Aardvark"),
            outcome_http_found("Beaver"),
        ];

        let findings = analyze(&scans, 0.5, 3);
        assert_eq!(findings.len(), 2);
        assert_eq!(findings[0].site, "Aardvark");
        assert!(findings[0].ratio() > findings[1].ratio());
    }

    #[test]
    fn equal_ratios_fall_back_to_site_name_order() {
        // Both sites escalate every time, so only the name can decide the order; they are
        // fed in reverse alphabetical order on purpose.
        let scans: Vec<Vec<CheckOutcome>> = (0..3)
            .map(|_| {
                vec![
                    outcome_browser_escalated("Zebra"),
                    outcome_browser_escalated("Aardvark"),
                ]
            })
            .collect();
        let findings = analyze(&scans, 0.6, 3);
        let sites: Vec<&str> = findings.iter().map(|f| f.site.as_str()).collect();
        assert_eq!(sites, ["Aardvark", "Zebra"]);
    }

    #[test]
    fn empty_input_returns_empty() {
        let findings: Vec<EscalationFinding> =
            analyze_escalation_history(std::iter::empty::<&[CheckOutcome]>(), 0.5, 1);
        assert!(findings.is_empty());
    }
}