use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
/// Compact summary of one HTTP response, as produced by [`fingerprint`].
///
/// Two fingerprints can be diffed with [`compare`] to estimate how far a
/// response has drifted from a known-good baseline.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ResponseFingerprint {
    /// Status code of the response.
    pub status: u16,
    /// Value of the first `content-type` header up to the first `;`, trimmed
    /// and ASCII-lowercased; empty when the header is absent.
    pub content_type: String,
    /// Coarse size class of the complete body.
    pub length_bucket: LengthBucket,
    /// Text of the first `<title>` element found in the inspected body
    /// prefix, trimmed and ASCII-lowercased; `None` when no title was found.
    pub title: Option<String>,
    /// Whether the inspected body prefix contains a known block-page phrase.
    pub has_block_markers: bool,
    /// Hash of the inspected body prefix (at most the first 4096 bytes).
    pub body_hash: u64,
}
/// Coarse size class of a response body, in bytes.
///
/// The byte ranges listed on each variant are the ones applied by
/// `categorize_length`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LengthBucket {
    /// Exactly 0 bytes.
    Empty,
    /// 1 to 100 bytes.
    Tiny,
    /// 101 to 1,000 bytes.
    Small,
    /// 1,001 to 5,000 bytes.
    Medium,
    /// 5,001 to 20,000 bytes.
    Large,
    /// 20,001 to 100,000 bytes.
    VeryLarge,
    /// 100,001 to 1,000,000 bytes.
    Huge,
    /// More than 1,000,000 bytes.
    Massive,
}
/// Outcome of comparing a sampled fingerprint against a baseline.
#[derive(Debug, Clone)]
pub struct FingerprintDrift {
    /// Sum of the weights of every attribute that changed, capped at `1.0`.
    pub score: f64,
    /// Labels of the attributes that changed, in the order they were checked.
    pub changed: Vec<&'static str>,
    /// Whether the sampled response looks like a block page.
    pub likely_blocked: bool,
}
/// Builds a [`ResponseFingerprint`] from the raw parts of an HTTP response.
///
/// The title, block-marker and hash fields are derived from at most the first
/// 4096 bytes of `body` (decoded lossily as UTF-8 for the text-based checks);
/// the length bucket reflects the full body length.
#[must_use]
pub fn fingerprint(status: u16, headers: &[(String, String)], body: &[u8]) -> ResponseFingerprint {
    /// Number of leading body bytes that are inspected and hashed.
    const INSPECTED_PREFIX_LEN: usize = 4096;

    let prefix = &body[..body.len().min(INSPECTED_PREFIX_LEN)];
    // Lossy decoding: a multi-byte character cut by the prefix boundary (or
    // any other invalid sequence) becomes U+FFFD instead of failing.
    let prefix_text = String::from_utf8_lossy(prefix);

    ResponseFingerprint {
        status,
        content_type: extract_content_type(headers),
        length_bucket: categorize_length(body.len()),
        title: extract_title(&prefix_text),
        has_block_markers: check_block_markers(&prefix_text),
        body_hash: hash_body(prefix),
    }
}
/// Scores how far `sample` has drifted from `baseline`.
///
/// Each attribute that differs contributes a fixed weight to the score and its
/// label to `changed`:
///
/// | label                    | weight |
/// |--------------------------|--------|
/// | `status_code`            | 0.30   |
/// | `content_type`           | 0.15   |
/// | `body_length`            | 0.20   |
/// | `title_tag`              | 0.15   |
/// | `body_content`           | 0.10   |
/// | `block_markers_appeared` | 0.30   |
///
/// `block_markers_appeared` only fires when the baseline had no block markers
/// and the sample does. The reported score is capped at `1.0`.
///
/// `likely_blocked` is set when the sample contains block markers (regardless
/// of the baseline), or when the uncapped score reaches `0.4` for a sample
/// status of 400 or above, or `0.6` for any other status.
#[must_use]
pub fn compare(baseline: &ResponseFingerprint, sample: &ResponseFingerprint) -> FingerprintDrift {
    let markers_appeared = !baseline.has_block_markers && sample.has_block_markers;

    // (attribute changed?, weight, label) — evaluated in this exact order so
    // the floating-point accumulation and the label order stay stable.
    let signals: [(bool, f64, &'static str); 6] = [
        (baseline.status != sample.status, 0.3, "status_code"),
        (
            baseline.content_type != sample.content_type,
            0.15,
            "content_type",
        ),
        (
            baseline.length_bucket != sample.length_bucket,
            0.2,
            "body_length",
        ),
        (baseline.title != sample.title, 0.15, "title_tag"),
        (baseline.body_hash != sample.body_hash, 0.1, "body_content"),
        (markers_appeared, 0.3, "block_markers_appeared"),
    ];

    let mut total: f64 = 0.0;
    let mut changed = Vec::new();
    for &(differs, weight, label) in &signals {
        if differs {
            total += weight;
            changed.push(label);
        }
    }

    // Error statuses need less accumulated drift to be treated as a block.
    let drift_threshold = if sample.status >= 400 { 0.4 } else { 0.6 };
    let likely_blocked = sample.has_block_markers || total >= drift_threshold;

    FingerprintDrift {
        score: total.min(1.0),
        changed,
        likely_blocked,
    }
}
/// Returns the media type from the first `content-type` header.
///
/// The header name is matched ASCII case-insensitively. Everything from the
/// first `;` onwards (parameters such as `charset`) is dropped, and the rest
/// is trimmed and ASCII-lowercased. Returns an empty string when no such
/// header exists.
fn extract_content_type(headers: &[(String, String)]) -> String {
    for (name, value) in headers {
        if !name.eq_ignore_ascii_case("content-type") {
            continue;
        }
        let media_type = match value.split_once(';') {
            Some((before_params, _)) => before_params,
            None => value.as_str(),
        };
        return media_type.trim().to_ascii_lowercase();
    }
    String::new()
}
fn extract_title(body: &str) -> Option<String> {
let re = regex::Regex::new(r"<title\b[^>]*>(.*?)</title>").ok()?;
let caps = re.captures(body)?;
let title = caps.get(1)?.as_str();
Some(title.trim().to_ascii_lowercase())
}
/// Reports whether `body` contains any known block-page phrase.
///
/// Matching is a plain substring search that ignores ASCII case. The matcher
/// is built on first use and shared by all later calls.
fn check_block_markers(body: &str) -> bool {
    use aho_corasick::AhoCorasick;
    use once_cell::sync::Lazy;

    /// Phrases that commonly appear on WAF / bot-protection block pages.
    const BLOCK_MARKERS: &[&str] = &[
        "access denied",
        "request blocked",
        "forbidden",
        "web application firewall",
        "security violation",
        "attack detected",
        "malicious request",
        "your request has been blocked",
        "this request was blocked",
        "suspicious activity",
        "waf",
        "challenge-platform",
        "just a moment",
        "checking your browser",
        "ray id",
        "incident id",
        "reference #",
        "error code:",
        "attention required",
    ];

    static MARKER_MATCHER: Lazy<AhoCorasick> = Lazy::new(|| {
        AhoCorasick::builder()
            .ascii_case_insensitive(true)
            .build(BLOCK_MARKERS)
            .expect("block markers are valid AC patterns")
    });

    MARKER_MATCHER.is_match(body)
}
/// Hashes `body` with a freshly constructed [`DefaultHasher`].
///
/// The bytes are fed through the slice's `Hash` implementation, so equal
/// inputs always produce equal outputs within one build of the program.
fn hash_body(body: &[u8]) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(body, &mut state);
    state.finish()
}
/// Maps a body length in bytes to its [`LengthBucket`].
///
/// Each bucket is defined by an inclusive upper bound; lengths above the last
/// bound fall into [`LengthBucket::Massive`].
fn categorize_length(length: usize) -> LengthBucket {
    /// Inclusive upper bound of every bucket except `Massive`, ascending.
    const UPPER_BOUNDS: [(usize, LengthBucket); 7] = [
        (0, LengthBucket::Empty),
        (100, LengthBucket::Tiny),
        (1_000, LengthBucket::Small),
        (5_000, LengthBucket::Medium),
        (20_000, LengthBucket::Large),
        (100_000, LengthBucket::VeryLarge),
        (1_000_000, LengthBucket::Huge),
    ];

    UPPER_BOUNDS
        .iter()
        .find(|(max_len, _)| length <= *max_len)
        .map_or(LengthBucket::Massive, |&(_, bucket)| bucket)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal page used as the "everything is fine" baseline in several tests.
    const PLAIN_OK_PAGE: &str = "<html><body>OK</body></html>";

    /// Fingerprints `body` as an HTML response with a fixed `content-type` header.
    fn html_response(status: u16, body: &str) -> ResponseFingerprint {
        let headers = [(
            String::from("content-type"),
            String::from("text/html; charset=utf-8"),
        )];
        fingerprint(status, &headers, body.as_bytes())
    }

    #[test]
    fn identical_responses_zero_drift() {
        let page = "<html><title>Hello</title><body>OK</body></html>";
        let first = html_response(200, page);
        let second = html_response(200, page);

        let drift = compare(&first, &second);

        assert!((drift.score - 0.0).abs() < f64::EPSILON);
        assert!(drift.changed.is_empty());
        assert!(!drift.likely_blocked);
    }

    #[test]
    fn status_change_detected() {
        let baseline = html_response(200, PLAIN_OK_PAGE);
        let blocked = html_response(403, "<html><body>Access Denied</body></html>");

        let drift = compare(&baseline, &blocked);

        assert!(drift.score >= 0.3);
        assert!(drift.changed.contains(&"status_code"));
        assert!(drift.likely_blocked);
    }

    #[test]
    fn silent_block_detected() {
        // Same status code, but the page content has turned into a block notice.
        let baseline = html_response(
            200,
            "<html><title>My App</title><body>Search results for: test</body></html>",
        );
        let silently_blocked = html_response(
            200,
            "<html><title>Access Denied</title><body>Your request has been blocked by our web application firewall.</body></html>",
        );

        let drift = compare(&baseline, &silently_blocked);

        assert!(
            drift.score >= 0.5,
            "drift score should be high: {}",
            drift.score
        );
        assert!(drift.likely_blocked, "should detect as blocked");
    }

    #[test]
    fn cloudflare_challenge_detected() {
        let baseline = html_response(200, PLAIN_OK_PAGE);
        let challenge = html_response(
            503,
            "<html><title>Just a moment...</title><body>Checking your browser before accessing. challenge-platform</body></html>",
        );

        let drift = compare(&baseline, &challenge);

        assert!(drift.likely_blocked);
        assert!(drift.changed.contains(&"block_markers_appeared"));
    }

    #[test]
    fn length_bucket_classification() {
        // One representative length per bucket.
        let cases = [
            (0, LengthBucket::Empty),
            (50, LengthBucket::Tiny),
            (500, LengthBucket::Small),
            (3000, LengthBucket::Medium),
            (10000, LengthBucket::Large),
            (50000, LengthBucket::VeryLarge),
            (500_000, LengthBucket::Huge),
            (2_000_000, LengthBucket::Massive),
        ];
        for &(length, expected) in cases.iter() {
            assert_eq!(categorize_length(length), expected);
        }
    }

    #[test]
    fn title_extraction() {
        let fp = html_response(
            200,
            "<html><title>My Application</title><body>Hello</body></html>",
        );

        // Titles are stored lowercased.
        assert_eq!(fp.title.as_deref(), Some("my application"));
    }
}