use std::collections::{HashMap, HashSet};
use std::time::Instant;
use serde::{Deserialize, Serialize};
/// One way in which a request header deviates from what was expected.
///
/// Every variant carries the name of the header it refers to; the remaining
/// fields hold the observation that triggered the anomaly. The per-variant
/// risk weight is defined by `HeaderAnomaly::base_risk` and the human-readable
/// text by `HeaderAnomaly::description`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum HeaderAnomaly {
/// A required header is missing.
MissingRequired {
/// Name of the missing header.
header: String,
},
/// A header that was not seen in the baseline.
UnexpectedHeader {
/// Name of the unexpected header.
header: String,
},
/// A header whose value was judged anomalous for the stated `reason`.
AnomalousValue {
/// Name of the header.
header: String,
/// The offending value. Note that `description` does not print it.
value: String,
/// Explanation of why the value was flagged; printed by `description`.
reason: String,
},
/// A header whose value entropy deviates from the expected mean.
EntropyAnomaly {
/// Name of the header.
header: String,
/// Entropy observed for the value.
entropy: f64,
/// Mean entropy the observation was compared against.
expected_mean: f64,
},
/// A header whose value length is outside the expected range.
LengthAnomaly {
/// Name of the header.
header: String,
/// Length observed for the value.
length: usize,
/// Expected range; `description` prints it as `[.0, .1]`, so this is
/// presumably `(lower, upper)` — confirm against the code that builds it.
expected_range: (usize, usize),
},
}
impl HeaderAnomaly {
    /// Returns the name of the header this anomaly is about.
    ///
    /// Every variant stores the name in a field called `header`, so a single
    /// or-pattern arm covers all of them.
    pub fn header(&self) -> &str {
        match self {
            Self::MissingRequired { header }
            | Self::UnexpectedHeader { header }
            | Self::AnomalousValue { header, .. }
            | Self::EntropyAnomaly { header, .. }
            | Self::LengthAnomaly { header, .. } => header,
        }
    }

    /// Returns the fixed risk weight of this kind of anomaly.
    ///
    /// Weights, lowest to highest: unexpected header 5, missing required
    /// header 10, length anomaly 10, anomalous value 15, entropy anomaly 20.
    pub fn base_risk(&self) -> u16 {
        match self {
            Self::UnexpectedHeader { .. } => 5,
            Self::MissingRequired { .. } | Self::LengthAnomaly { .. } => 10,
            Self::AnomalousValue { .. } => 15,
            Self::EntropyAnomaly { .. } => 20,
        }
    }

    /// Builds a one-line, human-readable description of the anomaly.
    ///
    /// For `AnomalousValue` only the reason is included; the raw value is
    /// left out of the message.
    pub fn description(&self) -> String {
        match self {
            Self::MissingRequired { header: name } => {
                format!("Required header '{}' is missing", name)
            }
            Self::UnexpectedHeader { header: name } => {
                format!("Unexpected header '{}' not seen in baseline", name)
            }
            Self::AnomalousValue {
                header: name,
                reason: why,
                ..
            } => format!("Header '{}' has anomalous value: {}", name, why),
            Self::EntropyAnomaly {
                header: name,
                entropy: observed,
                expected_mean: expected,
            } => format!(
                "Header '{}' entropy {:.2} deviates from expected {:.2}",
                name, observed, expected
            ),
            Self::LengthAnomaly {
                header: name,
                length: observed,
                expected_range: (lower, upper),
            } => format!(
                "Header '{}' length {} outside expected range [{}, {}]",
                name, observed, lower, upper
            ),
        }
    }
}
/// A list of header anomalies together with their accumulated risk score.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct HeaderAnomalyResult {
/// The recorded anomalies, in the order they were added.
pub anomalies: Vec<HeaderAnomaly>,
/// Running total of the anomalies' base risks as maintained by `add`,
/// which caps the total at 50.
// NOTE(review): the field is public, so the cap only holds for values
// produced through `add`/`merge`, not for direct writes.
pub risk_contribution: u16,
}
impl HeaderAnomalyResult {
    /// Ceiling that `add` enforces on `risk_contribution`.
    const RISK_CAP: u16 = 50;

    /// Returns a result with no anomalies, zero risk and no pre-allocated
    /// storage.
    #[inline]
    pub fn none() -> Self {
        let anomalies = Vec::new();
        Self {
            anomalies,
            risk_contribution: 0,
        }
    }

    /// Returns an empty result whose anomaly list has room for four entries.
    #[inline]
    pub fn new() -> Self {
        let anomalies = Vec::with_capacity(4);
        Self {
            anomalies,
            risk_contribution: 0,
        }
    }

    /// Records `anomaly` and raises the risk total by its base risk.
    ///
    /// The addition saturates and the total is then clamped to 50.
    #[inline]
    pub fn add(&mut self, anomaly: HeaderAnomaly) {
        let raised = self.risk_contribution.saturating_add(anomaly.base_risk());
        self.risk_contribution = raised.min(Self::RISK_CAP);
        self.anomalies.push(anomaly);
    }

    /// Returns `true` when at least one anomaly has been recorded.
    #[inline]
    pub fn has_anomalies(&self) -> bool {
        !self.anomalies.is_empty()
    }

    /// Returns the number of recorded anomalies.
    #[inline]
    pub fn count(&self) -> usize {
        self.anomalies.len()
    }

    /// Moves every anomaly of `other` into `self`.
    ///
    /// Each anomaly goes through `add`, so the risk total is recomputed from
    /// the anomalies' base risks (and capped); `other.risk_contribution`
    /// itself is not read.
    pub fn merge(&mut self, other: HeaderAnomalyResult) {
        other
            .anomalies
            .into_iter()
            .for_each(|anomaly| self.add(anomaly));
    }
}
/// Running statistics over the values observed for one header: length bounds
/// plus an incrementally maintained entropy mean and variance.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValueStats {
/// Smallest value length passed to `update`; `new` initialises it to
/// `usize::MAX` so the first sample always replaces it.
pub min_length: usize,
/// Largest value length passed to `update`; `new` initialises it to 0.
pub max_length: usize,
/// Running mean of the entropy samples passed to `update`.
pub entropy_mean: f64,
/// Variance of the entropy samples, derived in `update` from the running
/// sum of squared deviations. Stays 0.0 until two samples were recorded.
pub entropy_variance: f64,
/// Running sum of squared deviations from the mean; internal state of the
/// incremental variance computation in `update`.
entropy_m2: f64,
/// Number of samples recorded through `update`.
pub total_samples: u64,
}
impl ValueStats {
    /// Creates an empty accumulator.
    ///
    /// `min_length` starts at `usize::MAX` and `max_length` at 0 so that the
    /// first call to [`update`](Self::update) sets both to the first length.
    pub fn new() -> Self {
        Self {
            min_length: usize::MAX,
            max_length: 0,
            entropy_mean: 0.0,
            entropy_variance: 0.0,
            entropy_m2: 0.0,
            total_samples: 0,
        }
    }

    /// Folds one observation (value `length` and its `entropy`) into the
    /// statistics.
    ///
    /// Mean and variance are maintained with Welford's online algorithm, so
    /// no individual samples are stored.
    #[inline]
    pub fn update(&mut self, length: usize, entropy: f64) {
        self.min_length = self.min_length.min(length);
        self.max_length = self.max_length.max(length);

        self.total_samples += 1;
        // Welford step: `delta` uses the old mean, `delta2` the new one.
        let delta = entropy - self.entropy_mean;
        self.entropy_mean += delta / self.total_samples as f64;
        let delta2 = entropy - self.entropy_mean;
        self.entropy_m2 += delta * delta2;

        // Sample variance (Bessel's correction): M2 / (n - 1). It is only
        // defined from the second sample on, which is what the guard is for;
        // before that the variance stays at its initial 0.0.
        if self.total_samples >= 2 {
            self.entropy_variance = self.entropy_m2 / (self.total_samples - 1) as f64;
        }
    }

    /// Returns the standard deviation of the entropy samples (square root of
    /// `entropy_variance`).
    #[inline]
    pub fn entropy_stddev(&self) -> f64 {
        self.entropy_variance.sqrt()
    }

    /// Returns `true` once at least `min_samples` observations were recorded.
    #[inline]
    pub fn is_mature(&self, min_samples: u64) -> bool {
        self.total_samples >= min_samples
    }

    /// Checks whether `length` lies within the observed length bounds widened
    /// by a tolerance.
    ///
    /// The tolerance is `(max_length - min_length) * tolerance_factor`, but
    /// never less than 10, truncated to an integer. With no samples recorded
    /// every length is accepted.
    #[inline]
    pub fn is_length_in_range(&self, length: usize, tolerance_factor: f64) -> bool {
        if self.total_samples == 0 {
            return true;
        }
        // The fields are public and deserializable, so `min_length` is not
        // guaranteed to be <= `max_length`; saturate instead of risking an
        // underflow panic on inconsistent state.
        let spread = self.max_length.saturating_sub(self.min_length) as f64;
        let tolerance = (spread * tolerance_factor).max(10.0) as usize;
        let lower = self.min_length.saturating_sub(tolerance);
        let upper = self.max_length.saturating_add(tolerance);
        (lower..=upper).contains(&length)
    }

    /// Returns how many standard deviations `entropy` lies from the running
    /// mean.
    ///
    /// Returns 0.0 while the statistics are not usable for scoring: fewer
    /// than 5 samples, or a variance of at most 0.001 (which would make the
    /// division blow up).
    #[inline]
    pub fn entropy_z_score(&self, entropy: f64) -> f64 {
        if self.entropy_variance <= 0.001 || self.total_samples < 5 {
            return 0.0;
        }
        (entropy - self.entropy_mean) / self.entropy_stddev()
    }
}
impl Default for ValueStats {
fn default() -> Self {
Self::new()
}
}
/// Header profile recorded for a single endpoint: which header names are
/// known and per-header value statistics.
///
/// The lookup methods of this type lowercase the queried header name before
/// looking it up, so keys in the three collections should be stored in
/// lowercase.
#[derive(Debug, Clone)]
pub struct HeaderBaseline {
/// Endpoint identifier this baseline was created for (passed to `new`).
pub endpoint: String,
/// Header names reported as required by `is_required`.
// NOTE(review): how a header becomes "required" is decided outside this file.
pub required_headers: HashSet<String>,
/// Additional header names that `is_known` accepts besides the required ones.
pub optional_headers: HashSet<String>,
/// Per-header value statistics, keyed by header name.
pub header_value_stats: HashMap<String, ValueStats>,
/// Number of samples behind this baseline; compared against the threshold
/// in `is_mature` and used as the denominator in `header_frequency`.
pub sample_count: u64,
/// Set to the construction time by `new`; nothing in this file updates it
/// afterwards.
pub last_updated: Instant,
}
impl HeaderBaseline {
    /// Creates an empty baseline for `endpoint`.
    ///
    /// The collections are pre-sized for 16 entries and `last_updated` is set
    /// to the current instant.
    pub fn new(endpoint: String) -> Self {
        Self {
            endpoint,
            required_headers: HashSet::with_capacity(16),
            optional_headers: HashSet::with_capacity(16),
            header_value_stats: HashMap::with_capacity(16),
            sample_count: 0,
            last_updated: Instant::now(),
        }
    }

    /// Returns `true` once the baseline holds at least `min_samples` samples.
    #[inline]
    pub fn is_mature(&self, min_samples: u64) -> bool {
        self.sample_count >= min_samples
    }

    /// Returns the value statistics recorded for `header`, matching the name
    /// case-insensitively.
    ///
    /// The lowercased name is tried first (the canonical key form). Because
    /// `header_value_stats` is a public field and keys may have been inserted
    /// without normalisation, a miss falls back to an ASCII case-insensitive
    /// scan of the stored keys. If several stored keys differ only by case
    /// and none is lowercase, which one is returned is unspecified.
    #[inline]
    pub fn get_stats(&self, header: &str) -> Option<&ValueStats> {
        let lowered = header.to_lowercase();
        self.header_value_stats.get(lowered.as_str()).or_else(|| {
            self.header_value_stats
                .iter()
                .find(|(stored, _)| stored.eq_ignore_ascii_case(header))
                .map(|(_, stats)| stats)
        })
    }

    /// Returns `true` if `header` (compared case-insensitively) is in
    /// `required_headers`.
    #[inline]
    pub fn is_required(&self, header: &str) -> bool {
        let lowered = header.to_lowercase();
        Self::contains_header(&self.required_headers, header, &lowered)
    }

    /// Returns `true` if `header` (compared case-insensitively) is in either
    /// `required_headers` or `optional_headers`.
    #[inline]
    pub fn is_known(&self, header: &str) -> bool {
        // Lowercase once and share it between both set probes.
        let lowered = header.to_lowercase();
        Self::contains_header(&self.required_headers, header, &lowered)
            || Self::contains_header(&self.optional_headers, header, &lowered)
    }

    /// Returns the fraction of baseline samples in which `header` was
    /// recorded: the header's `total_samples` divided by `sample_count`.
    ///
    /// Returns 0.0 when the baseline has no samples or the header has no
    /// statistics.
    pub fn header_frequency(&self, header: &str) -> f64 {
        if self.sample_count == 0 {
            return 0.0;
        }
        self.get_stats(header).map_or(0.0, |stats| {
            stats.total_samples as f64 / self.sample_count as f64
        })
    }

    /// Case-insensitive membership test for a set of header names.
    ///
    /// `lowered` must be `header.to_lowercase()`; it serves the O(1) fast
    /// path for canonically stored keys. The scan afterwards tolerates keys
    /// that were inserted with their original casing.
    fn contains_header(set: &HashSet<String>, header: &str, lowered: &str) -> bool {
        set.contains(lowered) || set.iter().any(|stored| stored.eq_ignore_ascii_case(header))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- HeaderAnomaly ----------------------------------------------------

    #[test]
    fn test_header_anomaly_header() {
        let missing = HeaderAnomaly::MissingRequired {
            header: "Authorization".to_string(),
        };
        assert_eq!(missing.header(), "Authorization");

        let unexpected = HeaderAnomaly::UnexpectedHeader {
            header: "X-Evil".to_string(),
        };
        assert_eq!(unexpected.header(), "X-Evil");
    }

    #[test]
    fn test_header_anomaly_base_risk() {
        assert_eq!(
            HeaderAnomaly::MissingRequired {
                header: "Auth".to_string()
            }
            .base_risk(),
            10
        );
        assert_eq!(
            HeaderAnomaly::UnexpectedHeader {
                header: "X".to_string()
            }
            .base_risk(),
            5
        );
        assert_eq!(
            HeaderAnomaly::AnomalousValue {
                header: "X".to_string(),
                value: "bad".to_string(),
                reason: "test".to_string()
            }
            .base_risk(),
            15
        );
        assert_eq!(
            HeaderAnomaly::EntropyAnomaly {
                header: "X".to_string(),
                entropy: 7.5,
                expected_mean: 4.0
            }
            .base_risk(),
            20
        );
        assert_eq!(
            HeaderAnomaly::LengthAnomaly {
                header: "X".to_string(),
                length: 1000,
                expected_range: (10, 50)
            }
            .base_risk(),
            10
        );
    }

    #[test]
    fn test_header_anomaly_description() {
        let anomaly = HeaderAnomaly::MissingRequired {
            header: "Content-Type".to_string(),
        };
        let desc = anomaly.description();
        assert!(desc.contains("Content-Type"));
        assert!(desc.contains("missing"));
    }

    // ---- HeaderAnomalyResult ----------------------------------------------

    #[test]
    fn test_anomaly_result_empty() {
        let result = HeaderAnomalyResult::none();
        assert!(!result.has_anomalies());
        assert_eq!(result.count(), 0);
        assert_eq!(result.risk_contribution, 0);
    }

    #[test]
    fn test_anomaly_result_add() {
        let mut result = HeaderAnomalyResult::new();
        result.add(HeaderAnomaly::MissingRequired {
            header: "Auth".to_string(),
        });
        assert_eq!(result.count(), 1);
        assert_eq!(result.risk_contribution, 10);

        result.add(HeaderAnomaly::EntropyAnomaly {
            header: "Token".to_string(),
            entropy: 7.5,
            expected_mean: 4.0,
        });
        assert_eq!(result.count(), 2);
        assert_eq!(result.risk_contribution, 30);
    }

    #[test]
    fn test_anomaly_result_risk_capped() {
        // Ten entropy anomalies would sum to 200; `add` caps the total at 50.
        let mut result = HeaderAnomalyResult::new();
        for i in 0..10 {
            result.add(HeaderAnomaly::EntropyAnomaly {
                header: format!("Header-{}", i),
                entropy: 7.5,
                expected_mean: 4.0,
            });
        }
        assert_eq!(result.risk_contribution, 50);
    }

    #[test]
    fn test_anomaly_result_merge() {
        let mut result1 = HeaderAnomalyResult::new();
        result1.add(HeaderAnomaly::MissingRequired {
            header: "A".to_string(),
        });
        let mut result2 = HeaderAnomalyResult::new();
        result2.add(HeaderAnomaly::UnexpectedHeader {
            header: "B".to_string(),
        });
        result1.merge(result2);
        assert_eq!(result1.count(), 2);
        // 10 (missing required) + 5 (unexpected header).
        assert_eq!(result1.risk_contribution, 15);
    }

    // ---- ValueStats -------------------------------------------------------

    #[test]
    fn test_value_stats_new() {
        let stats = ValueStats::new();
        assert_eq!(stats.min_length, usize::MAX);
        assert_eq!(stats.max_length, 0);
        assert_eq!(stats.entropy_mean, 0.0);
        assert_eq!(stats.total_samples, 0);
    }

    #[test]
    fn test_value_stats_update() {
        let mut stats = ValueStats::new();
        stats.update(10, 4.0);
        assert_eq!(stats.min_length, 10);
        assert_eq!(stats.max_length, 10);
        assert_eq!(stats.total_samples, 1);
        assert!((stats.entropy_mean - 4.0).abs() < 0.001);

        stats.update(20, 5.0);
        assert_eq!(stats.min_length, 10);
        assert_eq!(stats.max_length, 20);
        assert_eq!(stats.total_samples, 2);
        assert!((stats.entropy_mean - 4.5).abs() < 0.001);
    }

    #[test]
    fn test_value_stats_is_mature() {
        let mut stats = ValueStats::new();
        assert!(!stats.is_mature(10));
        for _ in 0..10 {
            stats.update(10, 4.0);
        }
        assert!(stats.is_mature(10));
        assert!(!stats.is_mature(20));
    }

    #[test]
    fn test_value_stats_is_length_in_range() {
        let mut stats = ValueStats::new();
        // With no samples every length is accepted.
        assert!(stats.is_length_in_range(100, 1.5));

        for len in [10, 20, 30, 40, 50] {
            stats.update(len, 4.0);
        }
        // Observed range is 10..=50; tolerance is 40 * 1.5 = 60.
        assert!(stats.is_length_in_range(30, 1.5));
        assert!(stats.is_length_in_range(5, 1.5));
        assert!(stats.is_length_in_range(60, 1.5));
    }

    #[test]
    fn test_value_stats_entropy_z_score() {
        let mut stats = ValueStats::new();
        // Immature statistics always score 0.
        assert_eq!(stats.entropy_z_score(7.0), 0.0);

        for entropy in [3.5, 4.0, 4.5, 4.0, 4.0] {
            stats.update(10, entropy);
        }
        let z = stats.entropy_z_score(stats.entropy_mean);
        assert!(z.abs() < 0.1);
        let z = stats.entropy_z_score(stats.entropy_mean + stats.entropy_stddev());
        assert!(z > 0.9 && z < 1.1);
    }

    // ---- HeaderBaseline ---------------------------------------------------

    #[test]
    fn test_header_baseline_new() {
        let baseline = HeaderBaseline::new("/api/users".to_string());
        assert_eq!(baseline.endpoint, "/api/users");
        assert_eq!(baseline.sample_count, 0);
        assert!(baseline.required_headers.is_empty());
        assert!(baseline.optional_headers.is_empty());
    }

    #[test]
    fn test_header_baseline_is_mature() {
        let mut baseline = HeaderBaseline::new("/test".to_string());
        assert!(!baseline.is_mature(10));
        baseline.sample_count = 10;
        assert!(baseline.is_mature(10));
    }

    #[test]
    fn test_header_baseline_is_known() {
        let mut baseline = HeaderBaseline::new("/test".to_string());
        // Keys are stored lowercase: the lookup methods lowercase the queried
        // name, so a mixed-case key would not be hit by the direct lookup.
        baseline.required_headers.insert("content-type".to_string());
        baseline.optional_headers.insert("x-request-id".to_string());

        assert!(baseline.is_required("Content-Type"));
        assert!(!baseline.is_required("X-Request-ID"));
        assert!(baseline.is_known("Content-Type"));
        assert!(baseline.is_known("X-Request-ID"));
        assert!(!baseline.is_known("X-Unknown"));

        // The casing of the queried name must not matter.
        assert!(baseline.is_required("CONTENT-TYPE"));
        assert!(baseline.is_required("content-type"));
        assert!(baseline.is_known("x-request-id"));
    }

    #[test]
    fn test_header_baseline_header_frequency() {
        let mut baseline = HeaderBaseline::new("/test".to_string());
        baseline.sample_count = 100;

        let mut stats = ValueStats::new();
        for _ in 0..95 {
            stats.update(10, 4.0);
        }
        // Stored under the lowercase key that the lookups query for.
        baseline
            .header_value_stats
            .insert("content-type".to_string(), stats);

        let freq = baseline.header_frequency("Content-Type");
        assert!((freq - 0.95).abs() < 0.01);
        assert_eq!(baseline.header_frequency("Unknown"), 0.0);

        // `get_stats` resolves the same entry regardless of query casing.
        assert!(baseline.get_stats("CONTENT-TYPE").is_some());
        assert!(baseline.get_stats("Unknown").is_none());
    }
}