use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use crate::profiler::distribution::Distribution;
use crate::profiler::rate_tracker::RateTracker;
/// Cap on the number of distinct content types tallied in each of
/// `EndpointProfile::content_types` and `EndpointProfile::response_content_types`.
/// Once a map holds this many keys, unseen content types are ignored.
const MAX_CONTENT_TYPES: usize = 20;
/// Cap on the number of distinct parameter names tracked in
/// `EndpointProfile::expected_params`.
const MAX_PARAMS: usize = 50;
/// Cap on distinct type labels that `ParamStats::update` passes to
/// `ParamStats::update_with_limit`.
const DEFAULT_MAX_TYPE_COUNTS: usize = 10;
/// Running statistics for the values observed under a single parameter name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParamStats {
/// Number of values recorded so far (one per `update*` call).
pub count: u32,
/// Distribution of value lengths; fed with `value.len()`, i.e. the UTF-8 byte length.
pub length_dist: Distribution,
/// Distribution of the values that were classified as numeric.
pub numeric_dist: Distribution,
/// Tally per detected type label. The labels written by this file are
/// `"numeric"`, `"string"`, `"email"` and `"uuid"`.
pub type_counts: HashMap<String, u32>,
}
impl Default for ParamStats {
fn default() -> Self {
Self::new()
}
}
impl ParamStats {
    /// Creates an empty tracker: zero observations, fresh distributions and no
    /// type tallies.
    pub fn new() -> Self {
        Self {
            count: 0,
            length_dist: Distribution::new(),
            numeric_dist: Distribution::new(),
            type_counts: HashMap::with_capacity(4),
        }
    }

    /// Records one observed value, tracking at most `DEFAULT_MAX_TYPE_COUNTS`
    /// distinct type labels.
    pub fn update(&mut self, value: &str) {
        self.update_with_limit(value, DEFAULT_MAX_TYPE_COUNTS);
    }

    /// Records one observed value.
    ///
    /// * `value` - the raw parameter value.
    /// * `max_type_counts` - upper bound on the number of distinct labels kept
    ///   in `type_counts`; labels already present keep counting once the bound
    ///   is reached, new labels are dropped.
    ///
    /// Every call bumps `count` and feeds the byte length of `value` into
    /// `length_dist`. The value is then labelled `"numeric"` or `"string"`, and
    /// may additionally be labelled `"email"` and/or `"uuid"`.
    pub fn update_with_limit(&mut self, value: &str, max_type_counts: usize) {
        // Saturate instead of wrapping (release) or panicking (debug) on overflow.
        self.count = self.count.saturating_add(1);
        self.length_dist.update(value.len() as f64);

        match value.parse::<f64>() {
            // `f64::from_str` also accepts "NaN", "inf" and "infinity". Those are
            // kept out of the numeric distribution and counted as plain strings.
            Ok(num) if num.is_finite() => {
                self.numeric_dist.update(num);
                self.bump_type("numeric", max_type_counts);
            }
            _ => self.bump_type("string", max_type_counts),
        }

        // Shape heuristics; these labels are added on top of the one above.
        if value.contains('@') && value.contains('.') {
            self.bump_type("email", max_type_counts);
        }
        if value.len() == 36 && value.chars().filter(|&c| c == '-').count() == 4 {
            self.bump_type("uuid", max_type_counts);
        }
    }

    /// Increments the tally for `type_name`, inserting it only while fewer than
    /// `max_type_counts` labels exist. The key is allocated only on first insert.
    fn bump_type(&mut self, type_name: &str, max_type_counts: usize) {
        if let Some(tally) = self.type_counts.get_mut(type_name) {
            *tally = tally.saturating_add(1);
        } else if self.type_counts.len() < max_type_counts {
            self.type_counts.insert(type_name.to_string(), 1);
        }
    }
}
/// Masks `value` for safe display, keeping only its first and last two
/// characters.
///
/// Values of four characters or fewer are masked completely. The result always
/// has the same number of characters as the input.
///
/// All arithmetic is done in characters rather than UTF-8 bytes, so multi-byte
/// input is neither over-masked nor under-masked and the trailing characters
/// are always taken from the real end of the string.
pub fn redact_value(value: &str) -> String {
    const VISIBLE_CHARS: usize = 2;

    let char_count = value.chars().count();
    if char_count <= VISIBLE_CHARS * 2 {
        return "*".repeat(char_count);
    }

    let start: String = value.chars().take(VISIBLE_CHARS).collect();
    let end: String = value.chars().skip(char_count - VISIBLE_CHARS).collect();
    // char_count > 2 * VISIBLE_CHARS here, so at least one character is masked.
    let mask_len = char_count - VISIBLE_CHARS * 2;
    format!("{}{}{}", start, "*".repeat(mask_len), end)
}
/// Heuristically flags values that look like personal or secret data.
///
/// Returns `true` when `value` matches any of these shapes:
/// * contains both `@` and `.` (email-like),
/// * is 36 bytes long with exactly four `-` (UUID-like),
/// * is longer than 20 bytes and made only of alphanumerics, `-` and `_`
///   (token-like).
pub fn is_likely_pii(value: &str) -> bool {
    let email_like = value.contains('@') && value.contains('.');
    let uuid_like = value.len() == 36 && value.matches('-').count() == 4;
    let token_like = value.len() > 20
        && value
            .chars()
            .all(|ch| matches!(ch, '-' | '_') || ch.is_alphanumeric());

    email_like || uuid_like || token_like
}
/// Learned traffic profile for a single endpoint template.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EndpointProfile {
/// Endpoint template this profile describes (the tests use paths such as `/api/users`).
pub template: String,
/// Distribution of request payload sizes passed to `update`.
pub payload_size: Distribution,
/// Distribution of response sizes passed to `update_response`.
pub response_size: Distribution,
/// Per-parameter statistics keyed by parameter name; `update` stops adding
/// new names once `MAX_PARAMS` are tracked.
pub expected_params: HashMap<String, ParamStats>,
/// Request content-type tallies; at most `MAX_CONTENT_TYPES` distinct keys are added by `update`.
pub content_types: HashMap<String, u32>,
/// Response content-type tallies; at most `MAX_CONTENT_TYPES` distinct keys are added by `update_response`.
pub response_content_types: HashMap<String, u32>,
/// Tally of responses per HTTP status code.
pub status_codes: HashMap<u16, u32>,
/// Request-rate tracker; `update` records each request timestamp into it.
pub request_rate: RateTracker,
/// Risk score for the endpoint. Initialised to 0.0 and not modified by the
/// methods visible in this file — presumably set by callers (TODO confirm scale).
pub endpoint_risk: f32,
/// Number of `update` calls (requests) folded into this profile.
pub sample_count: u32,
/// Timestamp (ms) supplied when the profile was created.
pub first_seen_ms: u64,
/// Timestamp (ms) of the most recent `update` or `update_response` call.
pub last_updated_ms: u64,
}
impl EndpointProfile {
    /// Creates an empty profile for `template`, stamping both `first_seen_ms`
    /// and `last_updated_ms` with `now_ms`.
    pub fn new(template: String, now_ms: u64) -> Self {
        Self {
            template,
            payload_size: Distribution::new(),
            response_size: Distribution::new(),
            expected_params: HashMap::with_capacity(16),
            content_types: HashMap::with_capacity(4),
            response_content_types: HashMap::with_capacity(4),
            status_codes: HashMap::with_capacity(8),
            request_rate: RateTracker::new(),
            endpoint_risk: 0.0,
            sample_count: 0,
            first_seen_ms: now_ms,
            last_updated_ms: now_ms,
        }
    }

    /// Folds one request into the profile.
    ///
    /// * `payload_size` - request payload size, added to `payload_size`.
    /// * `params` - `(name, value)` pairs seen on the request. Known names are
    ///   updated; new names are added only while fewer than `MAX_PARAMS` are
    ///   tracked.
    /// * `content_type` - request content type, tallied subject to
    ///   `MAX_CONTENT_TYPES`.
    /// * `now_ms` - request timestamp; recorded in the rate tracker and stored
    ///   as `last_updated_ms`.
    ///
    /// Increments `sample_count` once per call.
    pub fn update(
        &mut self,
        payload_size: usize,
        params: &[(&str, &str)],
        content_type: Option<&str>,
        now_ms: u64,
    ) {
        self.payload_size.update(payload_size as f64);
        self.request_rate.record(now_ms);

        for &(param_name, param_value) in params {
            if let Some(stats) = self.expected_params.get_mut(param_name) {
                stats.update(param_value);
            } else if self.expected_params.len() < MAX_PARAMS {
                let mut stats = ParamStats::new();
                stats.update(param_value);
                self.expected_params.insert(param_name.to_string(), stats);
            }
        }

        // The loop above never grows the map past MAX_PARAMS, so this only
        // fires when the public map was filled some other way (for example a
        // deserialized or externally mutated profile).
        if self.expected_params.len() > MAX_PARAMS {
            Self::evict_least_frequent(&mut self.expected_params, MAX_PARAMS);
        }

        if let Some(ct) = content_type {
            Self::bump_bounded(&mut self.content_types, ct, MAX_CONTENT_TYPES);
        }

        self.sample_count = self.sample_count.saturating_add(1);
        self.last_updated_ms = now_ms;
    }

    /// Folds one response into the profile: its size, status code and
    /// (optionally) content type. Updates `last_updated_ms` but does not touch
    /// `sample_count`.
    pub fn update_response(
        &mut self,
        response_size: usize,
        status_code: u16,
        content_type: Option<&str>,
        now_ms: u64,
    ) {
        self.response_size.update(response_size as f64);
        self.record_status(status_code);

        if let Some(ct) = content_type {
            Self::bump_bounded(&mut self.response_content_types, ct, MAX_CONTENT_TYPES);
        }

        self.last_updated_ms = now_ms;
    }

    /// Increments the tally for `status_code`, saturating at `u32::MAX`.
    pub fn record_status(&mut self, status_code: u16) {
        let tally = self.status_codes.entry(status_code).or_insert(0);
        *tally = tally.saturating_add(1);
    }

    /// Returns the most frequently seen request content type, or `None` when
    /// none has been recorded. Ties resolve to the lexicographically smallest
    /// name so the answer does not depend on hash-map iteration order.
    pub fn dominant_content_type(&self) -> Option<&str> {
        Self::most_frequent(&self.content_types)
    }

    /// Returns the most frequently seen response content type, or `None` when
    /// none has been recorded. Ties resolve to the lexicographically smallest
    /// name.
    pub fn dominant_response_content_type(&self) -> Option<&str> {
        Self::most_frequent(&self.response_content_types)
    }

    /// Returns how often `param` was observed relative to `sample_count`.
    ///
    /// Yields 0.0 for unknown parameters or when no samples exist. The ratio
    /// can exceed 1.0 if the same name appears more than once within a single
    /// `update` call, because every occurrence is counted.
    pub fn param_frequency(&self, param: &str) -> f64 {
        if self.sample_count == 0 {
            return 0.0;
        }
        self.expected_params
            .get(param)
            .map_or(0.0, |stats| stats.count as f64 / self.sample_count as f64)
    }

    /// Returns `true` when `param_frequency(param)` is at least `threshold`.
    pub fn is_expected_param(&self, param: &str, threshold: f64) -> bool {
        self.param_frequency(param) >= threshold
    }

    /// Returns the share of recorded responses that carried `status_code`
    /// (0.0 when no responses were recorded).
    pub fn status_frequency(&self, status_code: u16) -> f64 {
        let total = self.status_total();
        if total == 0 {
            return 0.0;
        }
        self.status_codes
            .get(&status_code)
            .map_or(0.0, |&count| count as f64 / total as f64)
    }

    /// Returns the share of recorded responses whose status code is 400 or
    /// above (0.0 when no responses were recorded).
    pub fn error_rate(&self) -> f64 {
        let total = self.status_total();
        if total == 0 {
            return 0.0;
        }
        let errors: u64 = self
            .status_codes
            .iter()
            .filter(|(code, _)| **code >= 400)
            .map(|(_, count)| u64::from(*count))
            .sum();
        errors as f64 / total as f64
    }

    /// Returns the average number of samples per minute since `first_seen_ms`.
    ///
    /// Lifetimes shorter than one minute are treated as one minute, so a young
    /// profile reports its raw `sample_count` rather than an inflated rate.
    pub fn baseline_rate(&self, now_ms: u64) -> f64 {
        let lifetime_minutes = self.age_ms(now_ms) as f64 / 60_000.0;
        self.sample_count as f64 / lifetime_minutes.max(1.0)
    }

    /// Shrinks `map` to at most `target_size` entries by removing the entries
    /// with the lowest `count`.
    ///
    /// Exactly `map.len() - target_size` entries are removed, even when several
    /// entries share the same count. Ties are broken by parameter name, with
    /// smaller names evicted first, so the outcome is deterministic.
    fn evict_least_frequent(map: &mut HashMap<String, ParamStats>, target_size: usize) {
        if map.len() <= target_size {
            return;
        }
        let to_remove = map.len() - target_size;

        let mut ranked: Vec<(u32, &String)> = map
            .iter()
            .map(|(name, stats)| (stats.count, name))
            .collect();
        ranked.sort_unstable();

        // Clone the doomed keys so the immutable borrow ends before mutation.
        let doomed: Vec<String> = ranked
            .into_iter()
            .take(to_remove)
            .map(|(_, name)| name.clone())
            .collect();
        for name in &doomed {
            map.remove(name);
        }
    }

    /// Returns `true` once at least `min_samples` requests have been recorded.
    pub fn is_mature(&self, min_samples: u32) -> bool {
        self.sample_count >= min_samples
    }

    /// Milliseconds between `first_seen_ms` and `now_ms` (0 if `now_ms` is earlier).
    pub fn age_ms(&self, now_ms: u64) -> u64 {
        now_ms.saturating_sub(self.first_seen_ms)
    }

    /// Milliseconds between `last_updated_ms` and `now_ms` (0 if `now_ms` is earlier).
    pub fn idle_ms(&self, now_ms: u64) -> u64 {
        now_ms.saturating_sub(self.last_updated_ms)
    }

    /// Increments the tally for `key`; a new key is inserted only while the map
    /// holds fewer than `cap` entries. The key is allocated only on insert.
    fn bump_bounded(tallies: &mut HashMap<String, u32>, key: &str, cap: usize) {
        if let Some(tally) = tallies.get_mut(key) {
            *tally = tally.saturating_add(1);
        } else if tallies.len() < cap {
            tallies.insert(key.to_string(), 1);
        }
    }

    /// Returns the key with the highest tally; ties go to the smallest key.
    fn most_frequent(tallies: &HashMap<String, u32>) -> Option<&str> {
        tallies
            .iter()
            .max_by(|a, b| a.1.cmp(b.1).then_with(|| b.0.cmp(a.0)))
            .map(|(name, _)| name.as_str())
    }

    /// Sums all status-code tallies in `u64` so the total cannot overflow.
    fn status_total(&self) -> u64 {
        self.status_codes.values().map(|&n| u64::from(n)).sum()
    }
}
/// Unit tests for `ParamStats`, `EndpointProfile` and the redaction helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_endpoint_profile_new() {
        let profile = EndpointProfile::new("/api/users".to_string(), 1000);
        assert_eq!(profile.template, "/api/users");
        assert_eq!(profile.sample_count, 0);
        assert_eq!(profile.first_seen_ms, 1000);
        assert_eq!(profile.last_updated_ms, 1000);
    }

    #[test]
    fn test_endpoint_profile_update() {
        let mut profile = EndpointProfile::new("/api/users".to_string(), 1000);
        profile.update(
            100,
            &[("name", "alice"), ("email", "a@example.com")],
            Some("application/json"),
            2000,
        );
        assert_eq!(profile.sample_count, 1);
        assert_eq!(profile.last_updated_ms, 2000);
        assert!(profile.expected_params.contains_key("name"));
        assert!(profile.expected_params.contains_key("email"));
        assert!(profile.content_types.contains_key("application/json"));
    }

    #[test]
    fn test_endpoint_profile_param_frequency() {
        let mut profile = EndpointProfile::new("/api/users".to_string(), 1000);
        // "name" is sent on every request, "email" on every other one.
        for i in 0..10 {
            let params = if i % 2 == 0 {
                vec![("name", "val"), ("email", "val")]
            } else {
                vec![("name", "val")]
            };
            profile.update(100, &params, None, 1000 + i * 100);
        }
        assert!((profile.param_frequency("name") - 1.0).abs() < 0.01);
        assert!((profile.param_frequency("email") - 0.5).abs() < 0.01);
        assert_eq!(profile.param_frequency("unknown"), 0.0);
    }

    #[test]
    fn test_endpoint_profile_is_expected_param() {
        let mut profile = EndpointProfile::new("/api/users".to_string(), 1000);
        // "name" appears in 9 of 10 requests, "optional" in 3 of 10.
        for i in 0..10 {
            let params = if i == 0 {
                vec![("optional", "val")]
            } else if i < 3 {
                vec![("name", "val"), ("optional", "val")]
            } else {
                vec![("name", "val")]
            };
            profile.update(100, &params, None, 1000 + i * 100);
        }
        assert!(profile.is_expected_param("name", 0.8));
        assert!(!profile.is_expected_param("optional", 0.8));
    }

    #[test]
    fn test_endpoint_profile_content_type_bounds() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);
        // Fill the map exactly to its cap.
        for i in 0..MAX_CONTENT_TYPES {
            profile.update(
                100,
                &[],
                Some(&format!("application/type-{}", i)),
                1000 + i as u64,
            );
        }
        assert_eq!(profile.content_types.len(), MAX_CONTENT_TYPES);

        // Unseen content types beyond the cap must be ignored.
        for i in 0..10 {
            profile.update(
                100,
                &[],
                Some(&format!("application/extra-{}", i)),
                2000 + i as u64,
            );
        }
        assert_eq!(profile.content_types.len(), MAX_CONTENT_TYPES);

        // Already-known content types keep counting at the cap.
        let initial_count = *profile.content_types.get("application/type-0").unwrap();
        profile.update(100, &[], Some("application/type-0"), 3000);
        let updated_count = *profile.content_types.get("application/type-0").unwrap();
        assert_eq!(updated_count, initial_count + 1);
    }

    #[test]
    fn test_endpoint_profile_dominant_content_type() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);
        for _ in 0..5 {
            profile.update(100, &[], Some("application/json"), 1000);
        }
        for _ in 0..2 {
            profile.update(100, &[], Some("application/xml"), 1000);
        }
        assert_eq!(profile.dominant_content_type(), Some("application/json"));
    }

    #[test]
    fn test_endpoint_profile_status_codes() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);
        for _ in 0..8 {
            profile.record_status(200);
        }
        for _ in 0..2 {
            profile.record_status(500);
        }
        assert!((profile.status_frequency(200) - 0.8).abs() < 0.01);
        assert!((profile.status_frequency(500) - 0.2).abs() < 0.01);
        assert!((profile.error_rate() - 0.2).abs() < 0.01);
    }

    #[test]
    fn test_endpoint_profile_baseline_rate() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 0);
        // 60 requests spread over one minute.
        for i in 0..60 {
            profile.update(100, &[], None, i * 1000);
        }
        let rate = profile.baseline_rate(60_000);
        assert!((rate - 60.0).abs() < 1.0);
    }

    #[test]
    fn test_endpoint_profile_is_mature() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);
        assert!(!profile.is_mature(10));
        for i in 0..10 {
            profile.update(100, &[], None, 1000 + i * 100);
        }
        assert!(profile.is_mature(10));
        assert!(!profile.is_mature(20));
    }

    #[test]
    fn test_endpoint_profile_age_and_idle() {
        let profile = EndpointProfile::new("/api/test".to_string(), 1000);
        assert_eq!(profile.age_ms(2000), 1000);
        assert_eq!(profile.idle_ms(2000), 1000);
    }

    #[test]
    fn test_evict_least_frequent() {
        let mut map: HashMap<String, ParamStats> = HashMap::new();
        let mut s1 = ParamStats::new();
        s1.count = 10;
        let mut s2 = ParamStats::new();
        s2.count = 5;
        let mut s3 = ParamStats::new();
        s3.count = 1;
        let mut s4 = ParamStats::new();
        s4.count = 8;
        map.insert("a".to_string(), s1);
        map.insert("b".to_string(), s2);
        map.insert("c".to_string(), s3);
        map.insert("d".to_string(), s4);

        EndpointProfile::evict_least_frequent(&mut map, 2);

        assert!(map.len() <= 2);
        // The most frequent entry must survive eviction.
        assert!(map.contains_key("a"));
    }

    #[test]
    fn test_param_stats_type_count_limit() {
        let mut stats = ParamStats::new();
        for _ in 0..100 {
            stats.update("12345");
            stats.update("hello");
            stats.update("test@example.com");
            stats.update("123e4567-e89b-12d3-a456-426614174000");
        }
        assert!(stats.type_counts.len() <= DEFAULT_MAX_TYPE_COUNTS);
    }

    #[test]
    fn test_param_stats_custom_type_limit() {
        let mut stats = ParamStats::new();
        for _ in 0..10 {
            stats.update_with_limit("12345", 2);
            stats.update_with_limit("hello", 2);
            stats.update_with_limit("test@example.com", 2);
        }
        assert!(stats.type_counts.len() <= 2);
    }

    #[test]
    fn test_redact_value() {
        let email = "user@example.com";
        let redacted = redact_value(email);
        assert!(redacted.starts_with("us"));
        assert!(redacted.ends_with("om"));
        assert!(redacted.len() == email.len());

        let short = "ab";
        let redacted_short = redact_value(short);
        assert_eq!(redacted_short, "**");

        let medium = "hello";
        let redacted_medium = redact_value(medium);
        assert!(redacted_medium.starts_with("he"));
        assert!(redacted_medium.ends_with("lo"));
    }

    #[test]
    fn test_is_likely_pii() {
        assert!(is_likely_pii("user@example.com"));
        assert!(is_likely_pii("admin@company.org"));
        assert!(!is_likely_pii("not-email-format"));
        assert!(is_likely_pii("123e4567-e89b-12d3-a456-426614174000"));
        assert!(!is_likely_pii("not-a-uuid"));
        assert!(is_likely_pii("abcdefghijklmnopqrstuvwxyz12345"));
        assert!(!is_likely_pii("short"));
    }

    #[test]
    fn test_endpoint_profile_response_update() {
        let mut profile = EndpointProfile::new("/api/users".to_string(), 1000);
        profile.update_response(5000, 200, Some("application/json"), 2000);
        assert_eq!(profile.last_updated_ms, 2000);
        assert!((profile.response_size.mean() - 5000.0).abs() < 0.01);
        assert!((profile.status_frequency(200) - 1.0).abs() < 0.01);
        assert_eq!(
            profile.dominant_response_content_type(),
            Some("application/json")
        );
    }

    #[test]
    fn test_endpoint_profile_response_content_type_bounds() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);
        for i in 0..MAX_CONTENT_TYPES {
            profile.update_response(
                100,
                200,
                Some(&format!("application/type-{}", i)),
                1000 + i as u64,
            );
        }
        assert_eq!(profile.response_content_types.len(), MAX_CONTENT_TYPES);
        profile.update_response(100, 200, Some("application/extra"), 2000);
        assert_eq!(profile.response_content_types.len(), MAX_CONTENT_TYPES);
    }

    #[test]
    fn test_param_eviction_under_load() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);
        // Establish MAX_PARAMS well-known parameters first...
        for i in 0..MAX_PARAMS {
            let name = format!("p{}", i);
            for _ in 0..10 {
                profile.update(100, &[(&name, "val")], None, 1000);
            }
        }
        // ...then flood the profile with one-off parameter names.
        for i in MAX_PARAMS..(MAX_PARAMS * 2) {
            let name = format!("p{}", i);
            profile.update(100, &[(&name, "val")], None, 1000);
        }
        assert!(profile.expected_params.len() <= MAX_PARAMS);
        assert!(profile.expected_params.contains_key("p0"));
        assert!(profile.expected_params.contains_key("p49"));
    }

    #[test]
    fn test_baseline_rate_zero_lifetime() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);
        profile.update(100, &[], None, 1000);
        let rate = profile.baseline_rate(1000);
        assert!(rate > 0.0);
        assert!(rate.is_finite());
    }

    #[test]
    fn test_param_stats_no_type_counts_but_count_positive() {
        let mut stats = ParamStats::new();
        // A limit of zero suppresses type labels but not the other statistics.
        stats.update_with_limit("val", 0);
        assert_eq!(stats.count, 1);
        assert_eq!(stats.type_counts.len(), 0);
        assert_eq!(stats.length_dist.count(), 1);
    }
}