use std::collections::HashSet;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};
use dashmap::DashMap;
use serde::{Deserialize, Serialize};
use crate::profiler::endpoint_profile::EndpointProfile;
/// Limits and feature switches that govern a [`ProfileStore`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfileStoreConfig {
    /// Profile count at which the store starts attempting idle eviction.
    /// The map is also pre-sized to half of this value.
    pub max_profiles: usize,
    /// Sample count handed to `EndpointProfile::is_mature` when listing
    /// mature profiles.
    pub min_samples_for_detection: u32,
    /// Milliseconds without an update after which a profile may be evicted.
    pub idle_timeout_ms: u64,
    /// When `true`, request paths are normalized into templates before
    /// lookup; when `false`, the raw path is used as the key.
    pub enable_segment_detection: bool,
    /// Number of distinct values stored for one path position at which that
    /// position is treated as dynamic.
    pub dynamic_segment_threshold: usize,
}
impl Default for ProfileStoreConfig {
fn default() -> Self {
Self {
max_profiles: 10_000,
min_samples_for_detection: 100,
idle_timeout_ms: 24 * 60 * 60 * 1000, enable_segment_detection: true,
dynamic_segment_threshold: 10,
}
}
}
/// Tracks the distinct values observed at each path-segment position.
///
/// Note that the derived `Default` leaves `max_values` at `0`, in which case
/// `record` never stores anything; use `SegmentCardinality::new` to get a
/// usable tracker.
#[derive(Debug, Default)]
pub struct SegmentCardinality {
    /// Distinct values seen so far, keyed by segment position.
    segments: DashMap<usize, HashSet<String>>,
    /// Upper bound on the number of values stored per position.
    max_values: usize,
}
impl SegmentCardinality {
    /// Creates an empty tracker that stores at most `max_values` distinct
    /// values per segment position.
    pub fn new(max_values: usize) -> Self {
        Self {
            segments: DashMap::new(),
            max_values,
        }
    }

    /// Records `value` as seen at `position` and reports whether that
    /// position now holds at least `threshold` distinct values.
    ///
    /// Once a position already stores `max_values` entries, new values are
    /// ignored. The return value describes the position as a whole, so it is
    /// `true` even for a value that was seen before, as long as the stored
    /// count has reached `threshold`.
    pub fn record(&self, position: usize, value: &str, threshold: usize) -> bool {
        let mut values = self.segments.entry(position).or_insert_with(HashSet::new);
        // Check membership first so repeated values (the common case on a hot
        // path) do not allocate a fresh `String` just to have `insert` drop it.
        if values.len() < self.max_values && !values.contains(value) {
            values.insert(value.to_owned());
        }
        values.len() >= threshold
    }

    /// Returns `true` when `position` stores at least `threshold` distinct
    /// values; an unseen position is never dynamic.
    pub fn is_dynamic(&self, position: usize, threshold: usize) -> bool {
        self.segments
            .get(&position)
            .map_or(false, |values| values.len() >= threshold)
    }

    /// Returns the number of distinct values stored for `position`
    /// (`0` if the position has never been recorded).
    pub fn cardinality(&self, position: usize) -> usize {
        self.segments
            .get(&position)
            .map_or(0, |values| values.len())
    }

    /// Forgets every recorded value at every position.
    pub fn clear(&self) {
        self.segments.clear();
    }
}
/// Concurrent collection of [`EndpointProfile`]s keyed by path template.
pub struct ProfileStore {
    /// Profiles indexed by their path template (or raw path when segment
    /// detection is disabled).
    profiles: DashMap<String, EndpointProfile>,
    /// Limits and feature switches supplied at construction.
    config: ProfileStoreConfig,
    /// Distinct-value tracker consulted while normalizing paths.
    segment_cardinality: SegmentCardinality,
    /// Number of profiles inserted since construction.
    total_created: AtomicU64,
    /// Number of profiles removed by idle eviction.
    total_evicted: AtomicU64,
    /// Timestamp (ms since the Unix epoch) of the latest eviction pass;
    /// `0` until one has run.
    last_eviction_ms: AtomicU64,
}
impl Default for ProfileStore {
fn default() -> Self {
Self::new(ProfileStoreConfig::default())
}
}
impl ProfileStore {
    /// Creates an empty store governed by `config`.
    ///
    /// The profile map is pre-sized to half of `max_profiles`, and the
    /// segment tracker keeps at most twice `dynamic_segment_threshold`
    /// distinct values per position.
    pub fn new(config: ProfileStoreConfig) -> Self {
        // Saturate so an extreme threshold cannot overflow the multiplication
        // (which would panic in debug builds and wrap in release builds).
        let max_segment_values = config.dynamic_segment_threshold.saturating_mul(2);
        Self {
            profiles: DashMap::with_capacity(config.max_profiles / 2),
            config,
            segment_cardinality: SegmentCardinality::new(max_segment_values),
            total_created: AtomicU64::new(0),
            total_evicted: AtomicU64::new(0),
            last_eviction_ms: AtomicU64::new(0),
        }
    }

    /// Returns the configuration this store was built with.
    pub fn config(&self) -> &ProfileStoreConfig {
        &self.config
    }

    /// Returns a mutable guard for the profile matching `path`, inserting a
    /// fresh profile when none exists yet.
    ///
    /// With segment detection enabled the path is first normalized into a
    /// template; otherwise the raw path is the key. A throttled eviction pass
    /// may run before the lookup.
    ///
    /// The returned guard keeps part of the map locked. DashMap documents
    /// that `entry` and `iter` may deadlock when the calling thread already
    /// holds a reference into the map, so drop the guard before calling into
    /// the store again.
    pub fn get_or_create(
        &self,
        path: &str,
    ) -> dashmap::mapref::one::RefMut<'_, String, EndpointProfile> {
        let template = if self.config.enable_segment_detection {
            self.normalize_path(path)
        } else {
            path.to_owned()
        };
        let now = now_ms();
        self.maybe_evict(now);

        // Match on the entry instead of `or_insert_with` so the key is cloned
        // only when a profile is actually created, not on every lookup.
        match self.profiles.entry(template) {
            dashmap::mapref::entry::Entry::Occupied(existing) => existing.into_ref(),
            dashmap::mapref::entry::Entry::Vacant(slot) => {
                self.total_created.fetch_add(1, Ordering::Relaxed);
                let profile = EndpointProfile::new(slot.key().clone(), now);
                slot.insert(profile)
            }
        }
    }

    /// Returns a read guard for the profile stored under `template`, if any.
    ///
    /// No normalization is applied; `template` must be the exact key.
    pub fn get(
        &self,
        template: &str,
    ) -> Option<dashmap::mapref::one::Ref<'_, String, EndpointProfile>> {
        self.profiles.get(template)
    }

    /// Returns `true` when a profile is stored under exactly `template`.
    pub fn contains(&self, template: &str) -> bool {
        self.profiles.contains_key(template)
    }

    /// Returns the number of profiles currently stored.
    pub fn len(&self) -> usize {
        self.profiles.len()
    }

    /// Returns `true` when the store holds no profiles.
    pub fn is_empty(&self) -> bool {
        self.profiles.is_empty()
    }

    /// Rewrites `path` into a template by replacing dynamic segments with
    /// `{id}`.
    ///
    /// The path is split on `/`; empty segments are preserved so leading,
    /// trailing and doubled slashes survive. Every non-empty segment is
    /// recorded in the cardinality tracker (including segments that already
    /// look like identifiers) and is replaced when it looks like an
    /// identifier or its position has reached the configured threshold.
    fn normalize_path(&self, path: &str) -> String {
        let threshold = self.config.dynamic_segment_threshold;
        let mut template = String::with_capacity(path.len());

        for (position, segment) in path.split('/').enumerate() {
            if position > 0 {
                template.push('/');
            }
            if segment.is_empty() {
                continue;
            }
            let looks_dynamic = Self::looks_like_id(segment);
            // Always record, even when the segment already looks dynamic, so
            // the per-position statistics stay complete.
            let is_high_cardinality = self
                .segment_cardinality
                .record(position, segment, threshold);
            if looks_dynamic || is_high_cardinality {
                template.push_str("{id}");
            } else {
                template.push_str(segment);
            }
        }
        template
    }

    /// Heuristically decides whether `segment` is an identifier.
    ///
    /// A segment qualifies when it is:
    /// - 1 to 20 ASCII digits (longer all-digit strings are rejected),
    /// - 16 or more ASCII hex digits, or
    /// - exactly 36 characters drawn from ASCII hex digits and `-`
    ///   (the length and alphabet of a UUID; the dash layout is not checked).
    fn looks_like_id(segment: &str) -> bool {
        // Every accepted character is ASCII, so byte-wise checks match the
        // character-wise ones and byte length equals character count.
        let bytes = segment.as_bytes();
        if bytes.is_empty() {
            return false;
        }
        if bytes.iter().all(u8::is_ascii_digit) {
            return bytes.len() <= 20;
        }
        if bytes.iter().all(u8::is_ascii_hexdigit) {
            return bytes.len() >= 16;
        }
        bytes.len() == 36 && bytes.iter().all(|b| b.is_ascii_hexdigit() || *b == b'-')
    }

    /// Runs an eviction pass when the store is at capacity and at least one
    /// second has elapsed since the previous pass.
    ///
    /// Only idle profiles are removed, so the store can still grow beyond
    /// `max_profiles` while every profile is being updated.
    fn maybe_evict(&self, now_ms: u64) {
        // Minimum spacing between eviction passes.
        const EVICTION_INTERVAL_MS: u64 = 1000;

        let last = self.last_eviction_ms.load(Ordering::Relaxed);
        if now_ms.saturating_sub(last) < EVICTION_INTERVAL_MS {
            return;
        }
        if self.profiles.len() < self.config.max_profiles {
            return;
        }
        // Claim this pass atomically: with a plain load-then-store, several
        // threads could all pass the throttle check and scan the map at once.
        let claimed = self
            .last_eviction_ms
            .compare_exchange(last, now_ms, Ordering::Relaxed, Ordering::Relaxed)
            .is_ok();
        if claimed {
            self.evict_stale(now_ms);
        }
    }

    /// Removes up to 100 profiles whose last update is older than the
    /// configured idle timeout, counting each removal in `total_evicted`.
    fn evict_stale(&self, now_ms: u64) {
        // Bound the work done by a single pass.
        const MAX_EVICTIONS_PER_PASS: usize = 100;

        let cutoff = now_ms.saturating_sub(self.config.idle_timeout_ms);
        // Collect keys first so no iterator guard is held while removing.
        let stale_keys: Vec<String> = self
            .profiles
            .iter()
            .filter(|entry| entry.value().last_updated_ms < cutoff)
            .take(MAX_EVICTIONS_PER_PASS)
            .map(|entry| entry.key().clone())
            .collect();

        for key in stale_keys {
            // Re-check staleness under the lock: the profile may have been
            // refreshed between the scan above and this removal.
            let removed = self
                .profiles
                .remove_if(&key, |_, profile| profile.last_updated_ms < cutoff);
            if removed.is_some() {
                self.total_evicted.fetch_add(1, Ordering::Relaxed);
            }
        }
    }

    /// Drops every profile and all recorded segment statistics.
    ///
    /// The creation/eviction counters and the eviction timestamp are left
    /// untouched.
    pub fn clear(&self) {
        self.profiles.clear();
        self.segment_cardinality.clear();
    }

    /// Returns a snapshot of the store's size and lifetime counters.
    pub fn metrics(&self) -> ProfileStoreMetrics {
        ProfileStoreMetrics {
            current_profiles: self.profiles.len(),
            max_profiles: self.config.max_profiles,
            total_created: self.total_created.load(Ordering::Relaxed),
            total_evicted: self.total_evicted.load(Ordering::Relaxed),
        }
    }

    /// Returns a copy of every stored template key, in map iteration order.
    pub fn list_templates(&self) -> Vec<String> {
        self.profiles
            .iter()
            .map(|entry| entry.key().clone())
            .collect()
    }

    /// Returns a clone of every stored profile, in map iteration order.
    pub fn get_profiles(&self) -> Vec<EndpointProfile> {
        self.profiles
            .iter()
            .map(|entry| entry.value().clone())
            .collect()
    }

    /// Returns the template keys of profiles for which
    /// `EndpointProfile::is_mature` accepts `min_samples_for_detection`.
    pub fn mature_profiles(&self) -> Vec<String> {
        let min_samples = self.config.min_samples_for_detection;
        self.profiles
            .iter()
            .filter(|entry| entry.value().is_mature(min_samples))
            .map(|entry| entry.key().clone())
            .collect()
    }
}
/// Point-in-time counters describing a [`ProfileStore`].
#[derive(Debug, Clone, Serialize)]
pub struct ProfileStoreMetrics {
    /// Number of profiles held when the snapshot was taken.
    pub current_profiles: usize,
    /// The configured `max_profiles` value.
    pub max_profiles: usize,
    /// Number of profiles inserted since the store was constructed.
    pub total_created: u64,
    /// Number of profiles removed by idle eviction.
    pub total_evicted: u64,
}
/// Returns the current wall-clock time in milliseconds since the Unix epoch.
///
/// Yields `0` when the system clock reports a time before the epoch. The
/// `u128` millisecond count is clamped to `u64::MAX` before narrowing, so the
/// conversion saturates instead of silently truncating.
#[inline]
fn now_ms() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| {
            // Clamp first; after that the cast below cannot lose bits.
            let clamped = elapsed.as_millis().min(u128::from(u64::MAX));
            clamped as u64
        })
        .unwrap_or(0)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Five distinct values stay below a threshold of ten.
    #[test]
    fn test_segment_cardinality_basic() {
        let tracker = SegmentCardinality::new(100);
        (0..5).for_each(|n| {
            tracker.record(0, &format!("value_{}", n), 10);
        });

        assert_eq!(tracker.cardinality(0), 5);
        assert!(!tracker.is_dynamic(0, 10));
    }

    /// `record` reports dynamic exactly when the tenth distinct value lands.
    #[test]
    fn test_segment_cardinality_threshold() {
        let tracker = SegmentCardinality::new(100);
        for n in 0..10 {
            let flagged = tracker.record(0, &format!("value_{}", n), 10);
            let expected = n >= 9;
            assert!(flagged == expected);
        }

        assert!(tracker.is_dynamic(0, 10));
    }

    /// A single path yields a single profile stored under that path.
    #[test]
    fn test_profile_store_basic() {
        let store = ProfileStore::default();

        // The guard is a temporary here, so it is released at the end of the
        // statement, before the store is queried again.
        store.get_or_create("/api/users").update(
            100,
            &[("name", "John")],
            Some("application/json"),
            now_ms(),
        );

        assert_eq!(store.len(), 1);
        assert!(store.contains("/api/users"));
    }

    /// Paths differing only in a numeric segment collapse into one template.
    #[test]
    fn test_profile_store_path_normalization() {
        let store = ProfileStore::new(ProfileStoreConfig {
            enable_segment_detection: true,
            dynamic_segment_threshold: 2,
            ..Default::default()
        });

        for path in ["/api/users/123/orders", "/api/users/456/orders"].iter() {
            store.get_or_create(path);
        }

        assert_eq!(store.len(), 1);
        let templates = store.list_templates();
        let only_template = &templates[0];
        assert!(only_template.contains("{id}"));
    }

    /// Exercises each identifier heuristic and its boundaries.
    #[test]
    fn test_looks_like_id() {
        let accepted = [
            "123",
            "12345678901234567890",
            "550e8400-e29b-41d4-a716-446655440000",
            "abcdef1234567890",
            "507f1f77bcf86cd799439011",
        ];
        for candidate in accepted.iter() {
            assert!(ProfileStore::looks_like_id(candidate));
        }

        let rejected = [
            "123456789012345678901",
            "abcdef12345",
            "users",
            "api",
            "",
        ];
        for candidate in rejected.iter() {
            assert!(!ProfileStore::looks_like_id(candidate));
        }
    }

    /// With detection disabled, raw paths are kept as separate keys.
    #[test]
    fn test_profile_store_without_normalization() {
        let store = ProfileStore::new(ProfileStoreConfig {
            enable_segment_detection: false,
            ..Default::default()
        });

        store.get_or_create("/api/users/123");
        store.get_or_create("/api/users/456");

        assert_eq!(store.len(), 2);
    }

    /// Metrics reflect creations and report no evictions for a small store.
    #[test]
    fn test_profile_store_metrics() {
        let store = ProfileStore::default();
        (0..5).for_each(|n| {
            store.get_or_create(&format!("/api/endpoint_{}", n));
        });

        let snapshot = store.metrics();
        assert_eq!(snapshot.current_profiles, 5);
        assert_eq!(snapshot.total_created, 5);
        assert_eq!(snapshot.total_evicted, 0);
    }

    /// `clear` empties a populated store.
    #[test]
    fn test_profile_store_clear() {
        let store = ProfileStore::default();
        (0..5).for_each(|n| {
            store.get_or_create(&format!("/api/endpoint_{}", n));
        });
        assert_eq!(store.len(), 5);

        store.clear();

        assert!(store.is_empty());
    }

    /// Only the profile with enough samples is reported as mature.
    #[test]
    fn test_profile_store_mature_profiles() {
        let store = ProfileStore::new(ProfileStoreConfig {
            min_samples_for_detection: 10,
            enable_segment_detection: false,
            ..Default::default()
        });

        // Feed `samples` updates into the profile for `path`, releasing the
        // guard before returning.
        let feed = |path: &str, samples: usize| {
            let mut profile = store.get_or_create(path);
            for _ in 0..samples {
                profile.update(100, &[], None, now_ms());
            }
        };
        feed("/api/mature", 15);
        feed("/api/immature", 5);

        let mature = store.mature_profiles();
        assert_eq!(mature.len(), 1);
        assert!(mature.contains(&"/api/mature".to_string()));
    }

    /// Clearing the tracker resets the per-position count to zero.
    #[test]
    fn test_segment_cardinality_clear() {
        let tracker = SegmentCardinality::new(100);
        (0..10).for_each(|n| {
            tracker.record(0, &format!("value_{}", n), 20);
        });
        assert_eq!(tracker.cardinality(0), 10);

        tracker.clear();

        assert_eq!(tracker.cardinality(0), 0);
    }

    /// Values beyond `max_values` are not stored.
    #[test]
    fn test_segment_cardinality_max_values() {
        let tracker = SegmentCardinality::new(5);
        (0..10).for_each(|n| {
            tracker.record(0, &format!("value_{}", n), 100);
        });

        assert_eq!(tracker.cardinality(0), 5);
    }
}