1use std::collections::HashMap;
14
15use serde::{Deserialize, Serialize};
16
17use crate::profiler::distribution::Distribution;
18use crate::profiler::rate_tracker::RateTracker;
19
/// Maximum number of distinct content types tracked per endpoint, applied separately to the
/// request and the response content-type maps. Content types first seen after a map is full
/// are not recorded.
const MAX_CONTENT_TYPES: usize = 20;

/// Maximum number of distinct parameter names tracked per endpoint.
const MAX_PARAMS: usize = 50;

/// Cap on the number of distinct type labels stored per parameter when `ParamStats::update`
/// is used without an explicit limit.
const DEFAULT_MAX_TYPE_COUNTS: usize = 10;
34
/// Running statistics about the values observed for a single request parameter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParamStats {
    /// Number of values recorded for this parameter (one per update call).
    pub count: u32,

    /// Distribution of observed value lengths, measured in bytes (`str::len`).
    pub length_dist: Distribution,

    /// Distribution of the observed values that were recognised as numeric.
    pub numeric_dist: Distribution,

    /// Occurrence count per detected type label ("numeric", "string", "email", "uuid").
    /// The number of distinct labels is capped by the limit passed to the update call.
    pub type_counts: HashMap<String, u32>,
}
54
55impl Default for ParamStats {
56 fn default() -> Self {
57 Self::new()
58 }
59}
60
61impl ParamStats {
62 pub fn new() -> Self {
63 Self {
64 count: 0,
65 length_dist: Distribution::new(),
66 numeric_dist: Distribution::new(),
67 type_counts: HashMap::with_capacity(4), }
69 }
70
71 pub fn update(&mut self, value: &str) {
76 self.update_with_limit(value, DEFAULT_MAX_TYPE_COUNTS);
77 }
78
79 pub fn update_with_limit(&mut self, value: &str, max_type_counts: usize) {
81 self.count += 1;
82 self.length_dist.update(value.len() as f64);
83
84 let mut increment_type = |type_name: &str| {
86 if self.type_counts.contains_key(type_name) || self.type_counts.len() < max_type_counts
88 {
89 *self.type_counts.entry(type_name.to_string()).or_insert(0) += 1;
90 }
91 };
92
93 if let Ok(num) = value.parse::<f64>() {
95 self.numeric_dist.update(num);
96 increment_type("numeric");
97 } else {
98 increment_type("string");
99 }
100
101 if value.contains('@') && value.contains('.') {
103 increment_type("email");
104 }
105 if value.len() == 36 && value.chars().filter(|&c| c == '-').count() == 4 {
106 increment_type("uuid");
107 }
108 }
109}
110
/// Masks a value for display, keeping only its first and last two characters.
///
/// Values of four characters or fewer are masked entirely. Longer values keep two leading and
/// two trailing characters, with every character in between replaced by `*`. The result always
/// has the same number of characters as the input.
///
/// All lengths are counted in Unicode scalar values (`char`s), not bytes, so multi-byte input
/// is masked the same way as ASCII and short non-ASCII values are never returned in clear.
pub fn redact_value(value: &str) -> String {
    const VISIBLE_CHARS: usize = 2;

    let char_count = value.chars().count();
    if char_count <= VISIBLE_CHARS * 2 {
        return "*".repeat(char_count);
    }

    // char_count > 2 * VISIBLE_CHARS here, so at least one character is masked.
    let mask_len = char_count - VISIBLE_CHARS * 2;

    let mut redacted = String::with_capacity(value.len());
    redacted.extend(value.chars().take(VISIBLE_CHARS));
    redacted.extend(std::iter::repeat('*').take(mask_len));
    redacted.extend(value.chars().skip(char_count - VISIBLE_CHARS));
    redacted
}
134
/// Heuristically decides whether `value` has the shape of personal or secret data.
///
/// Returns `true` as soon as one of these checks matches:
/// - email-like: contains both `'@'` and `'.'`;
/// - UUID-like: exactly 36 bytes long with exactly four `'-'` characters;
/// - token-like: longer than 20 bytes and made up solely of alphanumerics, `'-'` or `'_'`.
pub fn is_likely_pii(value: &str) -> bool {
    let byte_len = value.len();

    // Closures keep the checks lazy, so later ones run only if the earlier ones fail.
    let email_like = || value.contains('@') && value.contains('.');
    let uuid_like = || byte_len == 36 && value.matches('-').count() == 4;
    let token_like = || {
        byte_len > 20
            && value
                .chars()
                .all(|ch| matches!(ch, '-' | '_') || ch.is_alphanumeric())
    };

    email_like() || uuid_like() || token_like()
}
155
/// Learned behavioural profile of a single endpoint, built up from observed requests and
/// responses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EndpointProfile {
    /// Endpoint template string supplied at construction (the tests use values such as
    /// `/api/users`).
    pub template: String,

    /// Distribution of request payload sizes recorded by `update`.
    pub payload_size: Distribution,

    /// Distribution of response sizes recorded by `update_response`.
    pub response_size: Distribution,

    /// Per-parameter statistics keyed by parameter name. New names are only added while the
    /// map holds fewer than `MAX_PARAMS` entries.
    pub expected_params: HashMap<String, ParamStats>,

    /// Number of requests seen per request content type (at most `MAX_CONTENT_TYPES` keys).
    pub content_types: HashMap<String, u32>,

    /// Number of responses seen per response content type (at most `MAX_CONTENT_TYPES` keys).
    pub response_content_types: HashMap<String, u32>,

    /// Number of responses seen per HTTP status code.
    pub status_codes: HashMap<u16, u32>,

    /// Request rate tracker, fed with the timestamp of every request passed to `update`.
    pub request_rate: RateTracker,

    /// Risk score of the endpoint. Initialised to `0.0` by `new`; nothing in this file
    /// modifies it afterwards (presumably maintained by callers; confirm).
    pub endpoint_risk: f32,

    /// Number of requests recorded via `update` (responses do not count).
    pub sample_count: u32,

    /// Timestamp in milliseconds passed to `new` when the profile was created.
    pub first_seen_ms: u64,

    /// Timestamp in milliseconds of the most recent `update` or `update_response` call.
    pub last_updated_ms: u64,
}
199
200impl EndpointProfile {
201 pub fn new(template: String, now_ms: u64) -> Self {
203 Self {
204 template,
205 payload_size: Distribution::new(),
206 response_size: Distribution::new(),
207 expected_params: HashMap::with_capacity(16),
208 content_types: HashMap::with_capacity(4),
209 response_content_types: HashMap::with_capacity(4),
210 status_codes: HashMap::with_capacity(8),
211 request_rate: RateTracker::new(),
212 endpoint_risk: 0.0,
213 sample_count: 0,
214 first_seen_ms: now_ms,
215 last_updated_ms: now_ms,
216 }
217 }
218
219 pub fn update(
223 &mut self,
224 payload_size: usize,
225 params: &[(&str, &str)], content_type: Option<&str>,
227 now_ms: u64,
228 ) {
229 self.payload_size.update(payload_size as f64);
231
232 self.request_rate.record(now_ms);
234
235 for &(param_name, param_value) in params {
237 if let Some(stats) = self.expected_params.get_mut(param_name) {
238 stats.update(param_value);
239 } else if self.expected_params.len() < MAX_PARAMS {
240 let mut stats = ParamStats::new();
241 stats.update(param_value);
242 self.expected_params.insert(param_name.to_string(), stats);
243 }
244 }
245
246 if self.expected_params.len() > MAX_PARAMS {
248 Self::evict_least_frequent(&mut self.expected_params, MAX_PARAMS);
249 }
250
251 if let Some(ct) = content_type {
253 if self.content_types.len() < MAX_CONTENT_TYPES || self.content_types.contains_key(ct) {
255 *self.content_types.entry(ct.to_string()).or_insert(0) += 1;
256 }
257 }
259
260 self.sample_count += 1;
261 self.last_updated_ms = now_ms;
262 }
263
264 pub fn update_response(
266 &mut self,
267 response_size: usize,
268 status_code: u16,
269 content_type: Option<&str>,
270 now_ms: u64,
271 ) {
272 self.response_size.update(response_size as f64);
273 self.record_status(status_code);
274
275 if let Some(ct) = content_type {
276 if self.response_content_types.len() < MAX_CONTENT_TYPES
277 || self.response_content_types.contains_key(ct)
278 {
279 *self
280 .response_content_types
281 .entry(ct.to_string())
282 .or_insert(0) += 1;
283 }
284 }
285
286 self.last_updated_ms = now_ms;
287 }
288
289 pub fn record_status(&mut self, status_code: u16) {
291 *self.status_codes.entry(status_code).or_insert(0) += 1;
292 }
293
294 pub fn dominant_content_type(&self) -> Option<&str> {
296 self.content_types
297 .iter()
298 .max_by_key(|(_, count)| *count)
299 .map(|(ct, _)| ct.as_str())
300 }
301
302 pub fn dominant_response_content_type(&self) -> Option<&str> {
304 self.response_content_types
305 .iter()
306 .max_by_key(|(_, count)| *count)
307 .map(|(ct, _)| ct.as_str())
308 }
309
310 pub fn param_frequency(&self, param: &str) -> f64 {
312 if self.sample_count == 0 {
313 return 0.0;
314 }
315 self.expected_params
316 .get(param)
317 .map(|stats| stats.count as f64 / self.sample_count as f64)
318 .unwrap_or(0.0)
319 }
320
321 pub fn is_expected_param(&self, param: &str, threshold: f64) -> bool {
323 self.param_frequency(param) >= threshold
324 }
325
326 pub fn status_frequency(&self, status_code: u16) -> f64 {
328 let total: u32 = self.status_codes.values().sum();
329 if total == 0 {
330 return 0.0;
331 }
332 self.status_codes
333 .get(&status_code)
334 .map(|&count| count as f64 / total as f64)
335 .unwrap_or(0.0)
336 }
337
338 pub fn error_rate(&self) -> f64 {
340 let total: u32 = self.status_codes.values().sum();
341 if total == 0 {
342 return 0.0;
343 }
344 let errors: u32 = self
345 .status_codes
346 .iter()
347 .filter(|(&code, _)| code >= 400)
348 .map(|(_, &count)| count)
349 .sum();
350 errors as f64 / total as f64
351 }
352
353 pub fn baseline_rate(&self, now_ms: u64) -> f64 {
355 let lifetime_ms = now_ms.saturating_sub(self.first_seen_ms).max(1);
356 let lifetime_minutes = lifetime_ms as f64 / 60_000.0;
357 self.sample_count as f64 / lifetime_minutes.max(1.0)
358 }
359
360 fn evict_least_frequent(map: &mut HashMap<String, ParamStats>, target_size: usize) {
362 if map.len() <= target_size {
363 return;
364 }
365
366 let mut frequencies: Vec<u32> = map.values().map(|s| s.count).collect();
368 frequencies.sort_unstable();
369 let to_remove = map.len() - target_size;
370 let min_keep = frequencies.get(to_remove).copied().unwrap_or(0);
371
372 map.retain(|_, stats| stats.count >= min_keep);
374 }
375
376 pub fn is_mature(&self, min_samples: u32) -> bool {
378 self.sample_count >= min_samples
379 }
380
381 pub fn age_ms(&self, now_ms: u64) -> u64 {
383 now_ms.saturating_sub(self.first_seen_ms)
384 }
385
386 pub fn idle_ms(&self, now_ms: u64) -> u64 {
388 now_ms.saturating_sub(self.last_updated_ms)
389 }
390}
391
/// Unit tests for the parameter statistics, redaction helpers and endpoint profile.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_endpoint_profile_new() {
        let profile = EndpointProfile::new("/api/users".to_string(), 1000);
        assert_eq!(profile.template, "/api/users");
        assert_eq!(profile.sample_count, 0);
        assert_eq!(profile.first_seen_ms, 1000);
        assert_eq!(profile.last_updated_ms, 1000);
    }

    #[test]
    fn test_endpoint_profile_update() {
        let mut profile = EndpointProfile::new("/api/users".to_string(), 1000);

        profile.update(
            100,
            &[("name", "alice"), ("email", "a@example.com")],
            Some("application/json"),
            2000,
        );

        assert_eq!(profile.sample_count, 1);
        assert_eq!(profile.last_updated_ms, 2000);
        assert!(profile.expected_params.contains_key("name"));
        assert!(profile.expected_params.contains_key("email"));
        assert!(profile.content_types.contains_key("application/json"));
    }

    #[test]
    fn test_endpoint_profile_param_frequency() {
        let mut profile = EndpointProfile::new("/api/users".to_string(), 1000);

        // "name" is sent with every request, "email" with every second one.
        for i in 0..10 {
            let params = if i % 2 == 0 {
                vec![("name", "val"), ("email", "val")]
            } else {
                vec![("name", "val")]
            };
            profile.update(100, &params, None, 1000 + i * 100);
        }

        assert!((profile.param_frequency("name") - 1.0).abs() < 0.01);
        assert!((profile.param_frequency("email") - 0.5).abs() < 0.01);
        assert_eq!(profile.param_frequency("unknown"), 0.0);
    }

    #[test]
    fn test_endpoint_profile_is_expected_param() {
        let mut profile = EndpointProfile::new("/api/users".to_string(), 1000);

        // "name" appears in 9 of 10 requests, "optional" in 3 of 10.
        for i in 0..10 {
            let params = if i == 0 {
                vec![("optional", "val")]
            } else if i < 3 {
                vec![("name", "val"), ("optional", "val")]
            } else {
                vec![("name", "val")]
            };
            profile.update(100, &params, None, 1000 + i * 100);
        }

        assert!(profile.is_expected_param("name", 0.8));
        assert!(!profile.is_expected_param("optional", 0.8));
    }

    #[test]
    fn test_endpoint_profile_content_type_bounds() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);

        // Fill the map up to its cap.
        for i in 0..MAX_CONTENT_TYPES {
            profile.update(
                100,
                &[],
                Some(&format!("application/type-{}", i)),
                1000 + i as u64,
            );
        }
        assert_eq!(profile.content_types.len(), MAX_CONTENT_TYPES);

        // New content types beyond the cap must be ignored.
        for i in 0..10 {
            profile.update(
                100,
                &[],
                Some(&format!("application/extra-{}", i)),
                2000 + i as u64,
            );
        }
        assert_eq!(profile.content_types.len(), MAX_CONTENT_TYPES);

        // Content types that are already tracked keep counting.
        let initial_count = *profile.content_types.get("application/type-0").unwrap();
        profile.update(100, &[], Some("application/type-0"), 3000);
        let updated_count = *profile.content_types.get("application/type-0").unwrap();
        assert_eq!(updated_count, initial_count + 1);
    }

    #[test]
    fn test_endpoint_profile_dominant_content_type() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);

        for _ in 0..5 {
            profile.update(100, &[], Some("application/json"), 1000);
        }
        for _ in 0..2 {
            profile.update(100, &[], Some("application/xml"), 1000);
        }

        assert_eq!(profile.dominant_content_type(), Some("application/json"));
    }

    #[test]
    fn test_endpoint_profile_status_codes() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);

        for _ in 0..8 {
            profile.record_status(200);
        }
        for _ in 0..2 {
            profile.record_status(500);
        }

        assert!((profile.status_frequency(200) - 0.8).abs() < 0.01);
        assert!((profile.status_frequency(500) - 0.2).abs() < 0.01);
        assert!((profile.error_rate() - 0.2).abs() < 0.01);
    }

    #[test]
    fn test_endpoint_profile_baseline_rate() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 0);

        // One request per second for a minute.
        for i in 0..60 {
            profile.update(100, &[], None, i * 1000);
        }

        let rate = profile.baseline_rate(60_000);
        assert!((rate - 60.0).abs() < 1.0);
    }

    #[test]
    fn test_endpoint_profile_is_mature() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);

        assert!(!profile.is_mature(10));

        for i in 0..10 {
            profile.update(100, &[], None, 1000 + i * 100);
        }

        assert!(profile.is_mature(10));
        assert!(!profile.is_mature(20));
    }

    #[test]
    fn test_endpoint_profile_age_and_idle() {
        let profile = EndpointProfile::new("/api/test".to_string(), 1000);

        assert_eq!(profile.age_ms(2000), 1000);
        assert_eq!(profile.idle_ms(2000), 1000);
    }

    #[test]
    fn test_evict_least_frequent() {
        let mut map: HashMap<String, ParamStats> = HashMap::new();
        let mut s1 = ParamStats::new();
        s1.count = 10;
        let mut s2 = ParamStats::new();
        s2.count = 5;
        let mut s3 = ParamStats::new();
        s3.count = 1;
        let mut s4 = ParamStats::new();
        s4.count = 8;

        map.insert("a".to_string(), s1);
        map.insert("b".to_string(), s2);
        map.insert("c".to_string(), s3);
        map.insert("d".to_string(), s4);

        EndpointProfile::evict_least_frequent(&mut map, 2);

        assert!(map.len() <= 2);
        // The most frequent entry must survive.
        assert!(map.contains_key("a"));
    }

    #[test]
    fn test_param_stats_type_count_limit() {
        let mut stats = ParamStats::new();

        for _ in 0..100 {
            stats.update("12345");
            stats.update("hello");
            stats.update("test@example.com");
            stats.update("123e4567-e89b-12d3-a456-426614174000");
        }

        assert!(stats.type_counts.len() <= DEFAULT_MAX_TYPE_COUNTS);
    }

    #[test]
    fn test_param_stats_custom_type_limit() {
        let mut stats = ParamStats::new();

        for _ in 0..10 {
            stats.update_with_limit("12345", 2);
            stats.update_with_limit("hello", 2);
            stats.update_with_limit("test@example.com", 2);
        }

        assert!(stats.type_counts.len() <= 2);
    }

    #[test]
    fn test_redact_value() {
        let email = "user@example.com";
        let redacted = redact_value(email);
        assert!(redacted.starts_with("us"));
        assert!(redacted.ends_with("om"));
        assert!(redacted.len() == email.len());

        // Short values are masked completely.
        let short = "ab";
        let redacted_short = redact_value(short);
        assert_eq!(redacted_short, "**");

        let medium = "hello";
        let redacted_medium = redact_value(medium);
        assert!(redacted_medium.starts_with("he"));
        assert!(redacted_medium.ends_with("lo"));
    }

    #[test]
    fn test_is_likely_pii() {
        // Email-like values.
        assert!(is_likely_pii("user@example.com"));
        assert!(is_likely_pii("admin@company.org"));
        assert!(!is_likely_pii("not-email-format"));

        // UUID-like values.
        assert!(is_likely_pii("123e4567-e89b-12d3-a456-426614174000"));
        assert!(!is_likely_pii("not-a-uuid"));

        // Long token-like values.
        assert!(is_likely_pii("abcdefghijklmnopqrstuvwxyz12345"));
        assert!(!is_likely_pii("short"));
    }

    #[test]
    fn test_endpoint_profile_response_update() {
        let mut profile = EndpointProfile::new("/api/users".to_string(), 1000);

        profile.update_response(5000, 200, Some("application/json"), 2000);

        assert_eq!(profile.last_updated_ms, 2000);
        assert!((profile.response_size.mean() - 5000.0).abs() < 0.01);
        assert!((profile.status_frequency(200) - 1.0).abs() < 0.01);
        assert_eq!(
            profile.dominant_response_content_type(),
            Some("application/json")
        );
    }

    #[test]
    fn test_endpoint_profile_response_content_type_bounds() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);

        for i in 0..MAX_CONTENT_TYPES {
            profile.update_response(
                100,
                200,
                Some(&format!("application/type-{}", i)),
                1000 + i as u64,
            );
        }
        assert_eq!(profile.response_content_types.len(), MAX_CONTENT_TYPES);

        // One more distinct content type must not grow the map.
        profile.update_response(100, 200, Some("application/extra"), 2000);
        assert_eq!(profile.response_content_types.len(), MAX_CONTENT_TYPES);
    }

    #[test]
    fn test_param_eviction_under_load() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);

        // Establish MAX_PARAMS frequently seen parameters...
        for i in 0..MAX_PARAMS {
            let name = format!("p{}", i);
            for _ in 0..10 {
                profile.update(100, &[(&name, "val")], None, 1000);
            }
        }
        // ...then flood the profile with one-off parameter names.
        for i in MAX_PARAMS..(MAX_PARAMS * 2) {
            let name = format!("p{}", i);
            profile.update(100, &[(&name, "val")], None, 1000);
        }

        assert!(profile.expected_params.len() <= MAX_PARAMS);
        // The established parameters must still be tracked.
        assert!(profile.expected_params.contains_key("p0"));
        assert!(profile.expected_params.contains_key("p49"));
    }

    #[test]
    fn test_baseline_rate_zero_lifetime() {
        let mut profile = EndpointProfile::new("/api/test".to_string(), 1000);
        profile.update(100, &[], None, 1000);

        // Querying at the creation timestamp must not divide by zero.
        let rate = profile.baseline_rate(1000);
        assert!(rate > 0.0);
        assert!(rate.is_finite());
    }

    #[test]
    fn test_param_stats_no_type_counts_but_count_positive() {
        let mut stats = ParamStats::new();
        stats.update_with_limit("val", 0);

        // A zero limit suppresses type labels but not the basic counters.
        assert_eq!(stats.count, 1);
        assert_eq!(stats.type_counts.len(), 0);
        assert_eq!(stats.length_dist.count(), 1);
    }
}