Skip to main content

vellaveto_engine/
behavioral.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4//
5// Copyright 2026 Paolo Vella
6// SPDX-License-Identifier: MPL-2.0
7
8//! Behavioral anomaly detection for agent tool call patterns (P4.1 / OWASP ASI).
9//!
10//! Tracks per-agent tool call frequency using exponential moving average (EMA)
11//! and flags deviations from established baselines. Deterministic and auditable —
12//! no ML, no randomness.
13//!
14//! # Design
15//!
16//! - **EMA**: `new_ema = alpha * current + (1 - alpha) * old_ema`
17//! - **Anomaly**: flagged when `current_count / baseline_ema >= threshold`
18//! - **Cold start**: no alerts until `min_sessions` sessions are recorded
//! - **Bounded memory**: max agents (agent with the fewest sessions evicted first) and max tools per agent (least recently active tool evicted first)
20//! - **Decay**: tools unused in a session have their EMA decayed toward zero
21
22use serde::{Deserialize, Serialize};
23use std::collections::{HashMap, HashSet};
24
25// ═══════════════════════════════════════════════════
26// CONFIGURATION
27// ═══════════════════════════════════════════════════
28
29/// Configuration for behavioral anomaly detection.
30#[derive(Debug, Clone, Serialize, Deserialize)]
31#[serde(deny_unknown_fields)]
32pub struct BehavioralConfig {
33    /// EMA smoothing factor in (0.0, 1.0]. Higher values weight recent data more.
34    /// Default: 0.2
35    #[serde(default = "default_alpha")]
36    pub alpha: f64,
37
38    /// Deviation threshold multiplier. Anomaly flagged when
39    /// `current_count / baseline_ema >= threshold`.
40    /// Default: 10.0
41    #[serde(default = "default_threshold")]
42    pub threshold: f64,
43
44    /// Minimum sessions before baselines are actionable (cold start protection).
45    /// No anomalies are flagged until both the agent and the specific tool have
46    /// at least this many recorded sessions.
47    /// Default: 3
48    #[serde(default = "default_min_sessions")]
49    pub min_sessions: u32,
50
51    /// Maximum tool entries tracked per agent. Oldest (by last active use) evicted first.
52    /// Default: 500
53    #[serde(default = "default_max_tools")]
54    pub max_tools_per_agent: usize,
55
56    /// Maximum agents tracked. Agent with fewest total sessions evicted first.
57    /// Default: 10_000
58    #[serde(default = "default_max_agents")]
59    pub max_agents: usize,
60
61    /// Absolute ceiling for tool call count per session (FIND-080).
62    /// When set, any session with a tool call count exceeding this value
63    /// triggers a Critical alert regardless of EMA baseline.
64    /// Prevents gradual ramp evasion where EMA adapts to slow increases.
65    /// Default: None (no absolute ceiling)
66    #[serde(default, skip_serializing_if = "Option::is_none")]
67    pub absolute_ceiling: Option<u64>,
68
69    /// Maximum initial EMA value for cold-start protection (FIND-081).
70    /// When set, the first observation's EMA is capped at this value,
71    /// preventing attackers from setting an artificially high baseline
72    /// by flooding calls during the first session.
73    /// Default: None (no cap)
74    #[serde(default, skip_serializing_if = "Option::is_none")]
75    pub max_initial_ema: Option<f64>,
76}
77
/// Serde default for `BehavioralConfig::alpha`.
fn default_alpha() -> f64 {
    const DEFAULT_ALPHA: f64 = 0.2;
    DEFAULT_ALPHA
}

/// Serde default for `BehavioralConfig::threshold`.
fn default_threshold() -> f64 {
    const DEFAULT_THRESHOLD: f64 = 10.0;
    DEFAULT_THRESHOLD
}

/// Serde default for `BehavioralConfig::min_sessions`.
fn default_min_sessions() -> u32 {
    const DEFAULT_MIN_SESSIONS: u32 = 3;
    DEFAULT_MIN_SESSIONS
}

/// Serde default for `BehavioralConfig::max_tools_per_agent`.
fn default_max_tools() -> usize {
    const DEFAULT_MAX_TOOLS: usize = 500;
    DEFAULT_MAX_TOOLS
}

/// Serde default for `BehavioralConfig::max_agents`.
fn default_max_agents() -> usize {
    const DEFAULT_MAX_AGENTS: usize = 10_000;
    DEFAULT_MAX_AGENTS
}
93
94impl Default for BehavioralConfig {
95    fn default() -> Self {
96        Self {
97            alpha: default_alpha(),
98            threshold: default_threshold(),
99            min_sessions: default_min_sessions(),
100            max_tools_per_agent: default_max_tools(),
101            max_agents: default_max_agents(),
102            absolute_ceiling: None,
103            max_initial_ema: None,
104        }
105    }
106}
107
108// ═══════════════════════════════════════════════════
109// ERRORS
110// ═══════════════════════════════════════════════════
111
/// Failure modes of configuration validation and snapshot restoration.
#[derive(Debug, Clone, PartialEq)]
pub enum BehavioralError {
    /// The smoothing factor is outside (0.0, 1.0]; carries the rejected value.
    InvalidAlpha(f64),
    /// A value that must be positive and finite is not; carries the rejected value.
    InvalidThreshold(f64),
    /// `max_tools_per_agent` is zero or outside the accepted range.
    InvalidMaxTools,
    /// `max_agents` is zero or outside the accepted range.
    InvalidMaxAgents,
    /// A snapshot failed validation; carries a human-readable reason.
    InvalidSnapshot(String),
}

impl std::fmt::Display for BehavioralError {
    /// Renders a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidAlpha(alpha) => {
                write!(f, "alpha must be in (0.0, 1.0], got {alpha}")
            }
            Self::InvalidThreshold(value) => {
                write!(f, "threshold must be positive and finite, got {value}")
            }
            Self::InvalidMaxTools => f.write_str("max_tools_per_agent must be > 0"),
            Self::InvalidMaxAgents => f.write_str("max_agents must be > 0"),
            Self::InvalidSnapshot(reason) => write!(f, "invalid snapshot: {reason}"),
        }
    }
}

impl std::error::Error for BehavioralError {}
144
145impl BehavioralConfig {
146    /// Validate configuration values.
147    pub fn validate(&self) -> Result<(), BehavioralError> {
148        if self.alpha <= 0.0 || self.alpha > 1.0 || self.alpha.is_nan() {
149            return Err(BehavioralError::InvalidAlpha(self.alpha));
150        }
151        if self.threshold <= 0.0 || self.threshold.is_nan() || self.threshold.is_infinite() {
152            return Err(BehavioralError::InvalidThreshold(self.threshold));
153        }
154        // SECURITY (R240-ENG-3): Upper-bound max_tools_per_agent and max_agents to
155        // prevent unbounded HashMap growth from attacker-controlled configuration.
156        const MAX_BEHAVIORAL_AGENTS: usize = 1_000_000;
157        const MAX_TOOLS_PER_AGENT_LIMIT: usize = 100_000;
158        if self.max_tools_per_agent == 0 || self.max_tools_per_agent > MAX_TOOLS_PER_AGENT_LIMIT {
159            return Err(BehavioralError::InvalidMaxTools);
160        }
161        if self.max_agents == 0 || self.max_agents > MAX_BEHAVIORAL_AGENTS {
162            return Err(BehavioralError::InvalidMaxAgents);
163        }
164        // SECURITY (FIND-R113-P3): Validate max_initial_ema is positive and finite.
165        if let Some(max_ema) = self.max_initial_ema {
166            if max_ema <= 0.0 || max_ema.is_nan() || max_ema.is_infinite() {
167                return Err(BehavioralError::InvalidThreshold(max_ema));
168            }
169        }
170        Ok(())
171    }
172}
173
174// ═══════════════════════════════════════════════════
175// BASELINE & ALERT TYPES
176// ═══════════════════════════════════════════════════
177
178/// Per-tool statistics tracked across sessions.
179#[derive(Debug, Clone, Serialize, Deserialize)]
180#[serde(deny_unknown_fields)]
181pub struct ToolBaseline {
182    /// Exponential moving average of call count.
183    pub ema: f64,
184    /// Number of sessions where this tool was observed or decayed.
185    pub session_count: u32,
186    /// Monotonic counter from last *active* use (non-zero call count).
187    /// Used for eviction: tools only passively decaying have stale values.
188    pub last_active: u64,
189}
190
191/// Severity of a detected anomaly.
192#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
193pub enum AnomalySeverity {
194    /// Current count exceeds `threshold * baseline` (but below 2x threshold).
195    Warning,
196    /// Current count exceeds `2 * threshold * baseline`.
197    Critical,
198}
199
200/// An anomaly detected in tool call behavior.
201#[derive(Debug, Clone, Serialize, Deserialize)]
202#[serde(deny_unknown_fields)]
203pub struct AnomalyAlert {
204    /// Agent that triggered the anomaly.
205    pub agent_id: String,
206    /// Tool name with anomalous frequency.
207    pub tool: String,
208    /// Current session's call count for this tool.
209    pub current_count: u64,
210    /// Historical EMA baseline.
211    pub baseline_ema: f64,
212    /// Deviation ratio (`current_count / baseline_ema`).
213    pub deviation_ratio: f64,
214    /// Severity level.
215    pub severity: AnomalySeverity,
216}
217
218impl std::fmt::Display for AnomalyAlert {
219    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
220        write!(
221            f,
222            "[{:?}] Agent '{}' tool '{}': {} calls (baseline {:.1}, ratio {:.1}x)",
223            self.severity,
224            self.agent_id,
225            self.tool,
226            self.current_count,
227            self.baseline_ema,
228            self.deviation_ratio,
229        )
230    }
231}
232
233// ═══════════════════════════════════════════════════
234// SNAPSHOT (PERSISTENCE)
235// ═══════════════════════════════════════════════════
236
237/// Serializable snapshot of all behavioral tracking state.
238#[derive(Debug, Clone, Serialize, Deserialize)]
239#[serde(deny_unknown_fields)]
240pub struct BehavioralSnapshot {
241    /// Per-agent state.
242    pub agents: HashMap<String, AgentSnapshotEntry>,
243    /// Global update counter at time of snapshot.
244    pub update_counter: u64,
245}
246
247/// Snapshot entry for a single agent.
248#[derive(Debug, Clone, Serialize, Deserialize)]
249#[serde(deny_unknown_fields)]
250pub struct AgentSnapshotEntry {
251    pub tools: HashMap<String, ToolBaseline>,
252    pub total_sessions: u32,
253}
254
255// ═══════════════════════════════════════════════════
256// INTERNAL STATE
257// ═══════════════════════════════════════════════════
258
259/// Per-agent tracking state.
260#[derive(Debug, Clone)]
261struct AgentState {
262    tools: HashMap<String, ToolBaseline>,
263    total_sessions: u32,
264}
265
266// ═══════════════════════════════════════════════════
267// TRACKER
268// ═══════════════════════════════════════════════════
269
270/// Tracks per-agent tool call frequency patterns and detects anomalies.
271///
272/// Uses exponential moving average (EMA) — deterministic, auditable, no ML.
273/// Designed to detect behavioral shifts like an agent suddenly making 500
274/// `read_file` calls when the historical average is 5.
275pub struct BehavioralTracker {
276    config: BehavioralConfig,
277    agents: HashMap<String, AgentState>,
278    /// Monotonic counter incremented on each `record_session` call.
279    update_counter: u64,
280}
281
282impl BehavioralTracker {
283    /// Create a new tracker. Returns an error if the configuration is invalid.
284    pub fn new(config: BehavioralConfig) -> Result<Self, BehavioralError> {
285        config.validate()?;
286        Ok(Self {
287            config,
288            agents: HashMap::new(),
289            update_counter: 0,
290        })
291    }
292
293    /// Check current session's call counts against historical baselines.
294    ///
295    /// Returns detected anomalies (may be empty). Does **not** modify state.
296    /// Call [`record_session`](Self::record_session) after the session completes
297    /// to update baselines.
298    /// SECURITY (FIND-R139-001): Maximum number of entries in a caller-supplied
299    /// call_counts map. Prevents O(n) iteration DoS from pathologically large maps.
300    const MAX_CALL_COUNT_ENTRIES: usize = 10_000;
301
302    /// SECURITY (FIND-R139-002): Maximum length for agent_id on the live path,
303    /// matching the validation applied in `from_snapshot`.
304    const MAX_AGENT_ID_LEN: usize = 512;
305
306    /// SECURITY (FIND-R116-TE-003): Maximum length for tool keys in call_counts,
307    /// matching the canonical MAX_NAME_LEN (256) used for tool names in vellaveto-types.
308    const MAX_TOOL_KEY_LEN: usize = 256;
309
310    #[must_use = "behavioral anomaly alerts must not be discarded"]
311    pub fn check_session(
312        &self,
313        agent_id: &str,
314        call_counts: &HashMap<String, u64>,
315    ) -> Vec<AnomalyAlert> {
316        let mut alerts = Vec::new();
317
318        // SECURITY (FIND-R139-002): Validate agent_id on the live path.
319        if agent_id.len() > Self::MAX_AGENT_ID_LEN
320            || agent_id
321                .chars()
322                .any(|c| c.is_control() || vellaveto_types::is_unicode_format_char(c))
323        {
324            tracing::warn!(
325                len = agent_id.len(),
326                "check_session: rejecting invalid agent_id"
327            );
328            return alerts;
329        }
330
331        // SECURITY (R239-ENG-1): Normalize agent_id through NFKC + lowercase + homoglyph
332        // mapping so that homoglyph variants map to the same baseline.
333        let agent_id = crate::normalize::normalize_full(agent_id);
334
335        // SECURITY (FIND-R139-001): Cap call_counts iteration.
336        if call_counts.len() > Self::MAX_CALL_COUNT_ENTRIES {
337            tracing::warn!(
338                count = call_counts.len(),
339                max = Self::MAX_CALL_COUNT_ENTRIES,
340                "check_session: call_counts exceeds cap, skipping"
341            );
342            return alerts;
343        }
344
345        let agent = match self.agents.get(&agent_id) {
346            Some(a) => a,
347            None => return alerts, // No history for this agent
348        };
349
350        // Cold start: don't flag until the agent has enough sessions
351        if agent.total_sessions < self.config.min_sessions {
352            tracing::debug!(
353                agent_id = %agent_id,
354                sessions = %agent.total_sessions,
355                min_required = %self.config.min_sessions,
356                "Agent in cold-start phase, anomaly detection deferred"
357            );
358            return alerts;
359        }
360
361        for (tool, &count) in call_counts {
362            if count == 0 {
363                continue;
364            }
365
366            // SECURITY (FIND-R116-TE-003): Validate tool keys for length and
367            // control/format characters, matching the validation in from_snapshot().
368            if tool.len() > Self::MAX_TOOL_KEY_LEN {
369                tracing::warn!("check_session: skipping oversized tool key");
370                continue;
371            }
372            if tool
373                .chars()
374                .any(|c| c.is_control() || vellaveto_types::is_unicode_format_char(c))
375            {
376                tracing::warn!("check_session: skipping tool key with control/format chars");
377                continue;
378            }
379
380            // SECURITY (FIND-080): Check absolute ceiling before EMA-based detection.
381            // This catches gradual ramp attacks where EMA adapts to slow increases.
382            if let Some(ceiling) = self.config.absolute_ceiling {
383                if count > ceiling {
384                    // SECURITY (FIND-R114-001): Guard against ceiling=0 producing
385                    // Infinity in deviation_ratio, which bypasses threshold comparisons.
386                    // When ceiling is 0, any non-zero count is maximally anomalous.
387                    let deviation_ratio = if ceiling == 0 {
388                        f64::MAX
389                    } else {
390                        count as f64 / ceiling as f64
391                    };
392                    let alert = AnomalyAlert {
393                        severity: AnomalySeverity::Critical,
394                        tool: tool.clone(),
395                        current_count: count,
396                        baseline_ema: self
397                            .agents
398                            .get(&agent_id)
399                            .and_then(|a| a.tools.get(tool))
400                            .map_or(0.0, |b| b.ema),
401                        deviation_ratio,
402                        agent_id: agent_id.clone(),
403                    };
404
405                    metrics::counter!(
406                        "vellaveto_anomaly_detections_total",
407                        "severity" => "critical"
408                    )
409                    .increment(1);
410
411                    tracing::warn!(
412                        agent_id = %agent_id,
413                        tool = %tool,
414                        current_count = %count,
415                        ceiling = %ceiling,
416                        "CRITICAL: Tool call count exceeds absolute ceiling"
417                    );
418
419                    alerts.push(alert);
420                    continue; // Already flagged — skip EMA check for this tool
421                }
422            }
423
424            let baseline = match agent.tools.get(tool) {
425                Some(b) => b,
426                None => continue, // New tool — no baseline yet
427            };
428
429            // Per-tool cold start: need enough observations for this specific tool
430            if baseline.session_count < self.config.min_sessions {
431                tracing::trace!(
432                    agent_id = %agent_id,
433                    tool = %tool,
434                    tool_sessions = %baseline.session_count,
435                    min_required = %self.config.min_sessions,
436                    "Tool in cold-start phase, skipping anomaly check"
437                );
438                continue;
439            }
440
441            // Compute deviation ratio. If baseline EMA is zero (edge case:
442            // tool was recorded but EMA decayed to exactly 0.0), treat any
443            // non-zero count as anomalous with a high synthetic ratio.
444            let ratio = if baseline.ema <= f64::EPSILON {
445                count as f64 // effectively infinite deviation
446            } else {
447                count as f64 / baseline.ema
448            };
449
450            if ratio >= self.config.threshold {
451                let severity = if ratio >= self.config.threshold * 2.0 {
452                    AnomalySeverity::Critical
453                } else {
454                    AnomalySeverity::Warning
455                };
456
457                let alert = AnomalyAlert {
458                    agent_id: agent_id.clone(),
459                    tool: tool.clone(),
460                    current_count: count,
461                    baseline_ema: baseline.ema,
462                    deviation_ratio: ratio,
463                    severity,
464                };
465
466                // IMPROVEMENT_PLAN 1.2: Record anomaly detection metrics
467                let severity_label = match severity {
468                    AnomalySeverity::Critical => "critical",
469                    AnomalySeverity::Warning => "warning",
470                };
471                metrics::counter!(
472                    "vellaveto_anomaly_detections_total",
473                    "severity" => severity_label.to_string()
474                )
475                .increment(1);
476
477                // Log anomaly detection for observability
478                match severity {
479                    AnomalySeverity::Critical => {
480                        tracing::warn!(
481                            agent_id = %agent_id,
482                            tool = %tool,
483                            current_count = %count,
484                            baseline_ema = %baseline.ema,
485                            deviation_ratio = %ratio,
486                            "CRITICAL behavioral anomaly detected: tool call frequency {:.1}x above baseline",
487                            ratio
488                        );
489                    }
490                    AnomalySeverity::Warning => {
491                        tracing::warn!(
492                            agent_id = %agent_id,
493                            tool = %tool,
494                            current_count = %count,
495                            baseline_ema = %baseline.ema,
496                            deviation_ratio = %ratio,
497                            "Behavioral anomaly detected: tool call frequency {:.1}x above baseline",
498                            ratio
499                        );
500                    }
501                }
502
503                alerts.push(alert);
504            }
505        }
506
507        alerts
508    }
509
510    /// Update baselines after a session completes.
511    ///
512    /// Call this with the final call counts when a session ends.
513    /// Tools with zero counts are ignored for recording but existing baselines
514    /// for tools **not present** in `call_counts` are decayed toward zero.
515    pub fn record_session(&mut self, agent_id: &str, call_counts: &HashMap<String, u64>) {
516        // SECURITY (FIND-R139-002): Validate agent_id on the live path.
517        if agent_id.len() > Self::MAX_AGENT_ID_LEN
518            || agent_id
519                .chars()
520                .any(|c| c.is_control() || vellaveto_types::is_unicode_format_char(c))
521        {
522            tracing::warn!(
523                len = agent_id.len(),
524                "record_session: rejecting invalid agent_id"
525            );
526            return;
527        }
528
529        // SECURITY (R239-ENG-1): Normalize agent_id through NFKC + lowercase + homoglyph
530        // mapping so that homoglyph variants map to the same baseline.
531        let agent_id = crate::normalize::normalize_full(agent_id);
532
533        // SECURITY (FIND-R139-001): Cap call_counts iteration.
534        if call_counts.len() > Self::MAX_CALL_COUNT_ENTRIES {
535            tracing::warn!(
536                count = call_counts.len(),
537                max = Self::MAX_CALL_COUNT_ENTRIES,
538                "record_session: call_counts exceeds cap, skipping"
539            );
540            return;
541        }
542
543        self.update_counter = self.update_counter.saturating_add(1);
544
545        // Enforce agent limit via eviction before inserting a new agent
546        if !self.agents.contains_key(agent_id.as_str())
547            && self.agents.len() >= self.config.max_agents
548        {
549            self.evict_agent();
550        }
551
552        let agent = self
553            .agents
554            .entry(agent_id.to_string())
555            .or_insert_with(|| AgentState {
556                tools: HashMap::new(),
557                total_sessions: 0,
558            });
559
560        agent.total_sessions = agent.total_sessions.saturating_add(1);
561
562        // Collect which tools were actively called (non-zero count)
563        let called_tools: HashSet<&String> = call_counts
564            .iter()
565            .filter(|(_, &c)| c > 0)
566            .map(|(k, _)| k)
567            .collect();
568
569        // Update baselines for actively called tools
570        for (tool, &count) in call_counts {
571            if count == 0 {
572                continue;
573            }
574
575            // SECURITY (FIND-R116-TE-003): Validate tool keys for length and
576            // control/format characters, matching the validation in from_snapshot()
577            // and check_session(). Skip entries with invalid tool keys.
578            if tool.len() > Self::MAX_TOOL_KEY_LEN {
579                tracing::warn!("record_session: skipping oversized tool key");
580                continue;
581            }
582            if tool
583                .chars()
584                .any(|c| c.is_control() || vellaveto_types::is_unicode_format_char(c))
585            {
586                tracing::warn!("record_session: skipping tool key with control/format chars");
587                continue;
588            }
589
590            // Enforce per-agent tool limit
591            if !agent.tools.contains_key(tool)
592                && agent.tools.len() >= self.config.max_tools_per_agent
593            {
594                Self::evict_tool(&mut agent.tools);
595            }
596
597            let baseline = agent
598                .tools
599                .entry(tool.clone())
600                .or_insert_with(|| ToolBaseline {
601                    ema: 0.0,
602                    session_count: 0,
603                    last_active: 0,
604                });
605
606            // EMA update
607            if baseline.session_count == 0 {
608                // First observation: initialize EMA directly
609                // SECURITY (FIND-081): Cap initial EMA to prevent cold-start poisoning.
610                baseline.ema = if let Some(cap) = self.config.max_initial_ema {
611                    (count as f64).min(cap)
612                } else {
613                    count as f64
614                };
615            } else {
616                baseline.ema =
617                    self.config.alpha * count as f64 + (1.0 - self.config.alpha) * baseline.ema;
618                // SECURITY (FIND-R139-003): Clamp non-finite EMA to fail-closed.
619                // If EMA becomes +Infinity, all ratios become 0.0, silently
620                // disabling anomaly detection for this tool/agent.
621                if !baseline.ema.is_finite() {
622                    tracing::error!(
623                        "EMA overflow detected — resetting to current count for fail-closed behavior"
624                    );
625                    baseline.ema = count as f64;
626                }
627            }
628
629            baseline.session_count = baseline.session_count.saturating_add(1);
630            baseline.last_active = self.update_counter;
631        }
632
633        // Decay baselines for tools that were NOT called this session.
634        // Their effective count is 0 → EMA trends toward zero.
635        // Note: we intentionally do NOT update `last_active` here so that
636        // passively decaying tools are evicted before actively used ones.
637        // SECURITY (FIND-R49-002): Evict stale near-zero EMA tools after prolonged decay.
638        // Without this, tools that are never called again accumulate indefinitely in memory,
639        // with EMA asymptotically approaching zero but never being cleaned up.
640        const MAX_DECAY_SESSIONS: u32 = 200;
641        let mut evict_keys: Vec<String> = Vec::new();
642
643        let existing_tools: Vec<String> = agent.tools.keys().cloned().collect();
644        for tool_name in &existing_tools {
645            if !called_tools.contains(tool_name) {
646                if let Some(baseline) = agent.tools.get_mut(tool_name) {
647                    baseline.ema *= 1.0 - self.config.alpha;
648                    baseline.session_count = baseline.session_count.saturating_add(1);
649
650                    // Track tools to evict (stale near-zero EMA after prolonged decay)
651                    if baseline.session_count > MAX_DECAY_SESSIONS && baseline.ema < 0.01 {
652                        evict_keys.push(tool_name.clone());
653                    }
654                }
655            }
656        }
657
658        // Remove stale entries outside the borrow of agent.tools
659        for key in &evict_keys {
660            agent.tools.remove(key);
661        }
662    }
663
664    /// Get the baseline for a specific agent and tool.
665    pub fn get_baseline(&self, agent_id: &str, tool: &str) -> Option<&ToolBaseline> {
666        self.agents.get(agent_id)?.tools.get(tool)
667    }
668
669    /// Get the total sessions recorded for an agent.
670    pub fn agent_sessions(&self, agent_id: &str) -> Option<u32> {
671        self.agents.get(agent_id).map(|a| a.total_sessions)
672    }
673
674    /// Number of agents being tracked.
675    pub fn agent_count(&self) -> usize {
676        self.agents.len()
677    }
678
679    /// Number of tools tracked for a specific agent.
680    pub fn tool_count(&self, agent_id: &str) -> usize {
681        self.agents.get(agent_id).map_or(0, |a| a.tools.len())
682    }
683
684    /// Access the current configuration.
685    pub fn config(&self) -> &BehavioralConfig {
686        &self.config
687    }
688
689    /// Create a serializable snapshot of all tracking state.
690    pub fn snapshot(&self) -> BehavioralSnapshot {
691        let agents = self
692            .agents
693            .iter()
694            .map(|(id, state)| {
695                (
696                    id.clone(),
697                    AgentSnapshotEntry {
698                        tools: state.tools.clone(),
699                        total_sessions: state.total_sessions,
700                    },
701                )
702            })
703            .collect();
704
705        BehavioralSnapshot {
706            agents,
707            update_counter: self.update_counter,
708        }
709    }
710
711    /// Restore from a persisted snapshot.
712    ///
713    /// Validates that all EMA values are finite and non-negative.
714    pub fn from_snapshot(
715        config: BehavioralConfig,
716        snapshot: BehavioralSnapshot,
717    ) -> Result<Self, BehavioralError> {
718        config.validate()?;
719
720        // SECURITY (FIND-R58-ENG-001): Enforce max_agents/max_tools_per_agent bounds
721        // on deserialized snapshots to prevent OOM from oversized snapshot files.
722        if snapshot.agents.len() > config.max_agents {
723            return Err(BehavioralError::InvalidSnapshot(format!(
724                "snapshot has {} agents, exceeds max_agents {}",
725                snapshot.agents.len(),
726                config.max_agents
727            )));
728        }
729        for (agent_id, entry) in &snapshot.agents {
730            // SECURITY (FIND-R114-002): Reject agent_id keys with control or
731            // Unicode format characters to prevent bidi override injection in
732            // pattern matching and log confusion.
733            if agent_id
734                .chars()
735                .any(|c| c.is_control() || vellaveto_types::is_unicode_format_char(c))
736            {
737                return Err(BehavioralError::InvalidSnapshot(
738                    "agent_id contains control or Unicode format characters".to_string(),
739                ));
740            }
741            if entry.tools.len() > config.max_tools_per_agent {
742                return Err(BehavioralError::InvalidSnapshot(format!(
743                    "agent '{}' has {} tools, exceeds max_tools_per_agent {}",
744                    agent_id,
745                    entry.tools.len(),
746                    config.max_tools_per_agent
747                )));
748            }
749            for (tool, baseline) in &entry.tools {
750                // SECURITY (FIND-R114-002): Reject tool keys with control or
751                // Unicode format characters.
752                if tool
753                    .chars()
754                    .any(|c| c.is_control() || vellaveto_types::is_unicode_format_char(c))
755                {
756                    return Err(BehavioralError::InvalidSnapshot(
757                        "tool key contains control or Unicode format characters".to_string(),
758                    ));
759                }
760                if baseline.ema.is_nan() || baseline.ema.is_infinite() {
761                    return Err(BehavioralError::InvalidSnapshot(format!(
762                        "agent '{}' tool '{}' has invalid EMA: {}",
763                        agent_id, tool, baseline.ema
764                    )));
765                }
766                if baseline.ema < 0.0 {
767                    return Err(BehavioralError::InvalidSnapshot(format!(
768                        "agent '{}' tool '{}' has negative EMA: {}",
769                        agent_id, tool, baseline.ema
770                    )));
771                }
772            }
773        }
774
775        let agents = snapshot
776            .agents
777            .into_iter()
778            .map(|(id, entry)| {
779                (
780                    id,
781                    AgentState {
782                        tools: entry.tools,
783                        total_sessions: entry.total_sessions,
784                    },
785                )
786            })
787            .collect();
788
789        Ok(Self {
790            config,
791            agents,
792            update_counter: snapshot.update_counter,
793        })
794    }
795
796    /// Evict the agent with the fewest total sessions.
797    fn evict_agent(&mut self) {
798        if let Some(victim) = self
799            .agents
800            .iter()
801            .min_by_key(|(_, state)| state.total_sessions)
802            .map(|(id, _)| id.clone())
803        {
804            self.agents.remove(&victim);
805        }
806    }
807
808    /// Evict the tool with the oldest `last_active` timestamp.
809    fn evict_tool(tools: &mut HashMap<String, ToolBaseline>) {
810        if let Some(victim) = tools
811            .iter()
812            .min_by_key(|(_, baseline)| baseline.last_active)
813            .map(|(name, _)| name.clone())
814        {
815            tools.remove(&victim);
816        }
817    }
818}
819
820// ═══════════════════════════════════════════════════
821// TESTS
822// ═══════════════════════════════════════════════════
823
824#[cfg(test)]
825mod tests {
826    use super::*;
827
828    /// Helper to build call count maps concisely.
829    fn counts(data: &[(&str, u64)]) -> HashMap<String, u64> {
830        data.iter().map(|(k, v)| (k.to_string(), *v)).collect()
831    }
832
833    // ── Config validation ─────────────────────────
834
835    #[test]
836    fn test_new_tracker_default_config() {
837        let tracker = BehavioralTracker::new(BehavioralConfig::default());
838        assert!(tracker.is_ok());
839        assert_eq!(tracker.as_ref().map(|t| t.agent_count()).unwrap_or(0), 0);
840    }
841
842    #[test]
843    fn test_config_validate_valid() {
844        assert!(BehavioralConfig::default().validate().is_ok());
845        let edge = BehavioralConfig {
846            alpha: 1.0, // upper bound inclusive
847            ..Default::default()
848        };
849        assert!(edge.validate().is_ok());
850    }
851
852    #[test]
853    #[allow(clippy::field_reassign_with_default)]
854    fn test_config_validate_invalid_alpha() {
855        for bad in [0.0, -0.1, 1.1, f64::NAN] {
856            let mut c = BehavioralConfig::default();
857            c.alpha = bad;
858            assert!(
859                matches!(c.validate(), Err(BehavioralError::InvalidAlpha(_))),
860                "alpha={bad} should fail"
861            );
862        }
863    }
864
865    #[test]
866    #[allow(clippy::field_reassign_with_default)]
867    fn test_config_validate_invalid_threshold() {
868        for bad in [0.0, -1.0, f64::NAN, f64::INFINITY, f64::NEG_INFINITY] {
869            let mut c = BehavioralConfig::default();
870            c.threshold = bad;
871            assert!(
872                matches!(c.validate(), Err(BehavioralError::InvalidThreshold(_))),
873                "threshold={bad} should fail"
874            );
875        }
876    }
877
878    #[test]
879    fn test_config_validate_invalid_max_tools() {
880        let c = BehavioralConfig {
881            max_tools_per_agent: 0,
882            ..Default::default()
883        };
884        assert!(matches!(
885            c.validate(),
886            Err(BehavioralError::InvalidMaxTools)
887        ));
888    }
889
890    #[test]
891    fn test_config_validate_invalid_max_agents() {
892        let c = BehavioralConfig {
893            max_agents: 0,
894            ..Default::default()
895        };
896        assert!(matches!(
897            c.validate(),
898            Err(BehavioralError::InvalidMaxAgents)
899        ));
900    }
901
902    // ── Cold start ────────────────────────────────
903
904    #[test]
905    fn test_no_anomaly_during_agent_cold_start() {
906        let config = BehavioralConfig {
907            min_sessions: 3,
908            ..Default::default()
909        };
910        let mut tracker = BehavioralTracker::new(config).expect("valid config");
911
912        let c = counts(&[("read_file", 5)]);
913        tracker.record_session("agent-1", &c);
914        tracker.record_session("agent-1", &c);
915        // Only 2 sessions — below min_sessions of 3
916
917        let high = counts(&[("read_file", 5000)]);
918        let alerts = tracker.check_session("agent-1", &high);
919        assert!(alerts.is_empty(), "Should not flag during cold start");
920    }
921
922    #[test]
923    fn test_no_anomaly_during_tool_cold_start() {
924        let config = BehavioralConfig {
925            min_sessions: 3,
926            threshold: 2.0,
927            ..Default::default()
928        };
929        let mut tracker = BehavioralTracker::new(config).expect("valid config");
930
931        // 5 sessions with tool-a, establishing agent-level history
932        for _ in 0..5 {
933            tracker.record_session("agent-1", &counts(&[("tool-a", 10)]));
934        }
935
936        // Now introduce tool-b for only 1 session
937        tracker.record_session("agent-1", &counts(&[("tool-b", 5)]));
938
939        // tool-b has only 1 session of history — should not alert
940        let check = counts(&[("tool-b", 500)]);
941        let alerts = tracker.check_session("agent-1", &check);
942        assert!(
943            alerts.is_empty(),
944            "Tool with insufficient history should not alert"
945        );
946    }
947
948    // ── Anomaly detection ─────────────────────────
949
950    #[test]
951    fn test_anomaly_after_baseline_established() {
952        let config = BehavioralConfig {
953            min_sessions: 3,
954            threshold: 10.0,
955            alpha: 0.2,
956            ..Default::default()
957        };
958        let mut tracker = BehavioralTracker::new(config).expect("valid config");
959
960        let normal = counts(&[("read_file", 5)]);
961        for _ in 0..5 {
962            tracker.record_session("agent-1", &normal);
963        }
964
965        let anomalous = counts(&[("read_file", 500)]);
966        let alerts = tracker.check_session("agent-1", &anomalous);
967        assert!(!alerts.is_empty(), "Should detect anomaly");
968        assert_eq!(alerts[0].tool, "read_file");
969        assert_eq!(alerts[0].current_count, 500);
970        assert!(alerts[0].deviation_ratio >= 10.0);
971    }
972
973    #[test]
974    fn test_no_anomaly_for_normal_usage() {
975        let config = BehavioralConfig {
976            min_sessions: 3,
977            threshold: 10.0,
978            alpha: 0.2,
979            ..Default::default()
980        };
981        let mut tracker = BehavioralTracker::new(config).expect("valid config");
982
983        let normal = counts(&[("read_file", 5)]);
984        for _ in 0..5 {
985            tracker.record_session("agent-1", &normal);
986        }
987
988        // 7 is ~1.4x baseline — well below 10x threshold
989        let still_normal = counts(&[("read_file", 7)]);
990        let alerts = tracker.check_session("agent-1", &still_normal);
991        assert!(
992            alerts.is_empty(),
993            "Normal variation should not trigger alert"
994        );
995    }
996
997    #[test]
998    fn test_new_tool_no_alert() {
999        let config = BehavioralConfig {
1000            min_sessions: 3,
1001            ..Default::default()
1002        };
1003        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1004
1005        let normal = counts(&[("read_file", 5)]);
1006        for _ in 0..5 {
1007            tracker.record_session("agent-1", &normal);
1008        }
1009
1010        // write_file never seen — no baseline, no alert
1011        let new_tool = counts(&[("write_file", 1000)]);
1012        let alerts = tracker.check_session("agent-1", &new_tool);
1013        assert!(
1014            alerts.is_empty(),
1015            "New tool with no baseline should not alert"
1016        );
1017    }
1018
1019    // ── Severity ──────────────────────────────────
1020
1021    #[test]
1022    fn test_critical_severity() {
1023        let config = BehavioralConfig {
1024            min_sessions: 3,
1025            threshold: 5.0,
1026            alpha: 0.5,
1027            ..Default::default()
1028        };
1029        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1030
1031        let normal = counts(&[("read_file", 10)]);
1032        for _ in 0..5 {
1033            tracker.record_session("agent-1", &normal);
1034        }
1035
1036        // With alpha=0.5 over 5 sessions of count=10, EMA ≈ 10.0
1037        // 1000 / 10 = 100x → well above 2 * threshold(5) = 10x → Critical
1038        let critical = counts(&[("read_file", 1000)]);
1039        let alerts = tracker.check_session("agent-1", &critical);
1040        assert!(!alerts.is_empty());
1041        assert_eq!(alerts[0].severity, AnomalySeverity::Critical);
1042    }
1043
1044    #[test]
1045    fn test_warning_severity() {
1046        let config = BehavioralConfig {
1047            min_sessions: 3,
1048            threshold: 5.0,
1049            alpha: 0.5,
1050            ..Default::default()
1051        };
1052        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1053
1054        let normal = counts(&[("read_file", 10)]);
1055        for _ in 0..5 {
1056            tracker.record_session("agent-1", &normal);
1057        }
1058
1059        // 60 / 10 = 6x → above threshold(5) but below 2*threshold(10) → Warning
1060        let warning = counts(&[("read_file", 60)]);
1061        let alerts = tracker.check_session("agent-1", &warning);
1062        assert!(!alerts.is_empty());
1063        assert_eq!(alerts[0].severity, AnomalySeverity::Warning);
1064    }
1065
1066    // ── EMA behavior ──────────────────────────────
1067
1068    #[test]
1069    fn test_ema_first_observation_initializes() {
1070        let config = BehavioralConfig {
1071            min_sessions: 1,
1072            ..Default::default()
1073        };
1074        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1075
1076        tracker.record_session("agent-1", &counts(&[("read_file", 42)]));
1077
1078        let baseline = tracker
1079            .get_baseline("agent-1", "read_file")
1080            .expect("baseline should exist");
1081        assert!(
1082            (baseline.ema - 42.0).abs() < f64::EPSILON,
1083            "First observation should set EMA directly"
1084        );
1085        assert_eq!(baseline.session_count, 1);
1086    }
1087
1088    #[test]
1089    fn test_ema_update_formula() {
1090        let config = BehavioralConfig {
1091            min_sessions: 1,
1092            alpha: 0.5,
1093            ..Default::default()
1094        };
1095        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1096
1097        // Session 1: EMA = 100
1098        tracker.record_session("agent-1", &counts(&[("tool", 100)]));
1099        let ema1 = tracker.get_baseline("agent-1", "tool").expect("exists").ema;
1100        assert!((ema1 - 100.0).abs() < f64::EPSILON);
1101
1102        // Session 2: EMA = 0.5 * 200 + 0.5 * 100 = 150
1103        tracker.record_session("agent-1", &counts(&[("tool", 200)]));
1104        let ema2 = tracker.get_baseline("agent-1", "tool").expect("exists").ema;
1105        assert!((ema2 - 150.0).abs() < f64::EPSILON);
1106
1107        // Session 3: EMA = 0.5 * 100 + 0.5 * 150 = 125
1108        tracker.record_session("agent-1", &counts(&[("tool", 100)]));
1109        let ema3 = tracker.get_baseline("agent-1", "tool").expect("exists").ema;
1110        assert!((ema3 - 125.0).abs() < f64::EPSILON);
1111    }
1112
1113    #[test]
1114    fn test_ema_decay_unused_tools() {
1115        let config = BehavioralConfig {
1116            min_sessions: 1,
1117            alpha: 0.5,
1118            ..Default::default()
1119        };
1120        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1121
1122        // Establish baseline
1123        tracker.record_session("agent-1", &counts(&[("read_file", 100)]));
1124        assert!(
1125            (tracker
1126                .get_baseline("agent-1", "read_file")
1127                .expect("exists")
1128                .ema
1129                - 100.0)
1130                .abs()
1131                < f64::EPSILON
1132        );
1133
1134        // Session without read_file — EMA should decay
1135        tracker.record_session("agent-1", &counts(&[("other_tool", 1)]));
1136        let ema = tracker
1137            .get_baseline("agent-1", "read_file")
1138            .expect("exists")
1139            .ema;
1140        // Decay: ema = (1 - 0.5) * 100 = 50
1141        assert!(
1142            (ema - 50.0).abs() < 0.01,
1143            "EMA should decay to 50.0, got {ema}"
1144        );
1145    }
1146
1147    #[test]
1148    fn test_ema_decay_does_not_update_last_active() {
1149        let config = BehavioralConfig {
1150            min_sessions: 1,
1151            alpha: 0.5,
1152            ..Default::default()
1153        };
1154        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1155
1156        tracker.record_session("agent-1", &counts(&[("read_file", 100)]));
1157        let active_before = tracker
1158            .get_baseline("agent-1", "read_file")
1159            .expect("exists")
1160            .last_active;
1161
1162        // Decay-only session
1163        tracker.record_session("agent-1", &counts(&[("other_tool", 1)]));
1164        let active_after = tracker
1165            .get_baseline("agent-1", "read_file")
1166            .expect("exists")
1167            .last_active;
1168
1169        assert_eq!(
1170            active_before, active_after,
1171            "Passive decay should not update last_active"
1172        );
1173    }
1174
1175    // ── Agent/tool isolation ──────────────────────
1176
1177    #[test]
1178    fn test_multiple_agents_independent() {
1179        let config = BehavioralConfig {
1180            min_sessions: 3,
1181            threshold: 10.0,
1182            alpha: 0.2,
1183            ..Default::default()
1184        };
1185        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1186
1187        // Agent-1: low baseline (~5)
1188        let low = counts(&[("read_file", 5)]);
1189        for _ in 0..5 {
1190            tracker.record_session("agent-1", &low);
1191        }
1192
1193        // Agent-2: high baseline (~500)
1194        let high = counts(&[("read_file", 500)]);
1195        for _ in 0..5 {
1196            tracker.record_session("agent-2", &high);
1197        }
1198
1199        // 50 calls: anomalous for agent-1, normal for agent-2
1200        let check = counts(&[("read_file", 50)]);
1201        let alerts_1 = tracker.check_session("agent-1", &check);
1202        assert!(!alerts_1.is_empty(), "50 should be anomalous for agent-1");
1203
1204        let alerts_2 = tracker.check_session("agent-2", &check);
1205        assert!(alerts_2.is_empty(), "50 should be normal for agent-2");
1206    }
1207
1208    #[test]
1209    fn test_unknown_agent_no_alerts() {
1210        let tracker = BehavioralTracker::new(BehavioralConfig::default()).expect("valid config");
1211        let alerts = tracker.check_session("unknown", &counts(&[("tool", 1000)]));
1212        assert!(alerts.is_empty());
1213    }
1214
1215    // ── Eviction ──────────────────────────────────
1216
1217    #[test]
1218    fn test_agent_eviction_by_session_count() {
1219        let config = BehavioralConfig {
1220            max_agents: 2,
1221            ..Default::default()
1222        };
1223        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1224
1225        let c = counts(&[("tool", 1)]);
1226        tracker.record_session("agent-1", &c);
1227        tracker.record_session("agent-2", &c);
1228        tracker.record_session("agent-2", &c); // agent-2 has more sessions
1229
1230        // Adding agent-3 should evict agent-1 (fewest sessions)
1231        tracker.record_session("agent-3", &c);
1232
1233        assert_eq!(tracker.agent_count(), 2);
1234        assert!(
1235            tracker.get_baseline("agent-1", "tool").is_none(),
1236            "agent-1 should be evicted"
1237        );
1238        assert!(tracker.get_baseline("agent-2", "tool").is_some());
1239        assert!(tracker.get_baseline("agent-3", "tool").is_some());
1240    }
1241
1242    #[test]
1243    fn test_tool_eviction_by_last_active() {
1244        let config = BehavioralConfig {
1245            max_tools_per_agent: 2,
1246            ..Default::default()
1247        };
1248        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1249
1250        // Record tool-a first (update_counter=1)
1251        tracker.record_session("agent-1", &counts(&[("tool-a", 1)]));
1252        // Record tool-b second (update_counter=2)
1253        tracker.record_session("agent-1", &counts(&[("tool-b", 1)]));
1254
1255        // Adding tool-c should evict tool-a (oldest last_active)
1256        tracker.record_session("agent-1", &counts(&[("tool-c", 1)]));
1257
1258        assert_eq!(tracker.tool_count("agent-1"), 2);
1259        assert!(
1260            tracker.get_baseline("agent-1", "tool-a").is_none(),
1261            "tool-a should be evicted"
1262        );
1263    }
1264
1265    // ── Zero/empty handling ───────────────────────
1266
1267    #[test]
1268    fn test_zero_counts_not_recorded() {
1269        let config = BehavioralConfig {
1270            min_sessions: 1,
1271            ..Default::default()
1272        };
1273        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1274
1275        tracker.record_session("agent-1", &counts(&[("read_file", 0)]));
1276        assert!(
1277            tracker.get_baseline("agent-1", "read_file").is_none(),
1278            "Zero-count tool should not create a baseline"
1279        );
1280    }
1281
1282    #[test]
1283    fn test_empty_call_counts_no_panic() {
1284        let mut tracker =
1285            BehavioralTracker::new(BehavioralConfig::default()).expect("valid config");
1286        let empty = HashMap::new();
1287        tracker.record_session("agent-1", &empty);
1288        let alerts = tracker.check_session("agent-1", &empty);
1289        assert!(alerts.is_empty());
1290    }
1291
1292    #[test]
1293    fn test_check_with_zero_count_skipped() {
1294        let config = BehavioralConfig {
1295            min_sessions: 1,
1296            threshold: 2.0,
1297            ..Default::default()
1298        };
1299        let mut tracker = BehavioralTracker::new(config).expect("valid config");
1300
1301        let c = counts(&[("tool", 10)]);
1302        for _ in 0..3 {
1303            tracker.record_session("agent-1", &c);
1304        }
1305
1306        // Zero-count entry should be skipped in check
1307        let zero = counts(&[("tool", 0)]);
1308        let alerts = tracker.check_session("agent-1", &zero);
1309        assert!(alerts.is_empty());
1310    }
1311
1312    // ── Snapshot persistence ──────────────────────
1313
1314    #[test]
1315    fn test_snapshot_roundtrip() {
1316        let config = BehavioralConfig {
1317            min_sessions: 2,
1318            ..Default::default()
1319        };
1320        let mut tracker = BehavioralTracker::new(config.clone()).expect("valid config");
1321
1322        let c = counts(&[("read_file", 10), ("write_file", 3)]);
1323        tracker.record_session("agent-1", &c);
1324        tracker.record_session("agent-1", &c);
1325
1326        let snapshot = tracker.snapshot();
1327
1328        // Serialize and deserialize (simulating persistence)
1329        let json = serde_json::to_string(&snapshot).expect("serialize");
1330        let restored_snap: BehavioralSnapshot = serde_json::from_str(&json).expect("deserialize");
1331
1332        let restored =
1333            BehavioralTracker::from_snapshot(config, restored_snap).expect("valid snapshot");
1334        assert_eq!(restored.agent_count(), 1);
1335        assert_eq!(restored.agent_sessions("agent-1"), Some(2));
1336        assert!(restored.get_baseline("agent-1", "read_file").is_some());
1337        assert!(restored.get_baseline("agent-1", "write_file").is_some());
1338    }
1339
1340    #[test]
1341    fn test_snapshot_rejects_nan_ema() {
1342        let config = BehavioralConfig::default();
1343        let mut tools = HashMap::new();
1344        tools.insert(
1345            "bad_tool".to_string(),
1346            ToolBaseline {
1347                ema: f64::NAN,
1348                session_count: 1,
1349                last_active: 0,
1350            },
1351        );
1352        let mut agents = HashMap::new();
1353        agents.insert(
1354            "agent-1".to_string(),
1355            AgentSnapshotEntry {
1356                tools,
1357                total_sessions: 1,
1358            },
1359        );
1360        let snapshot = BehavioralSnapshot {
1361            agents,
1362            update_counter: 0,
1363        };
1364
1365        assert!(matches!(
1366            BehavioralTracker::from_snapshot(config, snapshot),
1367            Err(BehavioralError::InvalidSnapshot(_))
1368        ));
1369    }
1370
1371    #[test]
1372    fn test_snapshot_rejects_negative_ema() {
1373        let config = BehavioralConfig::default();
1374        let mut tools = HashMap::new();
1375        tools.insert(
1376            "bad_tool".to_string(),
1377            ToolBaseline {
1378                ema: -1.0,
1379                session_count: 1,
1380                last_active: 0,
1381            },
1382        );
1383        let mut agents = HashMap::new();
1384        agents.insert(
1385            "agent-1".to_string(),
1386            AgentSnapshotEntry {
1387                tools,
1388                total_sessions: 1,
1389            },
1390        );
1391        let snapshot = BehavioralSnapshot {
1392            agents,
1393            update_counter: 0,
1394        };
1395
1396        assert!(matches!(
1397            BehavioralTracker::from_snapshot(config, snapshot),
1398            Err(BehavioralError::InvalidSnapshot(_))
1399        ));
1400    }
1401
1402    #[test]
1403    fn test_snapshot_rejects_infinite_ema() {
1404        let config = BehavioralConfig::default();
1405        let mut tools = HashMap::new();
1406        tools.insert(
1407            "bad_tool".to_string(),
1408            ToolBaseline {
1409                ema: f64::INFINITY,
1410                session_count: 1,
1411                last_active: 0,
1412            },
1413        );
1414        let mut agents = HashMap::new();
1415        agents.insert(
1416            "agent-1".to_string(),
1417            AgentSnapshotEntry {
1418                tools,
1419                total_sessions: 1,
1420            },
1421        );
1422        let snapshot = BehavioralSnapshot {
1423            agents,
1424            update_counter: 0,
1425        };
1426
1427        assert!(matches!(
1428            BehavioralTracker::from_snapshot(config, snapshot),
1429            Err(BehavioralError::InvalidSnapshot(_))
1430        ));
1431    }
1432
1433    // ── GAP-012: Persistence Integration Tests ────
1434
1435    /// GAP-012: Multi-agent snapshot roundtrip ensures all agents and their
1436    /// tools are correctly persisted and restored.
1437    #[test]
1438    fn test_snapshot_multi_agent_roundtrip() {
1439        let config = BehavioralConfig {
1440            min_sessions: 2,
1441            ..Default::default()
1442        };
1443        let mut tracker = BehavioralTracker::new(config.clone()).expect("valid config");
1444
1445        // Record sessions for multiple agents with different tool patterns
1446        let agent1_tools = counts(&[("read_file", 10), ("write_file", 3)]);
1447        let agent2_tools = counts(&[("list_dir", 50), ("delete_file", 2), ("chmod", 5)]);
1448        let agent3_tools = counts(&[("network_call", 100)]);
1449
1450        for _ in 0..3 {
1451            tracker.record_session("agent-1", &agent1_tools);
1452            tracker.record_session("agent-2", &agent2_tools);
1453            tracker.record_session("agent-3", &agent3_tools);
1454        }
1455
1456        let snapshot = tracker.snapshot();
1457
1458        // Persist and restore
1459        let json = serde_json::to_string(&snapshot).expect("serialize");
1460        let restored_snap: BehavioralSnapshot = serde_json::from_str(&json).expect("deserialize");
1461        let restored =
1462            BehavioralTracker::from_snapshot(config, restored_snap).expect("valid snapshot");
1463
1464        // Verify all agents restored
1465        assert_eq!(restored.agent_count(), 3);
1466        assert_eq!(restored.agent_sessions("agent-1"), Some(3));
1467        assert_eq!(restored.agent_sessions("agent-2"), Some(3));
1468        assert_eq!(restored.agent_sessions("agent-3"), Some(3));
1469
1470        // Verify tool counts
1471        assert_eq!(restored.tool_count("agent-1"), 2);
1472        assert_eq!(restored.tool_count("agent-2"), 3);
1473        assert_eq!(restored.tool_count("agent-3"), 1);
1474
1475        // Verify specific baselines exist
1476        assert!(restored.get_baseline("agent-1", "read_file").is_some());
1477        assert!(restored.get_baseline("agent-2", "list_dir").is_some());
1478        assert!(restored.get_baseline("agent-3", "network_call").is_some());
1479    }
1480
1481    /// GAP-012: Restored tracker produces identical anomaly detection results
1482    /// as the original tracker.
1483    #[test]
1484    fn test_snapshot_restored_produces_same_alerts() {
1485        let config = BehavioralConfig {
1486            min_sessions: 3,
1487            threshold: 10.0,
1488            alpha: 0.3,
1489            ..Default::default()
1490        };
1491        let mut tracker = BehavioralTracker::new(config.clone()).expect("valid config");
1492
1493        // Build up baseline
1494        let normal = counts(&[("tool_a", 10), ("tool_b", 20)]);
1495        for _ in 0..5 {
1496            tracker.record_session("agent-1", &normal);
1497        }
1498
1499        // Create anomalous input
1500        let anomalous = counts(&[("tool_a", 500), ("tool_b", 20)]);
1501
1502        // Check alerts on original
1503        let original_alerts = tracker.check_session("agent-1", &anomalous);
1504
1505        // Snapshot and restore
1506        let snapshot = tracker.snapshot();
1507        let json = serde_json::to_string(&snapshot).expect("serialize");
1508        let restored_snap: BehavioralSnapshot = serde_json::from_str(&json).expect("deserialize");
1509        let restored =
1510            BehavioralTracker::from_snapshot(config, restored_snap).expect("valid snapshot");
1511
1512        // Check alerts on restored tracker
1513        let restored_alerts = restored.check_session("agent-1", &anomalous);
1514
1515        // Should produce identical alerts
1516        assert_eq!(original_alerts.len(), restored_alerts.len());
1517        for (orig, rest) in original_alerts.iter().zip(restored_alerts.iter()) {
1518            assert_eq!(orig.agent_id, rest.agent_id);
1519            assert_eq!(orig.tool, rest.tool);
1520            assert_eq!(orig.current_count, rest.current_count);
1521            // EMA values should be identical
1522            assert!(
1523                (orig.baseline_ema - rest.baseline_ema).abs() < f64::EPSILON,
1524                "EMA mismatch: {} vs {}",
1525                orig.baseline_ema,
1526                rest.baseline_ema
1527            );
1528        }
1529    }
1530
1531    /// GAP-012: Large-scale snapshot handles many agents and tools efficiently.
1532    #[test]
1533    fn test_snapshot_large_scale() {
1534        let config = BehavioralConfig {
1535            min_sessions: 1,
1536            max_agents: 100,
1537            max_tools_per_agent: 50,
1538            ..Default::default()
1539        };
1540        let mut tracker = BehavioralTracker::new(config.clone()).expect("valid config");
1541
1542        // Create 50 agents, each with 20 tools
1543        for agent_id in 0..50 {
1544            let tools: HashMap<String, u64> = (0..20)
1545                .map(|tool_id| (format!("tool_{tool_id}"), (agent_id + tool_id + 1) as u64))
1546                .collect();
1547            for _ in 0..3 {
1548                tracker.record_session(&format!("agent-{agent_id}"), &tools);
1549            }
1550        }
1551
1552        let snapshot = tracker.snapshot();
1553
1554        // Verify snapshot size is reasonable
1555        let json = serde_json::to_string(&snapshot).expect("serialize");
1556        assert!(
1557            json.len() > 1000,
1558            "Snapshot should contain substantial data"
1559        );
1560        assert!(
1561            json.len() < 1_000_000,
1562            "Snapshot should be reasonably sized"
1563        );
1564
1565        // Restore and verify counts
1566        let restored_snap: BehavioralSnapshot = serde_json::from_str(&json).expect("deserialize");
1567        let restored =
1568            BehavioralTracker::from_snapshot(config, restored_snap).expect("valid snapshot");
1569
1570        assert_eq!(restored.agent_count(), 50);
1571        assert_eq!(restored.tool_count("agent-0"), 20);
1572        assert_eq!(restored.agent_sessions("agent-49"), Some(3));
1573    }
1574
1575    /// GAP-012: Update counter is preserved through persistence roundtrip.
1576    #[test]
1577    fn test_snapshot_preserves_update_counter() {
1578        let config = BehavioralConfig::default();
1579        let mut tracker = BehavioralTracker::new(config.clone()).expect("valid config");
1580
1581        // Record many sessions to increment update counter
1582        for i in 0..10 {
1583            tracker.record_session(&format!("agent-{}", i % 3), &counts(&[("tool", 5)]));
1584        }
1585
1586        let snapshot = tracker.snapshot();
1587        let original_counter = snapshot.update_counter;
1588        assert!(original_counter >= 10, "Counter should track updates");
1589
1590        // Roundtrip
1591        let json = serde_json::to_string(&snapshot).expect("serialize");
1592        let restored_snap: BehavioralSnapshot = serde_json::from_str(&json).expect("deserialize");
1593
1594        assert_eq!(
1595            restored_snap.update_counter, original_counter,
1596            "Update counter must survive roundtrip"
1597        );
1598
1599        let restored =
1600            BehavioralTracker::from_snapshot(config, restored_snap).expect("valid snapshot");
1601        let new_snapshot = restored.snapshot();
1602        assert_eq!(
1603            new_snapshot.update_counter, original_counter,
1604            "Restored tracker preserves counter value"
1605        );
1606    }
1607
1608    // ── Accessors ─────────────────────────────────
1609
1610    #[test]
1611    fn test_agent_sessions_none_for_unknown() {
1612        let tracker = BehavioralTracker::new(BehavioralConfig::default()).expect("valid config");
1613        assert_eq!(tracker.agent_sessions("nonexistent"), None);
1614    }
1615
1616    #[test]
1617    fn test_tool_count_zero_for_unknown() {
1618        let tracker = BehavioralTracker::new(BehavioralConfig::default()).expect("valid config");
1619        assert_eq!(tracker.tool_count("nonexistent"), 0);
1620    }
1621
/// `config()` exposes the values the tracker was constructed with.
#[test]
fn test_config_accessor() {
    let tracker = BehavioralTracker::new(BehavioralConfig {
        alpha: 0.3,
        threshold: 8.0,
        ..Default::default()
    })
    .expect("valid config");

    // Compare floats by distance rather than equality.
    let stored = tracker.config();
    let alpha_drift = (stored.alpha - 0.3).abs();
    let threshold_drift = (stored.threshold - 8.0).abs();
    assert!(alpha_drift < f64::EPSILON);
    assert!(threshold_drift < f64::EPSILON);
}
1633
1634    // ── Display ───────────────────────────────────
1635
/// The `Display` output of an alert mentions its severity, agent, tool and
/// current count.
#[test]
fn test_anomaly_alert_display() {
    let rendered = AnomalyAlert {
        agent_id: String::from("agent-1"),
        tool: String::from("read_file"),
        current_count: 500,
        baseline_ema: 5.0,
        deviation_ratio: 100.0,
        severity: AnomalySeverity::Critical,
    }
    .to_string();

    // Each fragment must appear somewhere in the rendered text.
    let expected_fragments = ["Critical", "agent-1", "read_file", "500"];
    for fragment in expected_fragments.iter() {
        assert!(rendered.contains(*fragment));
    }
}
1652
/// Every `BehavioralError` variant renders a message containing an
/// identifying fragment.
#[test]
fn test_behavioral_error_display() {
    // (error value, text its Display output must contain)
    let cases = [
        (BehavioralError::InvalidAlpha(0.0), "alpha"),
        (BehavioralError::InvalidThreshold(-1.0), "threshold"),
        (BehavioralError::InvalidMaxTools, "max_tools"),
        (BehavioralError::InvalidMaxAgents, "max_agents"),
        (BehavioralError::InvalidSnapshot("test".to_string()), "test"),
    ];

    for (err, fragment) in cases.iter() {
        let message = err.to_string();
        assert!(message.contains(*fragment));
    }
}
1666
1667    // ── Saturating arithmetic ─────────────────────
1668
/// Recording 100 sessions for one agent yields a session count of exactly 100.
#[test]
fn test_saturating_session_count() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig {
        min_sessions: 1,
        ..Default::default()
    })
    .expect("valid config");

    let single_call = counts(&[("tool", 1)]);
    (0..100).for_each(|_| {
        tracker.record_session("agent-1", &single_call);
    });

    let recorded = tracker.agent_sessions("agent-1");
    assert_eq!(recorded, Some(100));
}
1683
1684    // ── Multiple tools in one session ─────────────
1685
/// When a session mixes one spiking tool with two ordinary ones, only the
/// spiking tool is reported.
#[test]
fn test_multiple_tools_single_session() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig {
        min_sessions: 3,
        threshold: 10.0,
        alpha: 0.5,
        ..Default::default()
    })
    .expect("valid config");

    // Five identical sessions establish the baseline for all three tools.
    let baseline_calls = counts(&[("read_file", 5), ("write_file", 2), ("list_dir", 10)]);
    (0..5).for_each(|_| {
        tracker.record_session("agent-1", &baseline_calls);
    });

    // read_file jumps to 500; the other two stay near their baselines.
    let spiky_calls = counts(&[("read_file", 500), ("write_file", 3), ("list_dir", 12)]);
    let alerts = tracker.check_session("agent-1", &spiky_calls);
    let flagged_tools: Vec<&str> = alerts.iter().map(|alert| alert.tool.as_str()).collect();
    assert_eq!(flagged_tools.len(), 1, "Only read_file should trigger");
    assert_eq!(flagged_tools[0], "read_file");
}
1707
1708    // ── Gradual increase adapts baseline ──────────
1709
/// After many sessions at a higher level, that level no longer raises an alert.
#[test]
fn test_gradual_increase_adapts() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig {
        min_sessions: 3,
        threshold: 5.0,
        alpha: 0.5,
        ..Default::default()
    })
    .expect("valid config");

    // Phase 1: three sessions at 10 calls. Phase 2: ten sessions at 20 calls.
    let phases: [(usize, u64); 2] = [(3, 10), (10, 20)];
    for (rounds, calls) in phases.iter() {
        for _ in 0..*rounds {
            tracker.record_session("agent-1", &counts(&[("tool", *calls)]));
        }
    }

    // Another session at 20 calls is checked against the baseline built above.
    let steady_state = counts(&[("tool", 20)]);
    let alerts = tracker.check_session("agent-1", &steady_state);
    let quiet = alerts.is_empty();
    assert!(quiet, "Gradual increase should adapt the baseline");
}
1737
1738    // ════════════════════════════════════════════════════════
1739    // FIND-052: EMA epsilon and extreme numeric edge cases
1740    // ════════════════════════════════════════════════════════
1741
/// A tool whose EMA has decayed to almost nothing is flagged as soon as it is
/// called at least `threshold` times again.
#[test]
fn test_behavioral_epsilon_ema_triggers_anomaly() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig {
        min_sessions: 1,
        threshold: 2.0,
        alpha: 0.99, // high alpha: the EMA follows the latest value closely
        ..Default::default()
    })
    .expect("valid config");

    // Seed the tool with a single call...
    tracker.record_session("agent-1", &counts(&[("tool", 1)]));
    // ...then run 100 sessions that never touch it, so its EMA decays.
    let unrelated = counts(&[("other", 1)]);
    (0..100).for_each(|_| {
        tracker.record_session("agent-1", &unrelated);
    });

    let decayed_ema = tracker
        .get_baseline("agent-1", "tool")
        .expect("baseline should exist")
        .ema;
    assert!(
        decayed_ema < 0.01,
        "EMA should have decayed to near zero, got: {}",
        decayed_ema
    );

    // With min_sessions = 1 the cold-start guard is already satisfied, and a
    // count of 3 is above the threshold of 2.0, so an alert is expected.
    let probe = counts(&[("tool", 3)]);
    let alerts = tracker.check_session("agent-1", &probe);
    let flagged = !alerts.is_empty();
    assert!(
        flagged,
        "Near-zero EMA with count >= threshold should flag as anomalous"
    );
}
1783
/// Checking and recording a `u64::MAX` call count neither panics nor leaves a
/// non-finite EMA behind.
#[test]
fn test_behavioral_u64_max_count_does_not_panic() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig {
        min_sessions: 1,
        threshold: 2.0,
        alpha: 0.5,
        ..Default::default()
    })
    .expect("valid config");

    // Three ordinary sessions form the baseline.
    let typical = counts(&[("tool", 10)]);
    (0..3).for_each(|_| {
        tracker.record_session("agent-1", &typical);
    });

    // Read path: the extreme count must be flagged.
    let saturated = counts(&[("tool", u64::MAX)]);
    let flagged = tracker.check_session("agent-1", &saturated);
    assert!(!flagged.is_empty(), "u64::MAX count should trigger anomaly");

    // Write path: folding the extreme count into the EMA must keep it finite.
    tracker.record_session("agent-1", &saturated);
    let ema_after = tracker
        .get_baseline("agent-1", "tool")
        .expect("baseline exists")
        .ema;
    assert!(
        ema_after.is_finite(),
        "EMA should remain finite after u64::MAX, got: {}",
        ema_after
    );
}
1817
/// A very large baseline combined with an even larger count still yields a
/// finite EMA and a finite deviation ratio.
#[test]
fn test_behavioral_large_ema_large_count_no_overflow() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig {
        min_sessions: 1,
        threshold: 2.0,
        alpha: 0.5,
        ..Default::default()
    })
    .expect("valid config");

    // Build the baseline from five sessions at half of u64::MAX.
    let half_max = counts(&[("tool", u64::MAX / 2)]);
    (0..5).for_each(|_| {
        tracker.record_session("agent-1", &half_max);
    });

    let ema = tracker
        .get_baseline("agent-1", "tool")
        .expect("baseline exists")
        .ema;
    assert!(ema.is_finite(), "EMA should remain finite with large counts");

    // u64::MAX against a baseline near u64::MAX / 2 gives a ratio of about
    // 2.0, which meets the configured threshold of 2.0.
    let full_max = counts(&[("tool", u64::MAX)]);
    let alerts = tracker.check_session("agent-1", &full_max);
    assert!(!alerts.is_empty(), "u64::MAX vs large EMA should trigger");

    let ratio = alerts[0].deviation_ratio;
    assert!(
        ratio.is_finite(),
        "Deviation ratio should be finite, got: {}",
        ratio
    );
}
1854
/// After 100 recorded sessions spread over five agents, the snapshot's
/// update counter reads exactly 100.
#[test]
fn test_behavioral_update_counter_saturates() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig {
        min_sessions: 1,
        ..Default::default()
    })
    .expect("valid config");

    // Rotate through agent-0 .. agent-4.
    for round in 0..100 {
        let agent = format!("agent-{}", round % 5);
        tracker.record_session(&agent, &counts(&[("tool", 1)]));
    }

    let counter = tracker.snapshot().update_counter;
    assert_eq!(counter, 100, "Update counter should track session count");
}
1874
1875    // ════════════════════════════════════════════════════════
1876    // FIND-R114-001: absolute_ceiling=0 must not produce Infinity
1877    // ════════════════════════════════════════════════════════
1878
/// With `absolute_ceiling` set to zero, a non-zero count raises a Critical
/// alert whose deviation ratio is finite and positive.
#[test]
fn test_absolute_ceiling_zero_does_not_produce_infinity() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig {
        min_sessions: 1,
        absolute_ceiling: Some(0),
        ..Default::default()
    })
    .expect("valid config");

    // Give the agent a baseline first.
    let typical = counts(&[("tool", 5)]);
    (0..3).for_each(|_| {
        tracker.record_session("agent-1", &typical);
    });

    // A single call is already above a ceiling of zero.
    let one_call = counts(&[("tool", 1)]);
    let alerts = tracker.check_session("agent-1", &one_call);
    assert!(!alerts.is_empty(), "count > 0 should exceed ceiling of 0");

    let first = &alerts[0];
    assert_eq!(first.severity, AnomalySeverity::Critical);
    // The point of the test: the reported ratio must not be Infinity.
    assert!(
        first.deviation_ratio.is_finite(),
        "deviation_ratio must be finite when ceiling=0, got: {}",
        first.deviation_ratio
    );
    assert!(
        first.deviation_ratio > 0.0,
        "deviation_ratio should be positive"
    );
}
1910
1911    // ════════════════════════════════════════════════════════
1912    // FIND-R114-002: from_snapshot rejects control/format chars in keys
1913    // ════════════════════════════════════════════════════════
1914
/// A snapshot whose agent ID contains the control character U+0001 is refused
/// with `InvalidSnapshot`.
#[test]
fn test_from_snapshot_rejects_control_char_agent_id() {
    let tainted_entry = AgentSnapshotEntry {
        tools: HashMap::new(),
        total_sessions: 1,
    };
    let snapshot = BehavioralSnapshot {
        agents: HashMap::from([("agent\x01bad".to_string(), tainted_entry)]),
        update_counter: 0,
    };

    let outcome = BehavioralTracker::from_snapshot(BehavioralConfig::default(), snapshot);
    let rejected = matches!(outcome, Err(BehavioralError::InvalidSnapshot(_)));
    assert!(
        rejected,
        "expected InvalidSnapshot for control char agent_id"
    );
}
1936
/// A snapshot whose agent ID contains a zero-width space (U+200B) is refused
/// with `InvalidSnapshot`.
#[test]
fn test_from_snapshot_rejects_unicode_format_char_agent_id() {
    let tainted_entry = AgentSnapshotEntry {
        tools: HashMap::new(),
        total_sessions: 1,
    };
    // The key looks like "agentid" but hides U+200B in the middle.
    let snapshot = BehavioralSnapshot {
        agents: HashMap::from([("agent\u{200B}id".to_string(), tainted_entry)]),
        update_counter: 0,
    };

    let outcome = BehavioralTracker::from_snapshot(BehavioralConfig::default(), snapshot);
    let rejected = matches!(outcome, Err(BehavioralError::InvalidSnapshot(_)));
    assert!(
        rejected,
        "expected InvalidSnapshot for Unicode format char agent_id"
    );
}
1959
/// A snapshot containing a tool key with an embedded newline is refused with
/// `InvalidSnapshot`, even though the agent ID itself is clean.
#[test]
fn test_from_snapshot_rejects_control_char_tool_key() {
    let baseline = ToolBaseline {
        ema: 5.0,
        session_count: 1,
        last_active: 0,
    };
    let agent_entry = AgentSnapshotEntry {
        tools: HashMap::from([("tool\nnewline".to_string(), baseline)]),
        total_sessions: 1,
    };
    let snapshot = BehavioralSnapshot {
        agents: HashMap::from([("agent-1".to_string(), agent_entry)]),
        update_counter: 0,
    };

    let outcome = BehavioralTracker::from_snapshot(BehavioralConfig::default(), snapshot);
    let rejected = matches!(outcome, Err(BehavioralError::InvalidSnapshot(_)));
    assert!(
        rejected,
        "expected InvalidSnapshot for control char tool key"
    );
}
1990
/// A snapshot containing a tool key with a right-to-left override (U+202E) is
/// refused with `InvalidSnapshot`.
#[test]
fn test_from_snapshot_rejects_bidi_override_tool_key() {
    let baseline = ToolBaseline {
        ema: 5.0,
        session_count: 1,
        last_active: 0,
    };
    // U+202E sits between "tool" and "malicious" in the key.
    let agent_entry = AgentSnapshotEntry {
        tools: HashMap::from([("tool\u{202E}malicious".to_string(), baseline)]),
        total_sessions: 1,
    };
    let snapshot = BehavioralSnapshot {
        agents: HashMap::from([("agent-1".to_string(), agent_entry)]),
        update_counter: 0,
    };

    let outcome = BehavioralTracker::from_snapshot(BehavioralConfig::default(), snapshot);
    let rejected = matches!(outcome, Err(BehavioralError::InvalidSnapshot(_)));
    assert!(
        rejected,
        "expected InvalidSnapshot for bidi override tool key"
    );
}
2022
/// Positive control for the snapshot key checks: plain ASCII agent and tool
/// keys are accepted.
#[test]
fn test_from_snapshot_accepts_clean_keys() {
    let baseline = ToolBaseline {
        ema: 5.0,
        session_count: 1,
        last_active: 0,
    };
    let agent_entry = AgentSnapshotEntry {
        tools: HashMap::from([("read_file".to_string(), baseline)]),
        total_sessions: 1,
    };
    let snapshot = BehavioralSnapshot {
        agents: HashMap::from([("agent-1".to_string(), agent_entry)]),
        update_counter: 0,
    };

    let outcome = BehavioralTracker::from_snapshot(BehavioralConfig::default(), snapshot);
    assert!(outcome.is_ok());
}
2049
2050    // ── FIND-R139: Live-path validation tests ──────────
2051
/// Recording a session under a 513-character agent ID leaves the tracker
/// without any agents.
#[test]
fn test_record_session_rejects_oversized_agent_id() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig::default()).unwrap();

    let oversized_id = "a".repeat(513);
    let calls: HashMap<String, u64> = HashMap::from([("tool1".to_string(), 5u64)]);
    tracker.record_session(&oversized_id, &calls);

    let nothing_stored = tracker.agents.is_empty();
    assert!(nothing_stored, "oversized agent_id should be rejected");
}
2063
/// Recording a session under an agent ID that embeds an ANSI escape sequence
/// leaves the tracker without any agents.
#[test]
fn test_record_session_rejects_control_char_agent_id() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig::default()).unwrap();

    let calls: HashMap<String, u64> = HashMap::from([("tool1".to_string(), 5u64)]);
    tracker.record_session("agent\x1b[31m", &calls);

    let nothing_stored = tracker.agents.is_empty();
    assert!(nothing_stored, "control-char agent_id should be rejected");
}
2074
/// A `call_counts` map with 10_001 entries is rejected as a whole and yields
/// no alerts.
///
/// The agent is given a baseline and the config a low `absolute_ceiling`, with
/// every submitted count far above that ceiling. The map would therefore be
/// expected to raise alerts if it were evaluated entry by entry, so an empty
/// result shows the rejection itself rather than a tracker that simply has
/// nothing to compare against.
#[test]
fn test_check_session_rejects_oversized_call_counts() {
    let config = BehavioralConfig {
        min_sessions: 1,
        threshold: 2.0,
        absolute_ceiling: Some(10),
        ..Default::default()
    };
    let mut tracker = BehavioralTracker::new(config).expect("valid config");

    // Make "agent-1" a known agent with enough recorded sessions.
    let normal: HashMap<String, u64> = HashMap::from([("valid_tool".to_string(), 5u64)]);
    for _ in 0..3 {
        tracker.record_session("agent-1", &normal);
    }

    // 10_001 distinct, well-formed tool keys, each with a count of 1000.
    let oversized: HashMap<String, u64> = (0..10_001)
        .map(|i| (format!("tool_{i}"), 1000u64))
        .collect();

    let alerts = tracker.check_session("agent-1", &oversized);
    assert!(
        alerts.is_empty(),
        "oversized call_counts should be rejected with empty alerts"
    );
}
2088
/// A first session with a `u64::MAX` call count stores a finite EMA.
#[test]
fn test_ema_non_finite_clamp() {
    let config = BehavioralConfig {
        alpha: 0.5,
        ..BehavioralConfig::default()
    };
    let mut tracker = BehavioralTracker::new(config).unwrap();

    // This is the agent's very first session, so it seeds the baseline.
    let agent_id = "agent-ema-test";
    let extreme: HashMap<String, u64> = HashMap::from([("tool1".to_string(), u64::MAX)]);
    tracker.record_session(agent_id, &extreme);

    // Read the stored EMA straight from the tracker's internal state.
    let stored_ema = tracker
        .agents
        .get(agent_id)
        .and_then(|agent| agent.tools.get("tool1"))
        .map(|baseline| baseline.ema)
        .unwrap();
    assert!(
        stored_ema.is_finite(),
        "EMA should remain finite even with u64::MAX count"
    );
}
2107
2108    // ═══════════════════════════════════════════════════
2109    // FIND-R116-TE-003: Tool key validation in record_session / check_session
2110    // ═══════════════════════════════════════════════════
2111
/// A 257-character tool key (one past `MAX_TOOL_KEY_LEN` = 256) is not
/// recorded by `record_session`.
#[test]
fn test_record_session_skips_oversized_tool_key() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig::default()).unwrap();

    let too_long = "a".repeat(257);
    let calls: HashMap<String, u64> = HashMap::from([(too_long.clone(), 5u64)]);
    tracker.record_session("agent-1", &calls);

    let stored = tracker.get_baseline("agent-1", &too_long);
    assert!(stored.is_none(), "oversized tool key should not be recorded");
}
2124
/// A tool key containing a newline is not recorded by `record_session`.
#[test]
fn test_record_session_skips_control_char_tool_key() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig::default()).unwrap();

    let tainted_key = String::from("tool\nnewline");
    let calls: HashMap<String, u64> = HashMap::from([(tainted_key.clone(), 5u64)]);
    tracker.record_session("agent-1", &calls);

    let stored = tracker.get_baseline("agent-1", &tainted_key);
    assert!(
        stored.is_none(),
        "control char tool key should not be recorded"
    );
}
2136
/// A tool key hiding a zero-width space (U+200B) is not recorded by
/// `record_session`.
#[test]
fn test_record_session_skips_unicode_format_char_tool_key() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig::default()).unwrap();

    let tainted_key = String::from("tool\u{200B}name");
    let calls: HashMap<String, u64> = HashMap::from([(tainted_key.clone(), 5u64)]);
    tracker.record_session("agent-1", &calls);

    let stored = tracker.get_baseline("agent-1", &tainted_key);
    assert!(
        stored.is_none(),
        "Unicode format char tool key should not be recorded"
    );
}
2149
/// When one session carries both a clean and a tainted tool key, only the
/// clean key ends up with a baseline.
#[test]
fn test_record_session_accepts_valid_tool_key_alongside_invalid() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig::default()).unwrap();

    let mixed_calls = HashMap::from([
        ("valid_tool".to_string(), 5u64),
        ("bad\x01tool".to_string(), 10u64),
    ]);
    tracker.record_session("agent-1", &mixed_calls);

    let clean_recorded = tracker.get_baseline("agent-1", "valid_tool").is_some();
    assert!(clean_recorded, "valid tool key should be recorded");

    let tainted_recorded = tracker.get_baseline("agent-1", "bad\x01tool").is_some();
    assert!(!tainted_recorded, "invalid tool key should not be recorded");
}
2167
/// Boundary case: a tool key of exactly 256 characters (`MAX_TOOL_KEY_LEN`)
/// is still recorded.
#[test]
fn test_record_session_tool_key_at_max_len_accepted() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig::default()).unwrap();

    let boundary_key = "a".repeat(256);
    let calls: HashMap<String, u64> = HashMap::from([(boundary_key.clone(), 5u64)]);
    tracker.record_session("agent-1", &calls);

    let stored = tracker.get_baseline("agent-1", &boundary_key);
    assert!(
        stored.is_some(),
        "tool key at exactly MAX_TOOL_KEY_LEN should be accepted"
    );
}
2179
/// `check_session` silently skips a 257-character tool key instead of
/// alerting on it.
///
/// `absolute_ceiling` is set well below the submitted count so that the key
/// would be expected to raise a Critical alert if it were evaluated. Without
/// the ceiling, a tool with no baseline may produce no alert in any case, and
/// the test could not tell "skipped" apart from "not yet actionable".
#[test]
fn test_check_session_skips_oversized_tool_key() {
    let config = BehavioralConfig {
        min_sessions: 1,
        threshold: 2.0,
        absolute_ceiling: Some(10),
        ..Default::default()
    };
    let mut tracker = BehavioralTracker::new(config).expect("valid config");

    // Build a baseline with a valid tool so the agent itself is established.
    let normal = counts(&[("valid_tool", 5)]);
    for _ in 0..3 {
        tracker.record_session("agent-1", &normal);
    }

    // The only submitted key is one character past the 256-character limit,
    // with a count of 1000 against a ceiling of 10.
    let long_tool = "a".repeat(257);
    let check: HashMap<String, u64> = HashMap::from([(long_tool, 1000u64)]);
    let alerts = tracker.check_session("agent-1", &check);
    assert!(
        alerts.is_empty(),
        "oversized tool key should be skipped in check_session"
    );
}
2205
/// `check_session` silently skips a tool key containing an ESC character,
/// even though its count is far above the configured `absolute_ceiling`.
#[test]
fn test_check_session_skips_control_char_tool_key() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig {
        min_sessions: 1,
        threshold: 2.0,
        absolute_ceiling: Some(10),
        ..Default::default()
    })
    .unwrap();

    let baseline_calls = counts(&[("valid_tool", 5)]);
    (0..3).for_each(|_| {
        tracker.record_session("agent-1", &baseline_calls);
    });

    // 1000 calls against a ceiling of 10 would normally be Critical; the
    // tainted key is expected to be dropped before that check applies.
    let tainted: HashMap<String, u64> = HashMap::from([("tool\x1bnewline".to_string(), 1000)]);
    let alerts = tracker.check_session("agent-1", &tainted);
    let quiet = alerts.is_empty();
    assert!(
        quiet,
        "control char tool key should be skipped in check_session"
    );
}
2231
/// `check_session` silently skips a tool key containing a right-to-left
/// override (U+202E), even though its count is far above the configured
/// `absolute_ceiling`.
#[test]
fn test_check_session_skips_bidi_override_tool_key() {
    let mut tracker = BehavioralTracker::new(BehavioralConfig {
        min_sessions: 1,
        threshold: 2.0,
        absolute_ceiling: Some(10),
        ..Default::default()
    })
    .unwrap();

    let baseline_calls = counts(&[("valid_tool", 5)]);
    (0..3).for_each(|_| {
        tracker.record_session("agent-1", &baseline_calls);
    });

    // The only submitted key carries U+202E between "tool" and "evil".
    let tainted: HashMap<String, u64> = HashMap::from([("tool\u{202E}evil".to_string(), 1000)]);
    let alerts = tracker.check_session("agent-1", &tainted);
    let quiet = alerts.is_empty();
    assert!(
        quiet,
        "bidi override tool key should be skipped in check_session"
    );
}
2256}