// vellaveto_engine/sequence.rs

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//
// Copyright 2026 Paolo Vella
// SPDX-License-Identifier: MPL-2.0

//! Phase 6.3: Behavioral sequence analysis.
//!
//! Tracks tool call sequences within a session and detects behavioral
//! anomalies that may indicate injection-provoked actions — even within
//! the allowed intent scope. Five deterministic heuristic detectors,
//! no ML, all auditable.

use std::collections::VecDeque;

use vellaveto_types::provenance::SinkClass;

/// Maximum call log entries per session.
///
/// The tracker also reuses this value as the cap on how many distinct tool
/// names it remembers.
const MAX_CALL_LOG: usize = 1000;

20/// A recorded tool call in the sequence.
21#[derive(Debug, Clone)]
22struct SequenceEntry {
23    tool_name: String,
24    sink_class: SinkClass,
25    timestamp_ms: u64,
26    source_tainted: bool,
27    is_novel: bool,
28}
29
30/// Configuration for sequence analysis.
31#[derive(Debug, Clone)]
32pub struct SequenceConfig {
33    /// Minimum calls before anomaly detection activates.
34    pub warmup_calls: u32,
35    /// Max time (ms) between tainted source read and privileged action.
36    pub read_to_act_window_ms: u64,
37    /// Max new distinct tools after first taint before flagging.
38    pub max_new_tools_after_taint: u32,
39    /// Action on anomaly detection.
40    pub anomaly_action: AnomalyAction,
41}
42
43impl Default for SequenceConfig {
44    fn default() -> Self {
45        Self {
46            warmup_calls: 3,
47            read_to_act_window_ms: 5000,
48            max_new_tools_after_taint: 2,
49            anomaly_action: AnomalyAction::AuditOnly,
50        }
51    }
52}
53
/// What to do when an anomaly is detected.
///
/// NOTE(review): the tracker in this module only stores the configured action;
/// enforcement presumably happens in the caller — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnomalyAction {
    /// Presumably rejects the offending call — confirm at the enforcement site.
    Block,
    /// Presumably holds the call until it is explicitly approved — confirm.
    RequireApproval,
    /// Presumably records the anomaly without interfering with the call — confirm.
    AuditOnly,
}

62/// A detected behavioral anomaly.
63#[derive(Debug, Clone)]
64pub struct SequenceAnomaly {
65    pub anomaly_type: AnomalyType,
66    pub confidence: u32,
67    pub description: String,
68}
69
/// Types of sequence anomalies, one per detector.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnomalyType {
    /// Sensitive (tainted) read followed by network egress.
    ReadThenExfil,
    /// Sink class jump after source taint.
    PrivilegeEscalationAfterTaint,
    /// New tools cluster after taint event.
    ToolDiversitySpike,
    /// Never-seen tool targets privileged sink after untrusted content.
    NovelToolAfterUntrustedContent,
    /// Burst of privileged sink calls.
    PrivilegedActionCluster,
}

85/// Tracks tool call sequences and detects anomalies.
86pub struct SequenceTracker {
87    call_log: Vec<SequenceEntry>,
88    distinct_tools: Vec<String>,
89    config: SequenceConfig,
90    first_taint_idx: Option<usize>,
91    tools_before_taint: usize,
92    anomalies: Vec<SequenceAnomaly>,
93}
94
95impl SequenceTracker {
96    pub fn new(config: SequenceConfig) -> Self {
97        Self {
98            call_log: Vec::new(),
99            distinct_tools: Vec::new(),
100            config,
101            first_taint_idx: None,
102            tools_before_taint: 0,
103            anomalies: Vec::new(),
104        }
105    }
106
107    /// Record a tool call and run all detectors.
108    pub fn record_and_analyze(
109        &mut self,
110        tool_name: &str,
111        sink_class: SinkClass,
112        source_tainted: bool,
113        now_ms: u64,
114    ) -> Vec<SequenceAnomaly> {
115        let is_novel = !self.distinct_tools.iter().any(|t| t == tool_name);
116        if is_novel && self.distinct_tools.len() < MAX_CALL_LOG {
117            self.distinct_tools.push(tool_name.to_string());
118        }
119
120        if source_tainted && self.first_taint_idx.is_none() {
121            self.first_taint_idx = Some(self.call_log.len());
122            self.tools_before_taint = self.distinct_tools.len().saturating_sub(1);
123        }
124
125        let entry = SequenceEntry {
126            tool_name: tool_name[..tool_name.len().min(256)].to_string(),
127            sink_class,
128            timestamp_ms: now_ms,
129            source_tainted,
130            is_novel,
131        };
132
133        if self.call_log.len() < MAX_CALL_LOG {
134            self.call_log.push(entry);
135        }
136
137        if (self.call_log.len() as u32) < self.config.warmup_calls {
138            return Vec::new();
139        }
140
141        let mut new_anomalies = Vec::new();
142
143        if let Some(a) = self.detect_read_then_exfil() {
144            new_anomalies.push(a);
145        }
146        if let Some(a) = self.detect_privilege_escalation_after_taint() {
147            new_anomalies.push(a);
148        }
149        if let Some(a) = self.detect_tool_diversity_spike() {
150            new_anomalies.push(a);
151        }
152        if let Some(a) = self.detect_novel_tool_after_untrusted() {
153            new_anomalies.push(a);
154        }
155        if let Some(a) = self.detect_privileged_action_cluster() {
156            new_anomalies.push(a);
157        }
158
159        self.anomalies.extend(new_anomalies.clone());
160        new_anomalies
161    }
162
163    /// Get all anomalies detected so far.
164    pub fn anomalies(&self) -> &[SequenceAnomaly] {
165        &self.anomalies
166    }
167
168    /// Highest confidence anomaly, if any.
169    pub fn max_confidence(&self) -> u32 {
170        self.anomalies
171            .iter()
172            .map(|a| a.confidence)
173            .max()
174            .unwrap_or(0)
175    }
176
177    // ═══════════════════════════════════════════════════
178    // Detectors
179    // ═══════════════════════════════════════════════════
180
181    /// Detector 1: Sensitive read followed by network egress within window.
182    fn detect_read_then_exfil(&self) -> Option<SequenceAnomaly> {
183        let len = self.call_log.len();
184        if len < 2 {
185            return None;
186        }
187        let current = &self.call_log[len - 1];
188        if !matches!(current.sink_class, SinkClass::NetworkEgress) {
189            return None;
190        }
191        // Look back for a tainted read within the window
192        for i in (0..len - 1).rev() {
193            let prev = &self.call_log[i];
194            if current.timestamp_ms.saturating_sub(prev.timestamp_ms)
195                > self.config.read_to_act_window_ms
196            {
197                break;
198            }
199            if prev.source_tainted && matches!(prev.sink_class, SinkClass::ReadOnly) {
200                return Some(SequenceAnomaly {
201                    anomaly_type: AnomalyType::ReadThenExfil,
202                    confidence: 80,
203                    description: format!(
204                        "tainted read '{}' followed by network egress '{}'",
205                        prev.tool_name, current.tool_name
206                    ),
207                });
208            }
209        }
210        None
211    }
212
213    /// Detector 2: Privilege escalation after source taint.
214    fn detect_privilege_escalation_after_taint(&self) -> Option<SequenceAnomaly> {
215        let taint_idx = self.first_taint_idx?;
216        let current = self.call_log.last()?;
217
218        // Was the session only using low-privilege sinks before taint?
219        let max_pre_taint_sink = self.call_log[..taint_idx]
220            .iter()
221            .map(|e| e.sink_class.rank())
222            .max()
223            .unwrap_or(0);
224
225        if max_pre_taint_sink <= SinkClass::LowRiskWrite.rank()
226            && current.sink_class.rank() >= SinkClass::CodeExecution.rank()
227        {
228            return Some(SequenceAnomaly {
229                anomaly_type: AnomalyType::PrivilegeEscalationAfterTaint,
230                confidence: 90,
231                description: format!(
232                    "privilege escalation to {:?} after source taint (pre-taint max: LowRiskWrite)",
233                    current.sink_class
234                ),
235            });
236        }
237        None
238    }
239
240    /// Detector 3: Tool diversity spike after taint.
241    fn detect_tool_diversity_spike(&self) -> Option<SequenceAnomaly> {
242        let _taint_idx = self.first_taint_idx?;
243        let new_tools_after_taint = self
244            .distinct_tools
245            .len()
246            .saturating_sub(self.tools_before_taint);
247
248        if new_tools_after_taint > self.config.max_new_tools_after_taint as usize {
249            return Some(SequenceAnomaly {
250                anomaly_type: AnomalyType::ToolDiversitySpike,
251                confidence: 60,
252                description: format!(
253                    "{new_tools_after_taint} new tools after taint (max: {})",
254                    self.config.max_new_tools_after_taint
255                ),
256            });
257        }
258        None
259    }
260
261    /// Detector 4: Novel tool targeting privileged sink after untrusted content.
262    fn detect_novel_tool_after_untrusted(&self) -> Option<SequenceAnomaly> {
263        let len = self.call_log.len();
264        if len < 2 {
265            return None;
266        }
267        let current = &self.call_log[len - 1];
268        if !current.is_novel || current.sink_class.rank() < SinkClass::CodeExecution.rank() {
269            return None;
270        }
271        // Check if previous call was tainted
272        let prev = &self.call_log[len - 2];
273        if prev.source_tainted {
274            return Some(SequenceAnomaly {
275                anomaly_type: AnomalyType::NovelToolAfterUntrustedContent,
276                confidence: 85,
277                description: format!(
278                    "novel tool '{}' ({:?}) immediately after untrusted '{}'",
279                    current.tool_name, current.sink_class, prev.tool_name
280                ),
281            });
282        }
283        None
284    }
285
286    /// Detector 5: Temporal clustering of privileged actions.
287    fn detect_privileged_action_cluster(&self) -> Option<SequenceAnomaly> {
288        // Don't duplicate this specific anomaly type
289        if self
290            .anomalies
291            .iter()
292            .any(|a| a.anomaly_type == AnomalyType::PrivilegedActionCluster)
293        {
294            return None;
295        }
296        let len = self.call_log.len();
297        if len < 3 {
298            return None;
299        }
300        // Has any taint?
301        self.first_taint_idx?;
302        // Count privileged calls in the last window
303        let current_ts = self.call_log[len - 1].timestamp_ms;
304        let privileged_count = self
305            .call_log
306            .iter()
307            .rev()
308            .take_while(|e| {
309                current_ts.saturating_sub(e.timestamp_ms) <= self.config.read_to_act_window_ms
310            })
311            .filter(|e| e.sink_class.rank() >= SinkClass::FilesystemWrite.rank())
312            .count();
313
314        if privileged_count >= 3 {
315            return Some(SequenceAnomaly {
316                anomaly_type: AnomalyType::PrivilegedActionCluster,
317                confidence: 70,
318                description: format!(
319                    "{privileged_count} privileged calls within {}ms window",
320                    self.config.read_to_act_window_ms
321                ),
322            });
323        }
324        None
325    }
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    /// Tracker with a two-call warm-up and otherwise default-like thresholds.
    fn tracker() -> SequenceTracker {
        SequenceTracker::new(SequenceConfig {
            warmup_calls: 2,
            read_to_act_window_ms: 5000,
            max_new_tools_after_taint: 2,
            anomaly_action: AnomalyAction::Block,
        })
    }

    #[test]
    fn test_no_anomaly_clean_session() {
        let mut t = tracker();
        let a = t.record_and_analyze("read_file", SinkClass::ReadOnly, false, 1000);
        assert!(a.is_empty());
        let a = t.record_and_analyze("read_file", SinkClass::ReadOnly, false, 2000);
        assert!(a.is_empty());
        let a = t.record_and_analyze("write_file", SinkClass::FilesystemWrite, false, 3000);
        assert!(a.is_empty());
    }

    #[test]
    fn test_read_then_exfil_detected() {
        let mut t = tracker();
        t.record_and_analyze("warmup", SinkClass::ReadOnly, false, 100);
        t.record_and_analyze("fetch_url", SinkClass::ReadOnly, true, 1000);
        let a = t.record_and_analyze("http_post", SinkClass::NetworkEgress, false, 2000);
        assert!(a
            .iter()
            .any(|x| x.anomaly_type == AnomalyType::ReadThenExfil));
    }

    #[test]
    fn test_privilege_escalation_after_taint() {
        let mut t = tracker();
        t.record_and_analyze("read_file", SinkClass::ReadOnly, false, 100);
        t.record_and_analyze("fetch_url", SinkClass::ReadOnly, true, 1000);
        let a = t.record_and_analyze("execute_cmd", SinkClass::CodeExecution, false, 2000);
        assert!(a
            .iter()
            .any(|x| x.anomaly_type == AnomalyType::PrivilegeEscalationAfterTaint));
    }

    #[test]
    fn test_tool_diversity_spike() {
        let mut t = SequenceTracker::new(SequenceConfig {
            warmup_calls: 1,
            max_new_tools_after_taint: 1,
            ..SequenceConfig::default()
        });
        // First taint arrives on a never-seen tool, which counts as the first new tool.
        t.record_and_analyze("tool_a", SinkClass::ReadOnly, true, 100);
        // Second new tool: already above the configured maximum of 1.
        t.record_and_analyze("tool_b", SinkClass::ReadOnly, false, 200);
        // Third new tool: the spike is reported on this call as well.
        let a = t.record_and_analyze("tool_c", SinkClass::ReadOnly, false, 300);
        assert!(a
            .iter()
            .any(|x| x.anomaly_type == AnomalyType::ToolDiversitySpike));
    }

    #[test]
    fn test_novel_tool_after_untrusted() {
        let mut t = tracker();
        t.record_and_analyze("warmup", SinkClass::ReadOnly, false, 100);
        t.record_and_analyze("fetch_url", SinkClass::ReadOnly, true, 1000);
        let a = t.record_and_analyze("never_seen_exec", SinkClass::CodeExecution, false, 1500);
        assert!(a
            .iter()
            .any(|x| x.anomaly_type == AnomalyType::NovelToolAfterUntrustedContent));
    }

    #[test]
    fn test_privileged_action_cluster() {
        let mut t = tracker();
        t.record_and_analyze("warmup", SinkClass::ReadOnly, true, 100); // taint
        t.record_and_analyze("write_1", SinkClass::FilesystemWrite, false, 1000);
        t.record_and_analyze("write_2", SinkClass::FilesystemWrite, false, 1500);
        let a = t.record_and_analyze("write_3", SinkClass::FilesystemWrite, false, 2000);
        assert!(a
            .iter()
            .any(|x| x.anomaly_type == AnomalyType::PrivilegedActionCluster));
    }

    #[test]
    fn test_warmup_suppresses_early_detection() {
        let mut t = SequenceTracker::new(SequenceConfig {
            warmup_calls: 5,
            ..SequenceConfig::default()
        });
        // Even with clear anomaly pattern, warmup suppresses
        t.record_and_analyze("fetch", SinkClass::ReadOnly, true, 100);
        let a = t.record_and_analyze("exec", SinkClass::CodeExecution, false, 200);
        assert!(a.is_empty(), "Should be suppressed during warmup");
    }

    #[test]
    fn test_max_confidence() {
        let mut t = tracker();
        assert_eq!(t.max_confidence(), 0);
        t.record_and_analyze("warmup", SinkClass::ReadOnly, false, 100);
        t.record_and_analyze("fetch", SinkClass::ReadOnly, true, 1000);
        t.record_and_analyze("exec", SinkClass::CodeExecution, false, 1500);
        assert!(t.max_confidence() >= 80);
    }
}
433}