arbiter_behavior/
detector.rs

1//! Behavioral anomaly detector.
2//!
3//! Analyzes the declared intent of a session against the actual operation
4//! types being performed, flagging mismatches as anomalies.
5
6use regex::RegexSet;
7use serde::{Deserialize, Serialize};
8
9use crate::classifier::OperationType;
10
/// Builds the default vocabulary that marks a declared intent as read-only.
///
/// A session whose intent matches one of these words (and no write or admin
/// word) lands in the read tier, where every non-read operation is flagged.
///
/// The words fall into three groups:
/// - explicit read verbs: "read", "view", "inspect"
/// - comprehension tasks that consume but do not modify: "analyze",
///   "summarize", "review", "explain", "describe"
/// - enumeration/lookup verbs: "check", "search", "query", "list"
fn default_read_intent_keywords() -> Vec<String> {
    const READ_VERBS: [&str; 12] = [
        "read",
        "analyze",
        "summarize",
        "review",
        "inspect",
        "view",
        "check",
        "list",
        "search",
        "query",
        "describe",
        "explain",
    ];

    READ_VERBS.iter().map(|verb| (*verb).to_owned()).collect()
}
34
/// Builds the default vocabulary that marks a declared intent as a write
/// session.
///
/// Write-tier sessions may read, write and delete without being flagged by
/// the tier rule; only admin operations are treated as anomalous.
///
/// Rationale: these verbs imply data mutation but not system administration.
fn default_write_intent_keywords() -> Vec<String> {
    [
        "write", "create", "update", "modify", "edit", "deploy", "build", "generate", "publish",
        "upload",
    ]
    .iter()
    .copied()
    .map(String::from)
    .collect()
}
53
/// Builds the default vocabulary that marks a declared intent as
/// administrative.
///
/// Admin is the highest tier: read, write and admin operations pass the tier
/// rule. Delete operations are still flagged so that deletions performed by a
/// powerful session always leave an anomaly trace.
///
/// Rationale: these verbs imply system-level authority.
fn default_admin_intent_keywords() -> Vec<String> {
    let verbs: &[&str] = &[
        "admin",
        "manage",
        "configure",
        "setup",
        "install",
        "maintain",
        "operate",
        "provision",
    ];

    let mut keywords = Vec::with_capacity(verbs.len());
    keywords.extend(verbs.iter().map(|verb| verb.to_string()));
    keywords
}
70
/// The classified privilege tier of a session's declared intent.
/// Used to determine which operation types are anomalous.
///
/// Precedence: Admin > Write > Read > Unknown.
/// If multiple keyword sets match, the highest tier wins.
///
/// Variants are declared from lowest to highest tier, so the derived `Ord`
/// agrees with that precedence (`IntentTier::Admin > IntentTier::Write`, and
/// so on). Keep the declaration order in sync with the precedence rule.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum IntentTier {
    /// No intent keywords matched. Unclassified intents get maximum scrutiny:
    /// every operation is treated as anomalous.
    Unknown,
    /// Read-only intent: write/delete/admin ops are anomalous.
    Read,
    /// Write intent: admin ops are anomalous, read/write/delete are normal.
    Write,
    /// Admin intent: delete ops flagged, all others normal.
    Admin,
}
87
/// Builds the default list of substrings that mark tool-call arguments as
/// suspicious.
///
/// The scanner lower-cases the serialized JSON argument text before searching
/// it, so every entry here is written in lowercase.
///
/// Categories:
/// - Destructive shell commands: "rm -rf", "rm -f", "mkfs", "dd if=", "chmod 777"
/// - Destructive SQL: "drop table", "drop database", "delete from", "truncate table"
/// - SQL injection fragments: "; --", "' or '1'='1", "union select"
/// - Path traversal: "../../../", "..\\..\\"
fn default_suspicious_arg_patterns() -> Vec<String> {
    const PATTERNS: [&str; 14] = [
        // Destructive shell commands.
        "rm -rf",
        "rm -f",
        "mkfs",
        "dd if=",
        "chmod 777",
        // Destructive SQL.
        "drop table",
        "drop database",
        "delete from",
        "truncate table",
        // SQL injection fragments.
        "; --",
        "' or '1'='1",
        "union select",
        // Path traversal (POSIX and Windows separators).
        "../../../",
        "..\\..\\",
    ];

    PATTERNS.iter().map(|pattern| (*pattern).to_owned()).collect()
}
115
/// Substrings that make an argument *key* suspicious.
///
/// The structural check lower-cases each argument key and flags the call if
/// the key contains any of these fragments. Fragments are tried in the order
/// listed, and the first hit is the one named in the anomaly reason.
const SUSPICIOUS_ARG_KEY_FRAGMENTS: &[&str] = &[
    "exec",
    "shell",
    "command",
    "query",
    "sql",
    "eval",
    "script",
    "code",
    "run",
    "system",
];
121
/// Longest string argument value, in bytes, that the structural check accepts.
///
/// A string value strictly longer than this (1 KiB) is reported as a
/// potential payload injection.
const MAX_READ_ARG_STRING_LEN: usize = 1024;
125
126/// Configuration for anomaly detection behavior.
127#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct AnomalyConfig {
129    /// Whether anomalies should escalate to deny (hard block).
130    /// If false, anomalies are logged and flagged but the request proceeds.
131    #[serde(default)]
132    pub escalate_to_deny: bool,
133
134    /// Keywords that indicate a session's declared intent is read-only.
135    /// If the intent matches any of these (case-insensitive word boundary),
136    /// then write/delete/admin operations are flagged as anomalies.
137    #[serde(default = "default_read_intent_keywords")]
138    pub read_intent_keywords: Vec<String>,
139
140    /// Keywords that indicate a session's declared intent includes writes.
141    /// Write-intent sessions may perform read and write operations, but
142    /// admin operations are flagged.
143    #[serde(default = "default_write_intent_keywords")]
144    pub write_intent_keywords: Vec<String>,
145
146    /// Keywords that indicate a session's declared intent is administrative.
147    /// Admin-intent sessions may perform any operation without anomaly flags.
148    #[serde(default = "default_admin_intent_keywords")]
149    pub admin_intent_keywords: Vec<String>,
150
151    /// Suspicious argument patterns that trigger anomaly detection in read sessions.
152    /// Matched case-insensitively against the serialized JSON argument text.
153    #[serde(default = "default_suspicious_arg_patterns")]
154    pub suspicious_arg_patterns: Vec<String>,
155}
156
157impl Default for AnomalyConfig {
158    fn default() -> Self {
159        Self {
160            // Default to deny: the less-secure default (false/flag-only) must
161            // require explicit operator opt-in, not the other way around.
162            escalate_to_deny: true,
163            read_intent_keywords: default_read_intent_keywords(),
164            write_intent_keywords: default_write_intent_keywords(),
165            admin_intent_keywords: default_admin_intent_keywords(),
166            suspicious_arg_patterns: default_suspicious_arg_patterns(),
167        }
168    }
169}
170
/// Verdict produced by the anomaly detector for a single tool call.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AnomalyResponse {
    /// Nothing anomalous was found.
    Normal,
    /// Soft flag: an anomaly was found, but the request is allowed to proceed.
    Flagged {
        /// Human-readable explanation of what was found.
        reason: String,
    },
    /// Hard block: an anomaly was found and the request should be denied.
    Denied {
        /// Human-readable explanation of what was found.
        reason: String,
    },
}
187
188/// Behavioral anomaly detector.
189pub struct AnomalyDetector {
190    config: AnomalyConfig,
191    /// Pre-compiled regex sets for intent word-boundary matching.
192    /// Built from config keywords at construction time. No per-call compilation.
193    read_intent_regex: RegexSet,
194    write_intent_regex: RegexSet,
195    admin_intent_regex: RegexSet,
196}
197
198fn build_regex_set(keywords: &[String], label: &str) -> RegexSet {
199    let patterns: Vec<String> = keywords
200        .iter()
201        .map(|p| format!(r"(?i)\b{}\b", regex::escape(p)))
202        .collect();
203    RegexSet::new(&patterns).unwrap_or_else(|e| panic!("{label} must be valid regex atoms: {e}"))
204}
205
206impl AnomalyDetector {
207    /// Create a new anomaly detector with the given config.
208    /// Pre-compiles intent keywords into RegexSets for O(1) matching.
209    pub fn new(config: AnomalyConfig) -> Self {
210        let read_intent_regex =
211            build_regex_set(&config.read_intent_keywords, "read_intent_keywords");
212        let write_intent_regex =
213            build_regex_set(&config.write_intent_keywords, "write_intent_keywords");
214        let admin_intent_regex =
215            build_regex_set(&config.admin_intent_keywords, "admin_intent_keywords");
216        Self {
217            config,
218            read_intent_regex,
219            write_intent_regex,
220            admin_intent_regex,
221        }
222    }
223
224    /// Classify the declared intent into a privilege tier.
225    /// Highest matching tier wins: Admin > Write > Read > Unknown.
226    pub fn classify_intent(&self, declared_intent: &str) -> IntentTier {
227        if self.admin_intent_regex.is_match(declared_intent) {
228            IntentTier::Admin
229        } else if self.write_intent_regex.is_match(declared_intent) {
230            IntentTier::Write
231        } else if self.read_intent_regex.is_match(declared_intent) {
232            IntentTier::Read
233        } else {
234            IntentTier::Unknown
235        }
236    }
237
238    /// Detect anomalies for a tool call given the session's declared intent.
239    ///
240    /// Tiered detection:
241    /// - Admin intent: delete ops flagged, all others normal
242    /// - Write intent: admin ops flagged, everything else normal
243    /// - Read intent: write/delete/admin ops flagged
244    /// - Unknown intent: no anomaly detection
245    pub fn detect(
246        &self,
247        declared_intent: &str,
248        operation_type: OperationType,
249        tool_name: &str,
250    ) -> AnomalyResponse {
251        self.detect_with_args(declared_intent, operation_type, tool_name, None)
252    }
253
254    /// Detect anomalies with argument-level scanning.
255    pub fn detect_with_args(
256        &self,
257        declared_intent: &str,
258        operation_type: OperationType,
259        tool_name: &str,
260        arguments: Option<&serde_json::Value>,
261    ) -> AnomalyResponse {
262        let tier = self.classify_intent(declared_intent);
263
264        let is_anomalous = match tier {
265            // Unknown intent: flag ALL operations. Unclassified intents receive
266            // maximum scrutiny — less specific intent declarations should trigger
267            // more monitoring, not less. Previously returned false (no monitoring),
268            // allowing trivial bypass by declaring non-keyword intents like "do stuff".
269            IntentTier::Unknown => true,
270            // Admin intent: flag delete operations. Admin sessions are powerful
271            // and deletion should always leave an anomaly trace for forensics.
272            IntentTier::Admin => operation_type == OperationType::Delete,
273            IntentTier::Write => operation_type == OperationType::Admin,
274            IntentTier::Read => !matches!(operation_type, OperationType::Read),
275        };
276
277        if !is_anomalous {
278            // Argument scanning for destructive patterns applies to ALL tiers.
279            // Previously limited to Read/Unknown, allowing Write and Admin
280            // sessions to pass arbitrary destructive content (rm -rf, drop
281            // database, SQL injection) without any argument inspection.
282            if let Some(args) = arguments {
283                // Pattern-based scan against configurable suspicious patterns.
284                // Strip non-printing characters and zero-width Unicode before matching
285                // to resist evasion via \u200B (zero-width space), \u00AD (soft hyphen),
286                // or JSON unicode escapes that survive to_lowercase().
287                let raw = args.to_string().to_lowercase();
288                let text: String = raw
289                    .chars()
290                    .filter(|c| {
291                        !c.is_control()
292                            && *c != '\u{200B}'
293                            && *c != '\u{200C}'
294                            && *c != '\u{200D}'
295                            && *c != '\u{FEFF}'
296                            && *c != '\u{00AD}'
297                    })
298                    .collect();
299                for pattern in &self.config.suspicious_arg_patterns {
300                    if text.contains(pattern.as_str()) {
301                        let reason = format!(
302                            "suspicious argument content in tool '{}': pattern '{}' detected",
303                            tool_name, pattern
304                        );
305                        return if self.config.escalate_to_deny {
306                            AnomalyResponse::Denied { reason }
307                        } else {
308                            AnomalyResponse::Flagged { reason }
309                        };
310                    }
311                }
312
313                // Structural analysis for read/unknown-intent sessions.
314                if let Some(reason) = check_structural_anomalies(args, tool_name) {
315                    return if self.config.escalate_to_deny {
316                        AnomalyResponse::Denied { reason }
317                    } else {
318                        AnomalyResponse::Flagged { reason }
319                    };
320                }
321            }
322            return AnomalyResponse::Normal;
323        }
324
325        let tier_label = match tier {
326            IntentTier::Unknown => "unknown (unclassified)",
327            IntentTier::Read => "read-only",
328            IntentTier::Write => "write",
329            IntentTier::Admin => "admin",
330        };
331
332        let reason = format!(
333            "session intent '{}' classified as {}, but tool '{}' classified as {:?}",
334            declared_intent, tier_label, tool_name, operation_type
335        );
336
337        tracing::warn!(
338            intent = %declared_intent,
339            tool = %tool_name,
340            operation = ?operation_type,
341            intent_tier = ?tier,
342            "behavioral anomaly detected"
343        );
344
345        if self.config.escalate_to_deny {
346            AnomalyResponse::Denied { reason }
347        } else {
348            AnomalyResponse::Flagged { reason }
349        }
350    }
351}
352
353/// Check for structural anomalies in arguments for a read-intent session.
354///
355/// Read operations typically take simple scalar parameters (strings, numbers,
356/// booleans). Complex structures or known-dangerous key names suggest an
357/// attempt to smuggle write/execute semantics through a read-classified call.
358///
359/// Returns `Some(reason)` if an anomaly is found, `None` otherwise.
360fn check_structural_anomalies(args: &serde_json::Value, tool_name: &str) -> Option<String> {
361    let obj = args.as_object()?;
362
363    for (key, value) in obj {
364        // 1. Nested objects/arrays: read ops should only have scalar params.
365        if value.is_object() || value.is_array() {
366            return Some(format!(
367                "structural anomaly in tool '{}': argument '{}' contains a nested {} in a read session",
368                tool_name,
369                key,
370                if value.is_object() { "object" } else { "array" },
371            ));
372        }
373
374        // 2. Suspicious argument key names.
375        let key_lower = key.to_lowercase();
376        for fragment in SUSPICIOUS_ARG_KEY_FRAGMENTS {
377            if key_lower.contains(fragment) {
378                return Some(format!(
379                    "structural anomaly in tool '{}': argument key '{}' contains suspicious fragment '{}'",
380                    tool_name, key, fragment,
381                ));
382            }
383        }
384
385        // 3. Long string values (potential payload injection).
386        if let Some(s) = value.as_str()
387            && s.len() > MAX_READ_ARG_STRING_LEN
388        {
389            return Some(format!(
390                "structural anomaly in tool '{}': argument '{}' has a string value of {} bytes (max {})",
391                tool_name,
392                key,
393                s.len(),
394                MAX_READ_ARG_STRING_LEN,
395            ));
396        }
397    }
398
399    None
400}
401
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a detector from the default config with only `escalate_to_deny`
    /// overridden.
    fn detector_with_escalation(escalate_to_deny: bool) -> AnomalyDetector {
        AnomalyDetector::new(AnomalyConfig {
            escalate_to_deny,
            ..Default::default()
        })
    }

    #[test]
    fn normal_read_sequence_no_anomaly() {
        let detector = AnomalyDetector::new(AnomalyConfig::default());

        let result = detector.detect(
            "read and analyze the log files",
            OperationType::Read,
            "read_file",
        );
        assert_eq!(result, AnomalyResponse::Normal);

        let result = detector.detect("summarize the report", OperationType::Read, "get_document");
        assert_eq!(result, AnomalyResponse::Normal);
    }

    #[test]
    fn write_in_read_only_session_flagged() {
        let detector = detector_with_escalation(false);

        let result = detector.detect(
            "read the configuration files",
            OperationType::Write,
            "write_file",
        );
        assert!(matches!(result, AnomalyResponse::Flagged { .. }));

        // Delete in a read session should also flag.
        let result = detector.detect(
            "analyze the database",
            OperationType::Delete,
            "delete_record",
        );
        assert!(matches!(result, AnomalyResponse::Flagged { .. }));
    }

    #[test]
    fn anomaly_escalation_to_deny() {
        let detector = detector_with_escalation(true);

        let result = detector.detect("review the source code", OperationType::Write, "write_file");
        match result {
            AnomalyResponse::Denied { reason } => {
                assert!(reason.contains("review the source code"));
                assert!(reason.contains("write_file"));
            }
            other => panic!("expected Denied, got {other:?}"),
        }
    }

    #[test]
    fn admin_in_read_session_detected() {
        let detector = detector_with_escalation(false);

        let result = detector.detect(
            "check the system status",
            OperationType::Admin,
            "configure_settings",
        );
        assert!(matches!(result, AnomalyResponse::Flagged { .. }));
    }

    // ── Tiered intent classification tests ───────────────────────────

    #[test]
    fn classify_intent_tiers() {
        let detector = AnomalyDetector::new(AnomalyConfig::default());

        assert_eq!(detector.classify_intent("read the logs"), IntentTier::Read);
        assert_eq!(
            detector.classify_intent("analyze reports"),
            IntentTier::Read
        );
        assert_eq!(
            detector.classify_intent("create new user"),
            IntentTier::Write
        );
        assert_eq!(
            detector.classify_intent("deploy the app"),
            IntentTier::Write
        );
        assert_eq!(
            detector.classify_intent("manage the servers"),
            IntentTier::Admin
        );
        assert_eq!(
            detector.classify_intent("configure settings"),
            IntentTier::Admin
        );
        assert_eq!(
            detector.classify_intent("do something"),
            IntentTier::Unknown
        );
    }

    #[test]
    fn admin_intent_highest_precedence() {
        let detector = AnomalyDetector::new(AnomalyConfig::default());

        // "manage" (admin) + "read" (read) → admin wins
        assert_eq!(
            detector.classify_intent("manage and read the system"),
            IntentTier::Admin
        );
    }

    #[test]
    fn write_intent_beats_read() {
        let detector = AnomalyDetector::new(AnomalyConfig::default());

        // "create" (write) + "read" (read) → write wins
        assert_eq!(
            detector.classify_intent("read files and create backups"),
            IntentTier::Write
        );
    }

    #[test]
    fn write_intent_allows_writes_but_flags_admin() {
        let detector = detector_with_escalation(false);

        // Write intent allows read, write and delete operations.
        let result = detector.detect("create new documents", OperationType::Read, "list_files");
        assert_eq!(result, AnomalyResponse::Normal);

        let result = detector.detect("create new documents", OperationType::Write, "write_file");
        assert_eq!(result, AnomalyResponse::Normal);

        let result = detector.detect("create new documents", OperationType::Delete, "delete_file");
        assert_eq!(result, AnomalyResponse::Normal);

        // But admin ops are flagged.
        let result = detector.detect(
            "create new documents",
            OperationType::Admin,
            "configure_settings",
        );
        assert!(matches!(result, AnomalyResponse::Flagged { .. }));
    }

    #[test]
    fn admin_intent_allows_non_delete_operations() {
        let detector = detector_with_escalation(true);

        for op in [
            OperationType::Read,
            OperationType::Write,
            OperationType::Admin,
        ] {
            let result = detector.detect("manage the cluster", op, "any_tool");
            assert_eq!(
                result,
                AnomalyResponse::Normal,
                "admin intent should allow {op:?}"
            );
        }
    }

    #[test]
    fn admin_intent_flags_delete_operations() {
        let detector = detector_with_escalation(false);

        let result = detector.detect(
            "manage the cluster",
            OperationType::Delete,
            "delete_resource",
        );
        assert!(
            matches!(result, AnomalyResponse::Flagged { .. }),
            "admin intent should flag delete operations, got: {result:?}"
        );
    }

    #[test]
    fn admin_intent_denies_delete_when_escalated() {
        let detector = detector_with_escalation(true);

        let result = detector.detect(
            "manage the cluster",
            OperationType::Delete,
            "delete_resource",
        );
        assert!(
            matches!(result, AnomalyResponse::Denied { .. }),
            "admin intent with escalation should deny delete operations, got: {result:?}"
        );
    }

    #[test]
    fn unknown_intent_flags_everything() {
        let detector = detector_with_escalation(false);

        for op in [
            OperationType::Read,
            OperationType::Write,
            OperationType::Delete,
            OperationType::Admin,
        ] {
            let result = detector.detect("do something", op, "any_tool");
            assert!(
                matches!(result, AnomalyResponse::Flagged { .. }),
                "unknown intent should flag {op:?}, got {result:?}"
            );
        }
    }

    #[test]
    fn unknown_intent_denies_when_escalated() {
        let detector = detector_with_escalation(true);

        for op in [
            OperationType::Read,
            OperationType::Write,
            OperationType::Delete,
            OperationType::Admin,
        ] {
            let result = detector.detect("do something", op, "any_tool");
            assert!(
                matches!(result, AnomalyResponse::Denied { .. }),
                "unknown intent with escalation should deny {op:?}, got {result:?}"
            );
        }
    }

    /// RT-202: a call with suspicious arguments under an unclassified intent
    /// must never come back `Normal`.
    ///
    /// Unknown intent is flagged by the tier rule, which fires before any
    /// argument inspection, so the reason names the unclassified tier rather
    /// than the "rm -rf" pattern.
    #[test]
    fn unknown_intent_scans_arguments_for_suspicious_patterns() {
        let detector = detector_with_escalation(false);

        let args = serde_json::json!({"command": "rm -rf /"});
        let result = detector.detect_with_args(
            "do something", // Unknown intent (no keyword match)
            OperationType::Read,
            "some_tool",
            Some(&args),
        );
        assert!(
            matches!(
                result,
                AnomalyResponse::Flagged { ref reason } if reason.contains("unknown (unclassified)")
            ),
            "unknown intent with suspicious args should be flagged at the tier level, got {result:?}"
        );
    }

    /// A call with a suspicious argument key under an unclassified intent
    /// must never come back `Normal`. As above, the tier rule is what flags it.
    #[test]
    fn unknown_intent_detects_structural_anomalies() {
        let detector = detector_with_escalation(false);

        // Argument with suspicious key name "exec_command" (contains "exec" fragment).
        let args = serde_json::json!({"exec_command": "ls"});
        let result = detector.detect_with_args(
            "perform tasks", // Unknown intent
            OperationType::Read,
            "run_tool",
            Some(&args),
        );
        assert!(
            matches!(
                result,
                AnomalyResponse::Flagged { ref reason } if reason.contains("unknown (unclassified)")
            ),
            "unknown intent with suspicious keys should be flagged at the tier level, got {result:?}"
        );
    }

    /// Intent classification uses case-insensitive word-boundary matching,
    /// so uppercase keywords should still match their respective tiers.
    #[test]
    fn custom_keywords_case_insensitive() {
        let detector = AnomalyDetector::new(AnomalyConfig::default());

        // "READ" (uppercase) should match the read tier
        assert_eq!(detector.classify_intent("READ FILES"), IntentTier::Read);

        // "ANALYZE" (uppercase) should also match read tier
        assert_eq!(detector.classify_intent("ANALYZE DATA"), IntentTier::Read);

        // "CREATE" (uppercase) should match write tier
        assert_eq!(
            detector.classify_intent("CREATE REPORTS"),
            IntentTier::Write
        );

        // "MANAGE" (uppercase) should match admin tier
        assert_eq!(
            detector.classify_intent("MANAGE SERVERS"),
            IntentTier::Admin
        );

        // Mixed case
        assert_eq!(
            detector.classify_intent("Read And Deploy"),
            IntentTier::Write
        );
    }

    /// Destructive arguments in a read session must be denied when escalation
    /// is on, and the reason must name the matched pattern.
    #[test]
    fn argument_evasion_destructive_args() {
        let detector = detector_with_escalation(true);
        let args = serde_json::json!({"path": "/etc", "command": "rm -rf /"});
        let result = detector.detect_with_args(
            "read and analyze files",
            OperationType::Read,
            "read_file",
            Some(&args),
        );
        assert!(
            matches!(result, AnomalyResponse::Denied { ref reason } if reason.contains("rm -rf")),
            "destructive args should be denied, got: {result:?}"
        );
    }

    /// SQL injection in arguments must be denied under the default config,
    /// even though the payload is upper-case.
    #[test]
    fn argument_evasion_sql_injection() {
        let detector = AnomalyDetector::new(AnomalyConfig::default());
        let args = serde_json::json!({"query": "'; DROP TABLE users; --"});
        let result = detector.detect_with_args(
            "search the database",
            OperationType::Read,
            "search_records",
            Some(&args),
        );
        assert!(
            matches!(result, AnomalyResponse::Denied { ref reason } if reason.contains("drop table")),
            "SQL injection should be denied, got: {result:?}"
        );
    }

    // ── Configurable argument patterns ──────────────────────────────

    /// Custom pattern supplied via AnomalyConfig triggers detection.
    #[test]
    fn configurable_patterns_trigger_detection() {
        let detector = AnomalyDetector::new(AnomalyConfig {
            escalate_to_deny: false,
            suspicious_arg_patterns: vec!["super_secret_payload".into()],
            ..Default::default()
        });
        let args = serde_json::json!({"data": "contains super_secret_payload here"});
        let result = detector.detect_with_args(
            "read the logs",
            OperationType::Read,
            "read_file",
            Some(&args),
        );
        assert!(
            matches!(result, AnomalyResponse::Flagged { ref reason } if reason.contains("super_secret_payload")),
            "custom pattern should trigger flagging, got: {result:?}"
        );
    }

    // ── Structural argument analysis (any tier that passes the tier rule) ──

    /// Nested array in a read session should be flagged.
    #[test]
    fn nested_array_in_read_session_flagged() {
        let detector = detector_with_escalation(false);
        let args = serde_json::json!({"files": ["a", "b"]});
        let result = detector.detect_with_args(
            "read the config",
            OperationType::Read,
            "read_file",
            Some(&args),
        );
        assert!(
            matches!(result, AnomalyResponse::Flagged { ref reason } if reason.contains("nested") && reason.contains("array")),
            "nested array should be flagged, got: {result:?}"
        );
    }

    /// Suspicious key name in a read session should be flagged.
    #[test]
    fn suspicious_key_in_read_session_flagged() {
        let detector = detector_with_escalation(false);
        let args = serde_json::json!({"shell_command": "ls"});
        let result = detector.detect_with_args(
            "read the config",
            OperationType::Read,
            "read_file",
            Some(&args),
        );
        assert!(
            matches!(result, AnomalyResponse::Flagged { ref reason } if reason.contains("suspicious fragment")),
            "suspicious key should be flagged, got: {result:?}"
        );
    }

    /// String value > 1KB in a read session should be flagged.
    #[test]
    fn long_value_in_read_session_flagged() {
        let detector = detector_with_escalation(false);
        let long_string = "A".repeat(1025);
        let args = serde_json::json!({"payload": long_string});
        let result = detector.detect_with_args(
            "read the config",
            OperationType::Read,
            "read_file",
            Some(&args),
        );
        assert!(
            matches!(result, AnomalyResponse::Flagged { ref reason } if reason.contains("1025 bytes")),
            "long value should be flagged, got: {result:?}"
        );
    }

    /// Structural checks DO fire for admin-intent sessions: argument
    /// inspection applies to every tier whose operation passed the tier rule.
    #[test]
    fn structural_checks_apply_to_admin_sessions() {
        let detector = detector_with_escalation(false);
        let args = serde_json::json!({"files": ["a", "b"], "shell_command": "ls"});
        let result = detector.detect_with_args(
            "manage the servers",
            OperationType::Read,
            "read_file",
            Some(&args),
        );
        assert!(
            matches!(result, AnomalyResponse::Flagged { .. }),
            "admin session should trigger structural checks, got {result:?}"
        );
    }

    /// Structural checks DO fire for write-intent sessions as well.
    #[test]
    fn structural_checks_apply_to_write_sessions() {
        let detector = detector_with_escalation(false);
        let args = serde_json::json!({"files": ["a", "b"], "shell_command": "ls"});
        let result = detector.detect_with_args(
            "create the documents",
            OperationType::Read,
            "read_file",
            Some(&args),
        );
        assert!(
            matches!(result, AnomalyResponse::Flagged { .. }),
            "write session should trigger structural checks, got {result:?}"
        );
    }

    /// Normal scalar arguments in a read session should NOT be flagged.
    #[test]
    fn normal_read_args_not_flagged() {
        let detector = AnomalyDetector::new(AnomalyConfig::default());
        let args = serde_json::json!({"path": "/etc/config", "recursive": true});
        let result = detector.detect_with_args(
            "read the config",
            OperationType::Read,
            "read_file",
            Some(&args),
        );
        assert_eq!(
            result,
            AnomalyResponse::Normal,
            "normal scalar args should not be flagged, got: {result:?}"
        );
    }
}
arbiter_behavior/detector.rs

arbiter_behavior/
detector.rs