1use regex::RegexSet;
7use serde::{Deserialize, Serialize};
8
9use crate::classifier::OperationType;
10
/// Returns the built-in keywords that classify a declared intent as read-only.
///
/// Serves as the serde default for `AnomalyConfig::read_intent_keywords` and is
/// also used by `AnomalyConfig::default()`.
fn default_read_intent_keywords() -> Vec<String> {
    const KEYWORDS: &[&str] = &[
        "read",
        "analyze",
        "summarize",
        "review",
        "inspect",
        "view",
        "check",
        "list",
        "search",
        "query",
        "describe",
        "explain",
    ];
    KEYWORDS.iter().map(|keyword| String::from(*keyword)).collect()
}
34
/// Returns the built-in keywords that classify a declared intent as write-level.
///
/// Serves as the serde default for `AnomalyConfig::write_intent_keywords` and is
/// also used by `AnomalyConfig::default()`.
fn default_write_intent_keywords() -> Vec<String> {
    const KEYWORDS: &[&str] = &[
        "write",
        "create",
        "update",
        "modify",
        "edit",
        "deploy",
        "build",
        "generate",
        "publish",
        "upload",
    ];
    KEYWORDS.iter().map(|keyword| String::from(*keyword)).collect()
}
53
/// Returns the built-in keywords that classify a declared intent as admin-level.
///
/// Serves as the serde default for `AnomalyConfig::admin_intent_keywords` and is
/// also used by `AnomalyConfig::default()`.
fn default_admin_intent_keywords() -> Vec<String> {
    const KEYWORDS: &[&str] = &[
        "admin",
        "manage",
        "configure",
        "setup",
        "install",
        "maintain",
        "operate",
        "provision",
    ];
    KEYWORDS.iter().map(|keyword| String::from(*keyword)).collect()
}
70
/// Privilege tier inferred from a session's declared intent text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IntentTier {
    /// None of the configured keyword sets matched the declared intent.
    Unknown,
    /// A read-intent keyword matched (and no write or admin keyword did).
    Read,
    /// A write-intent keyword matched (and no admin keyword did).
    Write,
    /// An admin-intent keyword matched; checked first, so it wins over the others.
    Admin,
}
87
/// Returns the built-in substrings that mark tool-call arguments as suspicious.
///
/// The list covers destructive shell commands, destructive or injected SQL, and
/// path-traversal sequences. It serves as the serde default for
/// `AnomalyConfig::suspicious_arg_patterns` and is also used by
/// `AnomalyConfig::default()`.
fn default_suspicious_arg_patterns() -> Vec<String> {
    const PATTERNS: &[&str] = &[
        // Destructive shell commands.
        "rm -rf",
        "rm -f",
        "mkfs",
        "dd if=",
        "chmod 777",
        // Destructive or injected SQL.
        "drop table",
        "drop database",
        "delete from",
        "truncate table",
        "; --",
        "' or '1'='1",
        "union select",
        // Path traversal (POSIX and Windows separators).
        "../../../",
        "..\\..\\",
    ];
    PATTERNS.iter().map(|pattern| String::from(*pattern)).collect()
}
115
/// Substrings that make a top-level argument key suspicious.
///
/// `check_structural_anomalies` compares these against the lowercased key, so
/// every entry must itself be lowercase.
const SUSPICIOUS_ARG_KEY_FRAGMENTS: &[&str] = &[
    "exec",
    "shell",
    "command",
    "query",
    "sql",
    "eval",
    "script",
    "code",
    "run",
    "system",
];

/// Largest top-level string argument, in bytes, that `check_structural_anomalies`
/// accepts before reporting a structural anomaly.
const MAX_READ_ARG_STRING_LEN: usize = 1024;
125
126#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct AnomalyConfig {
129 #[serde(default)]
132 pub escalate_to_deny: bool,
133
134 #[serde(default = "default_read_intent_keywords")]
138 pub read_intent_keywords: Vec<String>,
139
140 #[serde(default = "default_write_intent_keywords")]
144 pub write_intent_keywords: Vec<String>,
145
146 #[serde(default = "default_admin_intent_keywords")]
149 pub admin_intent_keywords: Vec<String>,
150
151 #[serde(default = "default_suspicious_arg_patterns")]
154 pub suspicious_arg_patterns: Vec<String>,
155}
156
157impl Default for AnomalyConfig {
158 fn default() -> Self {
159 Self {
160 escalate_to_deny: true,
163 read_intent_keywords: default_read_intent_keywords(),
164 write_intent_keywords: default_write_intent_keywords(),
165 admin_intent_keywords: default_admin_intent_keywords(),
166 suspicious_arg_patterns: default_suspicious_arg_patterns(),
167 }
168 }
169}
170
/// Outcome of an anomaly check for a single tool call.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AnomalyResponse {
    /// Nothing suspicious was found.
    Normal,
    /// An anomaly was found and escalation to denial is disabled.
    Flagged {
        /// Human-readable description of what was detected.
        reason: String,
    },
    /// An anomaly was found and escalation to denial is enabled.
    Denied {
        /// Human-readable description of what was detected.
        reason: String,
    },
}
187
188pub struct AnomalyDetector {
190 config: AnomalyConfig,
191 read_intent_regex: RegexSet,
194 write_intent_regex: RegexSet,
195 admin_intent_regex: RegexSet,
196}
197
198fn build_regex_set(keywords: &[String], label: &str) -> RegexSet {
199 let patterns: Vec<String> = keywords
200 .iter()
201 .map(|p| format!(r"(?i)\b{}\b", regex::escape(p)))
202 .collect();
203 RegexSet::new(&patterns).unwrap_or_else(|e| panic!("{label} must be valid regex atoms: {e}"))
204}
205
206impl AnomalyDetector {
207 pub fn new(config: AnomalyConfig) -> Self {
210 let read_intent_regex =
211 build_regex_set(&config.read_intent_keywords, "read_intent_keywords");
212 let write_intent_regex =
213 build_regex_set(&config.write_intent_keywords, "write_intent_keywords");
214 let admin_intent_regex =
215 build_regex_set(&config.admin_intent_keywords, "admin_intent_keywords");
216 Self {
217 config,
218 read_intent_regex,
219 write_intent_regex,
220 admin_intent_regex,
221 }
222 }
223
224 pub fn classify_intent(&self, declared_intent: &str) -> IntentTier {
227 if self.admin_intent_regex.is_match(declared_intent) {
228 IntentTier::Admin
229 } else if self.write_intent_regex.is_match(declared_intent) {
230 IntentTier::Write
231 } else if self.read_intent_regex.is_match(declared_intent) {
232 IntentTier::Read
233 } else {
234 IntentTier::Unknown
235 }
236 }
237
238 pub fn detect(
246 &self,
247 declared_intent: &str,
248 operation_type: OperationType,
249 tool_name: &str,
250 ) -> AnomalyResponse {
251 self.detect_with_args(declared_intent, operation_type, tool_name, None)
252 }
253
254 pub fn detect_with_args(
256 &self,
257 declared_intent: &str,
258 operation_type: OperationType,
259 tool_name: &str,
260 arguments: Option<&serde_json::Value>,
261 ) -> AnomalyResponse {
262 let tier = self.classify_intent(declared_intent);
263
264 let is_anomalous = match tier {
265 IntentTier::Unknown => true,
270 IntentTier::Admin => operation_type == OperationType::Delete,
273 IntentTier::Write => operation_type == OperationType::Admin,
274 IntentTier::Read => !matches!(operation_type, OperationType::Read),
275 };
276
277 if !is_anomalous {
278 if let Some(args) = arguments {
283 let raw = args.to_string().to_lowercase();
288 let text: String = raw
289 .chars()
290 .filter(|c| {
291 !c.is_control()
292 && *c != '\u{200B}'
293 && *c != '\u{200C}'
294 && *c != '\u{200D}'
295 && *c != '\u{FEFF}'
296 && *c != '\u{00AD}'
297 })
298 .collect();
299 for pattern in &self.config.suspicious_arg_patterns {
300 if text.contains(pattern.as_str()) {
301 let reason = format!(
302 "suspicious argument content in tool '{}': pattern '{}' detected",
303 tool_name, pattern
304 );
305 return if self.config.escalate_to_deny {
306 AnomalyResponse::Denied { reason }
307 } else {
308 AnomalyResponse::Flagged { reason }
309 };
310 }
311 }
312
313 if let Some(reason) = check_structural_anomalies(args, tool_name) {
315 return if self.config.escalate_to_deny {
316 AnomalyResponse::Denied { reason }
317 } else {
318 AnomalyResponse::Flagged { reason }
319 };
320 }
321 }
322 return AnomalyResponse::Normal;
323 }
324
325 let tier_label = match tier {
326 IntentTier::Unknown => "unknown (unclassified)",
327 IntentTier::Read => "read-only",
328 IntentTier::Write => "write",
329 IntentTier::Admin => "admin",
330 };
331
332 let reason = format!(
333 "session intent '{}' classified as {}, but tool '{}' classified as {:?}",
334 declared_intent, tier_label, tool_name, operation_type
335 );
336
337 tracing::warn!(
338 intent = %declared_intent,
339 tool = %tool_name,
340 operation = ?operation_type,
341 intent_tier = ?tier,
342 "behavioral anomaly detected"
343 );
344
345 if self.config.escalate_to_deny {
346 AnomalyResponse::Denied { reason }
347 } else {
348 AnomalyResponse::Flagged { reason }
349 }
350 }
351}
352
353fn check_structural_anomalies(args: &serde_json::Value, tool_name: &str) -> Option<String> {
361 let obj = args.as_object()?;
362
363 for (key, value) in obj {
364 if value.is_object() || value.is_array() {
366 return Some(format!(
367 "structural anomaly in tool '{}': argument '{}' contains a nested {} in a read session",
368 tool_name,
369 key,
370 if value.is_object() { "object" } else { "array" },
371 ));
372 }
373
374 let key_lower = key.to_lowercase();
376 for fragment in SUSPICIOUS_ARG_KEY_FRAGMENTS {
377 if key_lower.contains(fragment) {
378 return Some(format!(
379 "structural anomaly in tool '{}': argument key '{}' contains suspicious fragment '{}'",
380 tool_name, key, fragment,
381 ));
382 }
383 }
384
385 if let Some(s) = value.as_str()
387 && s.len() > MAX_READ_ARG_STRING_LEN
388 {
389 return Some(format!(
390 "structural anomaly in tool '{}': argument '{}' has a string value of {} bytes (max {})",
391 tool_name,
392 key,
393 s.len(),
394 MAX_READ_ARG_STRING_LEN,
395 ));
396 }
397 }
398
399 None
400}
401
#[cfg(test)]
mod tests {
    use super::*;

    /// Detector built from the default configuration (denies on anomaly).
    fn default_detector() -> AnomalyDetector {
        AnomalyDetector::new(AnomalyConfig::default())
    }

    /// Detector with default keyword/pattern lists and the given escalation mode.
    fn detector(escalate_to_deny: bool) -> AnomalyDetector {
        AnomalyDetector::new(AnomalyConfig {
            escalate_to_deny,
            ..Default::default()
        })
    }

    /// Runs a `Read` operation through the `read_file` tool with `args`.
    fn read_file_with(
        detector: &AnomalyDetector,
        intent: &str,
        args: &serde_json::Value,
    ) -> AnomalyResponse {
        detector.detect_with_args(intent, OperationType::Read, "read_file", Some(args))
    }

    #[test]
    fn normal_read_sequence_no_anomaly() {
        let detector = default_detector();

        assert_eq!(
            detector.detect(
                "read and analyze the log files",
                OperationType::Read,
                "read_file",
            ),
            AnomalyResponse::Normal
        );
        assert_eq!(
            detector.detect("summarize the report", OperationType::Read, "get_document"),
            AnomalyResponse::Normal
        );
    }

    #[test]
    fn write_in_read_only_session_flagged() {
        let detector = detector(false);

        let write_result = detector.detect(
            "read the configuration files",
            OperationType::Write,
            "write_file",
        );
        assert!(matches!(write_result, AnomalyResponse::Flagged { .. }));

        let delete_result = detector.detect(
            "analyze the database",
            OperationType::Delete,
            "delete_record",
        );
        assert!(matches!(delete_result, AnomalyResponse::Flagged { .. }));
    }

    #[test]
    fn anomaly_escalation_to_deny() {
        let detector = detector(true);

        let result = detector.detect("review the source code", OperationType::Write, "write_file");
        assert!(matches!(result, AnomalyResponse::Denied { .. }));

        // The denial reason must name both the declared intent and the tool.
        if let AnomalyResponse::Denied { reason } = result {
            assert!(reason.contains("review the source code"));
            assert!(reason.contains("write_file"));
        }
    }

    #[test]
    fn admin_in_read_session_detected() {
        let detector = detector(false);

        let result = detector.detect(
            "check the system status",
            OperationType::Admin,
            "configure_settings",
        );
        assert!(matches!(result, AnomalyResponse::Flagged { .. }));
    }

    #[test]
    fn classify_intent_tiers() {
        let detector = default_detector();

        let cases = [
            ("read the logs", IntentTier::Read),
            ("analyze reports", IntentTier::Read),
            ("create new user", IntentTier::Write),
            ("deploy the app", IntentTier::Write),
            ("manage the servers", IntentTier::Admin),
            ("configure settings", IntentTier::Admin),
            ("do something", IntentTier::Unknown),
        ];
        for (intent, expected) in cases {
            assert_eq!(detector.classify_intent(intent), expected);
        }
    }

    #[test]
    fn admin_intent_highest_precedence() {
        let detector = default_detector();

        // Contains both an admin and a read keyword; admin must win.
        assert_eq!(
            detector.classify_intent("manage and read the system"),
            IntentTier::Admin
        );
    }

    #[test]
    fn write_intent_beats_read() {
        let detector = default_detector();

        // Contains both a read and a write keyword; write must win.
        assert_eq!(
            detector.classify_intent("read files and create backups"),
            IntentTier::Write
        );
    }

    #[test]
    fn write_intent_allows_writes_but_flags_admin() {
        let detector = detector(false);
        let intent = "create new documents";

        let allowed = [
            (OperationType::Read, "list_files"),
            (OperationType::Write, "write_file"),
            (OperationType::Delete, "delete_file"),
        ];
        for (op, tool) in allowed {
            assert_eq!(detector.detect(intent, op, tool), AnomalyResponse::Normal);
        }

        let result = detector.detect(intent, OperationType::Admin, "configure_settings");
        assert!(matches!(result, AnomalyResponse::Flagged { .. }));
    }

    #[test]
    fn admin_intent_allows_non_delete_operations() {
        let detector = detector(true);

        for op in [
            OperationType::Read,
            OperationType::Write,
            OperationType::Admin,
        ] {
            let result = detector.detect("manage the cluster", op, "any_tool");
            assert_eq!(
                result,
                AnomalyResponse::Normal,
                "admin intent should allow {op:?}"
            );
        }
    }

    #[test]
    fn admin_intent_flags_delete_operations() {
        let detector = detector(false);

        let result = detector.detect(
            "manage the cluster",
            OperationType::Delete,
            "delete_resource",
        );
        assert!(
            matches!(result, AnomalyResponse::Flagged { .. }),
            "admin intent should flag delete operations, got: {result:?}"
        );
    }

    #[test]
    fn admin_intent_denies_delete_when_escalated() {
        let detector = detector(true);

        let result = detector.detect(
            "manage the cluster",
            OperationType::Delete,
            "delete_resource",
        );
        assert!(
            matches!(result, AnomalyResponse::Denied { .. }),
            "admin intent with escalation should deny delete operations, got: {result:?}"
        );
    }

    #[test]
    fn unknown_intent_flags_everything() {
        let detector = detector(false);

        for op in [
            OperationType::Read,
            OperationType::Write,
            OperationType::Delete,
            OperationType::Admin,
        ] {
            let result = detector.detect("do something", op, "any_tool");
            assert!(
                matches!(result, AnomalyResponse::Flagged { .. }),
                "unknown intent should flag {op:?}, got {result:?}"
            );
        }
    }

    #[test]
    fn unknown_intent_denies_when_escalated() {
        let detector = detector(true);

        for op in [
            OperationType::Read,
            OperationType::Write,
            OperationType::Delete,
            OperationType::Admin,
        ] {
            let result = detector.detect("do something", op, "any_tool");
            assert!(
                matches!(result, AnomalyResponse::Denied { .. }),
                "unknown intent with escalation should deny {op:?}, got {result:?}"
            );
        }
    }

    #[test]
    fn unknown_intent_scans_arguments_for_suspicious_patterns() {
        let detector = detector(false);

        // An unknown intent is anomalous on its own in `detect_with_args`, so this
        // call is flagged regardless of what the arguments contain.
        let args = serde_json::json!({"command": "rm -rf /"});
        let result =
            detector.detect_with_args("do something", OperationType::Read, "some_tool", Some(&args));
        assert!(
            matches!(result, AnomalyResponse::Flagged { .. }),
            "unknown intent should scan args for suspicious patterns, got {result:?}"
        );
    }

    #[test]
    fn unknown_intent_detects_structural_anomalies() {
        let detector = detector(false);

        // As above: the unknown intent alone is enough to flag this call.
        let args = serde_json::json!({"exec_command": "ls"});
        let result =
            detector.detect_with_args("perform tasks", OperationType::Read, "run_tool", Some(&args));
        assert!(
            matches!(result, AnomalyResponse::Flagged { .. }),
            "unknown intent should detect structural anomalies in args, got {result:?}"
        );
    }

    #[test]
    fn custom_keywords_case_insensitive() {
        let detector = default_detector();

        let cases = [
            ("READ FILES", IntentTier::Read),
            ("ANALYZE DATA", IntentTier::Read),
            ("CREATE REPORTS", IntentTier::Write),
            ("MANAGE SERVERS", IntentTier::Admin),
            // Mixed case with keywords from two tiers: write outranks read.
            ("Read And Deploy", IntentTier::Write),
        ];
        for (intent, expected) in cases {
            assert_eq!(detector.classify_intent(intent), expected);
        }
    }

    #[test]
    fn argument_evasion_destructive_args() {
        let detector = detector(true);
        let args = serde_json::json!({"path": "/etc", "command": "rm -rf /"});

        let result = read_file_with(&detector, "read and analyze files", &args);
        assert!(!matches!(result, AnomalyResponse::Normal));
    }

    #[test]
    fn argument_evasion_sql_injection() {
        let detector = default_detector();
        let args = serde_json::json!({"query": "'; DROP TABLE users; --"});

        let result = detector.detect_with_args(
            "search the database",
            OperationType::Read,
            "search_records",
            Some(&args),
        );
        assert!(!matches!(result, AnomalyResponse::Normal));
    }

    #[test]
    fn configurable_patterns_trigger_detection() {
        let detector = AnomalyDetector::new(AnomalyConfig {
            escalate_to_deny: false,
            suspicious_arg_patterns: vec!["super_secret_payload".into()],
            ..Default::default()
        });
        let args = serde_json::json!({"data": "contains super_secret_payload here"});

        let result = read_file_with(&detector, "read the logs", &args);
        assert!(
            matches!(result, AnomalyResponse::Flagged { ref reason } if reason.contains("super_secret_payload")),
            "custom pattern should trigger flagging, got: {result:?}"
        );
    }

    #[test]
    fn nested_array_in_read_session_flagged() {
        let detector = detector(false);
        let args = serde_json::json!({"files": ["a", "b"]});

        let result = read_file_with(&detector, "read the config", &args);
        assert!(
            matches!(result, AnomalyResponse::Flagged { ref reason } if reason.contains("nested") && reason.contains("array")),
            "nested array should be flagged, got: {result:?}"
        );
    }

    #[test]
    fn suspicious_key_in_read_session_flagged() {
        let detector = detector(false);
        let args = serde_json::json!({"shell_command": "ls"});

        let result = read_file_with(&detector, "read the config", &args);
        assert!(
            matches!(result, AnomalyResponse::Flagged { ref reason } if reason.contains("suspicious fragment")),
            "suspicious key should be flagged, got: {result:?}"
        );
    }

    #[test]
    fn long_value_in_read_session_flagged() {
        let detector = detector(false);
        // One byte over the limit.
        let long_string = "A".repeat(1025);
        let args = serde_json::json!({"payload": long_string});

        let result = read_file_with(&detector, "read the config", &args);
        assert!(
            matches!(result, AnomalyResponse::Flagged { ref reason } if reason.contains("1025 bytes")),
            "long value should be flagged, got: {result:?}"
        );
    }

    #[test]
    fn structural_checks_apply_to_admin_sessions() {
        let detector = detector(false);
        let args = serde_json::json!({"files": ["a", "b"], "shell_command": "ls"});

        let result = read_file_with(&detector, "manage the servers", &args);
        assert!(
            matches!(result, AnomalyResponse::Flagged { .. }),
            "admin session should now trigger structural checks, got {result:?}"
        );
    }

    #[test]
    fn structural_checks_apply_to_write_sessions() {
        let detector = detector(false);
        let args = serde_json::json!({"files": ["a", "b"], "shell_command": "ls"});

        let result = read_file_with(&detector, "create the documents", &args);
        assert!(
            matches!(result, AnomalyResponse::Flagged { .. }),
            "write session should now trigger structural checks, got {result:?}"
        );
    }

    #[test]
    fn normal_read_args_not_flagged() {
        let detector = default_detector();
        let args = serde_json::json!({"path": "/etc/config", "recursive": true});

        let result = read_file_with(&detector, "read the config", &args);
        assert_eq!(
            result,
            AnomalyResponse::Normal,
            "normal scalar args should not be flagged, got: {result:?}"
        );
    }
}