1use crate::error::{M1ndError, M1ndResult};
8use crate::graph::Graph;
9use crate::types::*;
10use serde::{Deserialize, Serialize};
11use std::collections::HashSet;
12use std::io::Write;
13use std::path::Path;
14use std::time::Instant;
15
/// Auto-extracted patterns keep at most this many nodes.
pub const MAX_PATTERN_NODES: usize = 12;
/// Auto-extracted patterns keep at most this many edges.
pub const MAX_PATTERN_EDGES: usize = 20;
/// Presumably the cap on stored antibodies; not referenced in this part of the file — TODO confirm.
pub const MAX_ANTIBODIES: usize = 500;
/// Default cap on matches collected for a single antibody.
pub const MAX_MATCHES_PER_ANTIBODY: usize = 100;
/// Presumably the minimum specificity for a manually created antibody; not referenced here — TODO confirm.
pub const MIN_SPECIFICITY: f32 = 0.15;
/// Auto-extraction returns `None` when the extracted pattern's specificity is below this value.
pub const MIN_AUTO_EXTRACT_SPECIFICITY: f32 = 0.4;
/// Per-antibody matching time budget, in milliseconds, used by `scan_antibodies`.
pub const PATTERN_MATCH_TIMEOUT_MS: u64 = 10;
/// Time budget for a whole `scan_antibodies` call, in milliseconds.
pub const TOTAL_SCAN_TIMEOUT_MS: u64 = 100;
/// Presumably the age in days after which an antibody counts as stale; not referenced here — TODO confirm.
pub const STALE_THRESHOLD_DAYS: u64 = 30;
/// Presumably the `pattern_similarity` score above which two patterns count as duplicates; not referenced here — TODO confirm.
pub const DUPLICATE_SIMILARITY_THRESHOLD: f32 = 0.9;
38
/// Severity attached to an antibody and copied onto each of its matches.
///
/// `scan_antibodies` ranks the variants `Info < Warning < Critical` when
/// filtering by minimum severity.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum AntibodySeverity {
    /// Lowest rank; the severity given to auto-extracted antibodies.
    Info,
    /// Middle rank.
    Warning,
    /// Highest rank.
    Critical,
}
51
/// A stored structural pattern plus the bookkeeping kept about it.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Antibody {
    /// Identifier; auto-extracted antibodies get one from `ab_generate_id`.
    pub id: String,
    /// Human-readable name, copied onto every match.
    pub name: String,
    /// Free-text description.
    pub description: String,
    /// The structural pattern this antibody searches for.
    pub pattern: AntibodyPattern,
    /// Severity copied onto matches and used for min-severity filtering.
    pub severity: AntibodySeverity,
    /// Running total of matches; incremented by `scan_antibodies`.
    pub match_count: u32,
    /// Creation time as seconds since the Unix epoch.
    pub created_at: f64,
    /// Time of the most recent scan that produced a match (seconds since the Unix epoch).
    pub last_match_at: Option<f64>,
    /// Agent that created the antibody.
    pub created_by: String,
    /// Query the antibody was derived from.
    pub source_query: String,
    /// External ids of the graph nodes the antibody was derived from.
    pub source_nodes: Vec<String>,
    /// Disabled antibodies are skipped by `scan_antibodies`, which also clears
    /// this flag when an antibody hits its per-antibody match cap.
    pub enabled: bool,
    /// Specificity score; see `compute_specificity`.
    pub specificity: f32,
}
85
/// A small subgraph template: constrained nodes, edges that must exist and
/// edges that must not exist.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AntibodyPattern {
    /// Pattern nodes; edges refer to them by index into this vector.
    pub nodes: Vec<PatternNode>,
    /// Edges that must exist between the bound nodes.
    pub edges: Vec<PatternEdge>,
    /// Edges that must NOT exist between the bound nodes; defaults to empty
    /// when missing from the serialized form.
    #[serde(default)]
    pub negative_edges: Vec<PatternEdge>,
}
100
/// Constraints a graph node must satisfy to be bound to one pattern slot.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PatternNode {
    /// Role name, copied into the `BoundNode` of a match.
    pub role: String,
    /// Optional node-type name, parsed case-insensitively; a name the parser
    /// does not recognise imposes no type constraint.
    pub node_type: Option<String>,
    /// Tags the node must all carry (ASCII case-insensitive); defaults to
    /// empty when missing from the serialized form.
    #[serde(default)]
    pub required_tags: Vec<String>,
    /// Optional text the node label must contain (case-insensitive); in
    /// `"exact"` match mode the whole label must equal it instead.
    pub label_contains: Option<String>,
}
114
/// A directed edge between two pattern nodes.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PatternEdge {
    /// Index of the source node in `AntibodyPattern::nodes`.
    pub source_idx: usize,
    /// Index of the target node in `AntibodyPattern::nodes`.
    pub target_idx: usize,
    /// Required relation name (ASCII case-insensitive); `None` accepts any relation.
    pub relation: Option<String>,
}
125
/// One occurrence of an antibody's pattern in the graph.
#[derive(Clone, Debug, Serialize)]
pub struct AntibodyMatch {
    /// Id of the antibody that matched.
    pub antibody_id: String,
    /// Name of the antibody that matched.
    pub antibody_name: String,
    /// Severity copied from the antibody.
    pub severity: AntibodySeverity,
    /// The graph nodes bound to the pattern nodes, in pattern order.
    pub bound_nodes: Vec<BoundNode>,
    /// Heuristic confidence, clamped to `0.1..=1.0`.
    pub confidence: f32,
    /// Source path of the first bound node, when it has one.
    pub location: Option<String>,
}
142
/// A graph node bound to a pattern role in a match.
#[derive(Clone, Debug, Serialize)]
pub struct BoundNode {
    /// External id of the node, or `node_<index>` when no external id is found.
    pub node_id: String,
    /// The node's label.
    pub label: String,
    /// Role of the pattern node this graph node was bound to.
    pub role: String,
    /// Source path from the node's provenance, if any.
    pub source_path: Option<String>,
    /// First line from the node's provenance, if any.
    pub line_start: Option<u32>,
    /// Last line from the node's provenance, if any.
    pub line_end: Option<u32>,
}
159
/// Aggregate result of one `scan_antibodies` call.
#[derive(Clone, Debug, Serialize)]
pub struct AntibodyScanResult {
    /// All collected matches, capped at the caller's `max_matches`.
    pub matches: Vec<AntibodyMatch>,
    /// Number of antibodies that passed the filters and were matched.
    pub antibodies_checked: u32,
    /// Reported node count for the scan (an estimate when the scope is `"changed"`).
    pub nodes_scanned: u32,
    /// Wall-clock duration of the scan in milliseconds.
    pub elapsed_ms: f64,
    /// The scope string the scan was called with.
    pub scan_scope: String,
    /// Ids of antibodies whose matching took at least the per-pattern time budget.
    pub timed_out_antibodies: Vec<String>,
    /// Ids of antibodies disabled during this scan for hitting their match cap.
    pub auto_disabled_antibodies: Vec<String>,
}
178
179fn ab_str_to_node_type(s: &str) -> Option<NodeType> {
183 match s.to_lowercase().as_str() {
184 "file" => Some(NodeType::File),
185 "directory" | "dir" => Some(NodeType::Directory),
186 "function" | "func" => Some(NodeType::Function),
187 "class" => Some(NodeType::Class),
188 "struct" => Some(NodeType::Struct),
189 "enum" => Some(NodeType::Enum),
190 "type" => Some(NodeType::Type),
191 "module" => Some(NodeType::Module),
192 "reference" | "ref" => Some(NodeType::Reference),
193 "concept" => Some(NodeType::Concept),
194 "material" => Some(NodeType::Material),
195 "process" => Some(NodeType::Process),
196 "product" => Some(NodeType::Product),
197 "supplier" => Some(NodeType::Supplier),
198 "regulatory" => Some(NodeType::Regulatory),
199 "system" => Some(NodeType::System),
200 "cost" => Some(NodeType::Cost),
201 _ => None,
202 }
203}
204
/// Returns the canonical display name of a node type.
///
/// Every `Custom` variant maps to the single string `"Custom"`; its payload
/// is not represented in the result.
fn ab_node_type_to_str(nt: NodeType) -> &'static str {
    match nt {
        NodeType::File => "File",
        NodeType::Directory => "Directory",
        NodeType::Function => "Function",
        NodeType::Class => "Class",
        NodeType::Struct => "Struct",
        NodeType::Enum => "Enum",
        NodeType::Type => "Type",
        NodeType::Module => "Module",
        NodeType::Reference => "Reference",
        NodeType::Concept => "Concept",
        NodeType::Material => "Material",
        NodeType::Process => "Process",
        NodeType::Product => "Product",
        NodeType::Supplier => "Supplier",
        NodeType::Regulatory => "Regulatory",
        NodeType::System => "System",
        NodeType::Cost => "Cost",
        // The custom payload is dropped; all custom types share one name.
        NodeType::Custom(_) => "Custom",
    }
}
228
229fn ab_node_constraint_count(node: &PatternNode) -> u32 {
231 let mut c: u32 = 0;
232 if node.node_type.is_some() {
233 c += 1;
234 }
235 if !node.required_tags.is_empty() {
236 c += 1;
237 }
238 if node.label_contains.is_some() {
239 c += 1;
240 }
241 c
242}
243
244fn ab_matches_node_constraints(graph: &Graph, node_id: NodeId, pattern_node: &PatternNode) -> bool {
246 let idx = node_id.as_usize();
247 if idx >= graph.nodes.count as usize {
248 return false;
249 }
250
251 if let Some(ref type_str) = pattern_node.node_type {
253 if let Some(expected_type) = ab_str_to_node_type(type_str) {
254 if graph.nodes.node_type[idx] != expected_type {
255 return false;
256 }
257 }
258 }
260
261 if let Some(ref substring) = pattern_node.label_contains {
263 let label = graph.strings.resolve(graph.nodes.label[idx]);
264 if !label.to_lowercase().contains(&substring.to_lowercase()) {
265 return false;
266 }
267 }
268
269 if !pattern_node.required_tags.is_empty() {
271 let node_tags = &graph.nodes.tags[idx];
272 for required_tag in &pattern_node.required_tags {
273 let tag_found = node_tags
274 .iter()
275 .any(|&t| graph.strings.resolve(t).eq_ignore_ascii_case(required_tag));
276 if !tag_found {
277 return false;
278 }
279 }
280 }
281
282 true
283}
284
285fn ab_edge_exists(
287 graph: &Graph,
288 source: NodeId,
289 target: NodeId,
290 relation: &Option<String>,
291) -> bool {
292 if !graph.finalized {
293 return false;
294 }
295 let range = graph.csr.out_range(source);
296 for i in range {
297 if graph.csr.targets[i] == target {
298 if let Some(ref rel) = relation {
299 let edge_rel = graph.strings.resolve(graph.csr.relations[i]);
300 if edge_rel.eq_ignore_ascii_case(rel) {
301 return true;
302 }
303 } else {
304 return true;
306 }
307 }
308 }
309 false
310}
311
312fn ab_any_edge_exists(graph: &Graph, source: NodeId, target: NodeId) -> bool {
314 if !graph.finalized {
315 return false;
316 }
317 let range = graph.csr.out_range(source);
318 for i in range {
319 if graph.csr.targets[i] == target {
320 return true;
321 }
322 }
323 false
324}
325
326fn ab_pick_anchor(pattern: &AntibodyPattern) -> usize {
328 let mut best_idx = 0;
329 let mut best_count = 0u32;
330 for (i, node) in pattern.nodes.iter().enumerate() {
331 let c = ab_node_constraint_count(node);
332 if c > best_count {
333 best_count = c;
334 best_idx = i;
335 }
336 }
337 best_idx
338}
339
340fn ab_outgoing_neighbors(graph: &Graph, node: NodeId) -> Vec<NodeId> {
342 if !graph.finalized {
343 return Vec::new();
344 }
345 let range = graph.csr.out_range(node);
346 let mut neighbors = Vec::with_capacity(range.len());
347 for i in range {
348 neighbors.push(graph.csr.targets[i]);
349 }
350 neighbors
351}
352
353fn ab_incoming_neighbors(graph: &Graph, node: NodeId) -> Vec<NodeId> {
355 if !graph.finalized {
356 return Vec::new();
357 }
358 let range = graph.csr.in_range(node);
359 let mut neighbors = Vec::with_capacity(range.len());
360 for i in range {
361 neighbors.push(graph.csr.rev_sources[i]);
362 }
363 neighbors
364}
365
366fn ab_connected_candidates(
368 graph: &Graph,
369 pattern: &AntibodyPattern,
370 binding: &[Option<NodeId>],
371 target_idx: usize,
372) -> Vec<NodeId> {
373 let mut candidates: Vec<NodeId> = Vec::new();
374 let mut seen = HashSet::new();
375
376 for edge in &pattern.edges {
378 if edge.target_idx == target_idx {
379 if let Some(src_node) = binding[edge.source_idx] {
380 for neighbor in ab_outgoing_neighbors(graph, src_node) {
382 if seen.insert(neighbor) {
383 candidates.push(neighbor);
384 }
385 }
386 }
387 }
388 if edge.source_idx == target_idx {
389 if let Some(tgt_node) = binding[edge.target_idx] {
390 for neighbor in ab_incoming_neighbors(graph, tgt_node) {
392 if seen.insert(neighbor) {
393 candidates.push(neighbor);
394 }
395 }
396 }
397 }
398 }
399
400 if candidates.is_empty() {
403 let n = graph.nodes.count as usize;
404 for i in 0..n {
405 let nid = NodeId::new(i as u32);
406 if seen.insert(nid) {
407 candidates.push(nid);
408 }
409 }
410 }
411
412 candidates
413}
414
415fn ab_verify_edges(graph: &Graph, edges: &[PatternEdge], binding: &[Option<NodeId>]) -> bool {
417 for edge in edges {
418 let src = match binding[edge.source_idx] {
419 Some(n) => n,
420 None => return false,
421 };
422 let tgt = match binding[edge.target_idx] {
423 Some(n) => n,
424 None => return false,
425 };
426 if !ab_edge_exists(graph, src, tgt, &edge.relation) {
427 return false;
428 }
429 }
430 true
431}
432
433fn ab_verify_negative_edges(
435 graph: &Graph,
436 negative_edges: &[PatternEdge],
437 binding: &[Option<NodeId>],
438) -> bool {
439 for edge in negative_edges {
440 let src = match binding[edge.source_idx] {
441 Some(n) => n,
442 None => continue, };
444 let tgt = match binding[edge.target_idx] {
445 Some(n) => n,
446 None => continue,
447 };
448 if edge.relation.is_some() {
449 if ab_edge_exists(graph, src, tgt, &edge.relation) {
451 return false;
452 }
453 } else {
454 if ab_any_edge_exists(graph, src, tgt) {
456 return false;
457 }
458 }
459 }
460 true
461}
462
463fn ab_dfs_match(
467 graph: &Graph,
468 pattern: &AntibodyPattern,
469 binding: &mut Vec<Option<NodeId>>,
470 order: &[usize],
471 depth: usize,
472 used_nodes: &mut HashSet<NodeId>,
473 deadline: &Instant,
474) -> bool {
475 ab_dfs_match_mode(
476 graph,
477 pattern,
478 binding,
479 order,
480 depth,
481 used_nodes,
482 deadline,
483 "substring",
484 )
485}
486
487#[allow(clippy::too_many_arguments)]
489fn ab_dfs_match_mode(
490 graph: &Graph,
491 pattern: &AntibodyPattern,
492 binding: &mut Vec<Option<NodeId>>,
493 order: &[usize],
494 depth: usize,
495 used_nodes: &mut HashSet<NodeId>,
496 deadline: &Instant,
497 match_mode: &str,
498) -> bool {
499 if depth >= order.len() {
500 return ab_verify_edges(graph, &pattern.edges, binding)
502 && ab_verify_negative_edges(graph, &pattern.negative_edges, binding);
503 }
504
505 if deadline.elapsed().as_millis() > 0 && Instant::now() >= *deadline {
507 return false;
508 }
509
510 let pat_idx = order[depth];
511
512 if binding[pat_idx].is_some() {
514 return ab_dfs_match_mode(
515 graph,
516 pattern,
517 binding,
518 order,
519 depth + 1,
520 used_nodes,
521 deadline,
522 match_mode,
523 );
524 }
525
526 let candidates = ab_connected_candidates(graph, pattern, binding, pat_idx);
527
528 for candidate in candidates {
529 if used_nodes.contains(&candidate) {
530 continue; }
532 if !ab_matches_node_constraints_mode(graph, candidate, &pattern.nodes[pat_idx], match_mode)
533 {
534 continue;
535 }
536
537 binding[pat_idx] = Some(candidate);
539 used_nodes.insert(candidate);
540
541 if ab_dfs_match_mode(
542 graph,
543 pattern,
544 binding,
545 order,
546 depth + 1,
547 used_nodes,
548 deadline,
549 match_mode,
550 ) {
551 return true;
552 }
553
554 binding[pat_idx] = None;
556 used_nodes.remove(&candidate);
557 }
558
559 false
560}
561
562fn ab_build_bound_node(graph: &Graph, node_id: NodeId, role: &str) -> BoundNode {
564 let idx = node_id.as_usize();
565 let label = graph.strings.resolve(graph.nodes.label[idx]).to_string();
566
567 let external_id = graph
569 .id_to_node
570 .iter()
571 .find(|(_, &nid)| nid == node_id)
572 .map(|(interned, _)| graph.strings.resolve(*interned).to_string())
573 .unwrap_or_else(|| format!("node_{}", idx));
574
575 let provenance = graph.resolve_node_provenance(node_id);
576
577 BoundNode {
578 node_id: external_id,
579 label,
580 role: role.to_string(),
581 source_path: provenance.source_path,
582 line_start: provenance.line_start,
583 line_end: provenance.line_end,
584 }
585}
586
587fn ab_compute_confidence(
589 graph: &Graph,
590 binding: &[Option<NodeId>],
591 pattern: &AntibodyPattern,
592) -> f32 {
593 let mut confidence: f32 = 1.0;
594 let n = graph.nodes.count as usize;
595
596 for (i, pat_node) in pattern.nodes.iter().enumerate() {
597 if let Some(node_id) = binding[i] {
598 let idx = node_id.as_usize();
599
600 if let Some(ref substring) = pat_node.label_contains {
602 let lower = substring.to_lowercase();
603 let match_count = (0..n)
604 .filter(|&j| {
605 let lbl = graph.strings.resolve(graph.nodes.label[j]);
606 lbl.to_lowercase().contains(&lower)
607 })
608 .count();
609 let ratio = match_count as f32 / n.max(1) as f32;
610 if ratio > 0.1 {
611 confidence -= 0.1;
612 }
613 }
614
615 if pat_node.node_type.is_some() {
617 let nt = graph.nodes.node_type[idx];
618 let type_count = (0..n).filter(|&j| graph.nodes.node_type[j] == nt).count();
619 let ratio = type_count as f32 / n.max(1) as f32;
620 if ratio < 0.01 {
621 confidence += 0.1;
622 }
623 }
624 }
625 }
626
627 confidence.clamp(0.1, 1.0)
628}
629
/// Current wall-clock time as fractional seconds since the Unix epoch.
/// Falls back to `0.0` when the system clock is set before the epoch.
fn ab_now() -> f64 {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    match since_epoch {
        Ok(elapsed) => elapsed.as_secs_f64(),
        Err(_) => 0.0,
    }
}
637
638pub fn compute_specificity(pattern: &AntibodyPattern) -> f32 {
648 if pattern.nodes.is_empty() {
649 return 0.0;
650 }
651
652 let mut constraints: u32 = 0;
653
654 for node in &pattern.nodes {
656 constraints += ab_node_constraint_count(node);
657 }
658
659 for edge in &pattern.edges {
661 if edge.relation.is_some() {
662 constraints += 1;
663 }
664 }
665
666 constraints += pattern.negative_edges.len() as u32;
668
669 let max_constraints =
671 (pattern.nodes.len() * 3 + pattern.edges.len() + pattern.negative_edges.len()) as u32;
672
673 if max_constraints == 0 {
674 return 0.0;
675 }
676
677 constraints as f32 / max_constraints as f32
678}
679
680pub fn match_antibody(graph: &Graph, antibody: &Antibody, timeout_ms: u64) -> Vec<AntibodyMatch> {
690 if !graph.finalized || graph.nodes.count == 0 || antibody.pattern.nodes.is_empty() {
691 return Vec::new();
692 }
693
694 let deadline = Instant::now() + std::time::Duration::from_millis(timeout_ms);
695 let pattern = &antibody.pattern;
696
697 let anchor_idx = ab_pick_anchor(pattern);
699
700 let mut order: Vec<usize> = (0..pattern.nodes.len()).collect();
702 order.retain(|&i| i != anchor_idx);
704 order.sort_by(|a, b| {
705 ab_node_constraint_count(&pattern.nodes[*b])
706 .cmp(&ab_node_constraint_count(&pattern.nodes[*a]))
707 });
708 order.insert(0, anchor_idx);
709
710 let n = graph.nodes.count;
712 let mut matches = Vec::new();
713 let mut seen_bindings: HashSet<Vec<u32>> = HashSet::new();
714
715 for node_idx in 0..n {
716 if Instant::now() >= deadline {
717 break;
718 }
719
720 let candidate = NodeId::new(node_idx);
721 if !ab_matches_node_constraints(graph, candidate, &pattern.nodes[anchor_idx]) {
722 continue;
723 }
724
725 let mut binding: Vec<Option<NodeId>> = vec![None; pattern.nodes.len()];
727 binding[anchor_idx] = Some(candidate);
728 let mut used_nodes = HashSet::new();
729 used_nodes.insert(candidate);
730
731 if ab_dfs_match(
732 graph,
733 pattern,
734 &mut binding,
735 &order,
736 0,
737 &mut used_nodes,
738 &deadline,
739 ) {
740 let mut key: Vec<u32> = binding.iter().filter_map(|b| b.map(|n| n.0)).collect();
742 key.sort();
743 if !seen_bindings.insert(key) {
744 continue;
745 }
746
747 let confidence = ab_compute_confidence(graph, &binding, pattern);
748
749 let bound_nodes: Vec<BoundNode> = binding
750 .iter()
751 .enumerate()
752 .filter_map(|(i, b)| {
753 b.map(|nid| ab_build_bound_node(graph, nid, &pattern.nodes[i].role))
754 })
755 .collect();
756
757 let location = bound_nodes.first().and_then(|bn| bn.source_path.clone());
758
759 matches.push(AntibodyMatch {
760 antibody_id: antibody.id.clone(),
761 antibody_name: antibody.name.clone(),
762 severity: antibody.severity,
763 bound_nodes,
764 confidence,
765 location,
766 });
767
768 if matches.len() >= MAX_MATCHES_PER_ANTIBODY {
769 break;
770 }
771 }
772 }
773
774 matches
775}
776
777pub fn match_antibody_with_options(
787 graph: &Graph,
788 antibody: &Antibody,
789 timeout_ms: u64,
790 max_matches: usize,
791 match_mode: &str,
792 _similarity_threshold: f32,
793) -> Vec<AntibodyMatch> {
794 if !graph.finalized || graph.nodes.count == 0 || antibody.pattern.nodes.is_empty() {
795 return Vec::new();
796 }
797
798 let deadline = Instant::now() + std::time::Duration::from_millis(timeout_ms);
799 let pattern = &antibody.pattern;
800
801 let anchor_idx = ab_pick_anchor(pattern);
802
803 let mut order: Vec<usize> = (0..pattern.nodes.len()).collect();
804 order.retain(|&i| i != anchor_idx);
805 order.sort_by(|a, b| {
806 ab_node_constraint_count(&pattern.nodes[*b])
807 .cmp(&ab_node_constraint_count(&pattern.nodes[*a]))
808 });
809 order.insert(0, anchor_idx);
810
811 let n = graph.nodes.count;
812 let mut matches = Vec::new();
813 let mut seen_bindings: HashSet<Vec<u32>> = HashSet::new();
814
815 for node_idx in 0..n {
816 if Instant::now() >= deadline {
817 break;
818 }
819
820 let candidate = NodeId::new(node_idx);
821 if !ab_matches_node_constraints_mode(
822 graph,
823 candidate,
824 &pattern.nodes[anchor_idx],
825 match_mode,
826 ) {
827 continue;
828 }
829
830 let mut binding: Vec<Option<NodeId>> = vec![None; pattern.nodes.len()];
831 binding[anchor_idx] = Some(candidate);
832 let mut used_nodes = HashSet::new();
833 used_nodes.insert(candidate);
834
835 if ab_dfs_match_mode(
836 graph,
837 pattern,
838 &mut binding,
839 &order,
840 0,
841 &mut used_nodes,
842 &deadline,
843 match_mode,
844 ) {
845 let mut key: Vec<u32> = binding.iter().filter_map(|b| b.map(|n| n.0)).collect();
846 key.sort();
847 if !seen_bindings.insert(key) {
848 continue;
849 }
850
851 let confidence = ab_compute_confidence(graph, &binding, pattern);
852
853 let bound_nodes: Vec<BoundNode> = binding
854 .iter()
855 .enumerate()
856 .filter_map(|(i, b)| {
857 b.map(|nid| ab_build_bound_node(graph, nid, &pattern.nodes[i].role))
858 })
859 .collect();
860
861 let location = bound_nodes.first().and_then(|bn| bn.source_path.clone());
862
863 matches.push(AntibodyMatch {
864 antibody_id: antibody.id.clone(),
865 antibody_name: antibody.name.clone(),
866 severity: antibody.severity,
867 bound_nodes,
868 confidence,
869 location,
870 });
871
872 if matches.len() >= max_matches {
873 break;
874 }
875 }
876 }
877
878 matches
879}
880
881fn ab_matches_node_constraints_mode(
883 graph: &Graph,
884 node_id: NodeId,
885 pattern_node: &PatternNode,
886 match_mode: &str,
887) -> bool {
888 let idx = node_id.as_usize();
889 if idx >= graph.nodes.count as usize {
890 return false;
891 }
892
893 if let Some(ref type_str) = pattern_node.node_type {
895 if let Some(expected_type) = ab_str_to_node_type(type_str) {
896 if graph.nodes.node_type[idx] != expected_type {
897 return false;
898 }
899 }
900 }
901
902 if let Some(ref substring) = pattern_node.label_contains {
904 let label = graph.strings.resolve(graph.nodes.label[idx]);
905 match match_mode {
906 "exact" => {
907 if !label.eq_ignore_ascii_case(substring) {
908 return false;
909 }
910 }
911 "regex" => {
912 if !label.to_lowercase().contains(&substring.to_lowercase()) {
915 return false;
916 }
917 }
918 _ => {
919 if !label.to_lowercase().contains(&substring.to_lowercase()) {
921 return false;
922 }
923 }
924 }
925 }
926
927 if !pattern_node.required_tags.is_empty() {
929 let node_tags = &graph.nodes.tags[idx];
930 for required_tag in &pattern_node.required_tags {
931 let tag_found = node_tags
932 .iter()
933 .any(|&t| graph.strings.resolve(t).eq_ignore_ascii_case(required_tag));
934 if !tag_found {
935 return false;
936 }
937 }
938 }
939
940 true
941}
942
943#[allow(clippy::too_many_arguments)]
957pub fn scan_antibodies(
958 graph: &Graph,
959 antibodies: &mut [Antibody],
960 scope: &str,
961 last_scan_generation: u64,
962 max_matches: usize,
963 min_severity: AntibodySeverity,
964 antibody_ids: Option<&[String]>,
965 max_matches_per_antibody: usize,
966 match_mode: &str,
967 similarity_threshold: f32,
968) -> AntibodyScanResult {
969 let start = Instant::now();
970 let total_deadline = Instant::now() + std::time::Duration::from_millis(TOTAL_SCAN_TIMEOUT_MS);
971
972 let mut all_matches: Vec<AntibodyMatch> = Vec::new();
973 let mut antibodies_checked: u32 = 0;
974 let mut timed_out_antibodies: Vec<String> = Vec::new();
975 let mut auto_disabled_antibodies: Vec<String> = Vec::new();
976
977 let nodes_scanned = if scope == "changed" {
978 let gen_val = graph.generation.0;
980 if gen_val > last_scan_generation {
981 (gen_val - last_scan_generation).min(graph.nodes.count as u64) as u32
982 } else {
983 graph.nodes.count
984 }
985 } else {
986 graph.nodes.count
987 };
988
989 let severity_rank = |s: AntibodySeverity| -> u8 {
990 match s {
991 AntibodySeverity::Info => 0,
992 AntibodySeverity::Warning => 1,
993 AntibodySeverity::Critical => 2,
994 }
995 };
996
997 let min_sev_rank = severity_rank(min_severity);
998 let now = ab_now();
999
1000 for antibody in antibodies.iter_mut() {
1001 if Instant::now() >= total_deadline {
1002 break;
1003 }
1004
1005 if !antibody.enabled {
1007 continue;
1008 }
1009
1010 if severity_rank(antibody.severity) < min_sev_rank {
1012 continue;
1013 }
1014
1015 if let Some(ids) = antibody_ids {
1017 if !ids.contains(&antibody.id) {
1018 continue;
1019 }
1020 }
1021
1022 antibodies_checked += 1;
1023
1024 let before = Instant::now();
1025 let effective_max = if max_matches_per_antibody > 0 {
1026 max_matches_per_antibody
1027 } else {
1028 MAX_MATCHES_PER_ANTIBODY
1029 };
1030 let mut matches = match_antibody_with_options(
1031 graph,
1032 antibody,
1033 PATTERN_MATCH_TIMEOUT_MS,
1034 effective_max,
1035 match_mode,
1036 similarity_threshold,
1037 );
1038 let elapsed = before.elapsed().as_millis() as u64;
1039
1040 if elapsed >= PATTERN_MATCH_TIMEOUT_MS {
1041 timed_out_antibodies.push(antibody.id.clone());
1042 }
1043
1044 if matches.len() >= effective_max {
1046 antibody.enabled = false;
1047 auto_disabled_antibodies.push(antibody.id.clone());
1048 }
1049
1050 if !matches.is_empty() {
1051 antibody.match_count += matches.len() as u32;
1052 antibody.last_match_at = Some(now);
1053 }
1054
1055 let remaining = max_matches.saturating_sub(all_matches.len());
1057 matches.truncate(remaining);
1058 all_matches.extend(matches);
1059
1060 if all_matches.len() >= max_matches {
1061 break;
1062 }
1063 }
1064
1065 let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
1066
1067 AntibodyScanResult {
1068 matches: all_matches,
1069 antibodies_checked,
1070 nodes_scanned,
1071 elapsed_ms,
1072 scan_scope: scope.to_string(),
1073 timed_out_antibodies,
1074 auto_disabled_antibodies,
1075 }
1076}
1077
1078pub fn extract_antibody_from_learn(
1089 graph: &Graph,
1090 node_ids: &[NodeId],
1091 name: &str,
1092 query: &str,
1093 agent_id: &str,
1094) -> Option<Antibody> {
1095 if node_ids.is_empty() || !graph.finalized {
1096 return None;
1097 }
1098
1099 let node_set: HashSet<NodeId> = node_ids.iter().copied().collect();
1100 let mut pattern_nodes: Vec<PatternNode> = Vec::new();
1101 let mut pattern_edges: Vec<PatternEdge> = Vec::new();
1102 let mut source_nodes: Vec<String> = Vec::new();
1103
1104 let mut node_to_pat: std::collections::HashMap<NodeId, usize> =
1106 std::collections::HashMap::new();
1107
1108 for &nid in node_ids {
1109 let idx = nid.as_usize();
1110 if idx >= graph.nodes.count as usize {
1111 continue;
1112 }
1113
1114 let pat_idx = pattern_nodes.len();
1115 node_to_pat.insert(nid, pat_idx);
1116
1117 let nt = graph.nodes.node_type[idx];
1118 let label = graph.strings.resolve(graph.nodes.label[idx]);
1119
1120 let label_contains = ab_extract_discriminating_substring(graph, label);
1122
1123 let ext_id = graph
1125 .id_to_node
1126 .iter()
1127 .find(|(_, &n)| n == nid)
1128 .map(|(interned, _)| graph.strings.resolve(*interned).to_string())
1129 .unwrap_or_default();
1130 source_nodes.push(ext_id);
1131
1132 let role = format!("{}_{}", ab_node_type_to_str(nt).to_lowercase(), pat_idx);
1134
1135 let tags: Vec<String> = graph.nodes.tags[idx]
1137 .iter()
1138 .map(|&t| graph.strings.resolve(t).to_string())
1139 .collect();
1140
1141 pattern_nodes.push(PatternNode {
1142 role,
1143 node_type: Some(ab_node_type_to_str(nt).to_string()),
1144 required_tags: tags,
1145 label_contains,
1146 });
1147 }
1148
1149 for &nid in node_ids {
1151 let idx = nid.as_usize();
1152 if idx >= graph.nodes.count as usize {
1153 continue;
1154 }
1155 if let Some(&src_pat) = node_to_pat.get(&nid) {
1156 let range = graph.csr.out_range(nid);
1157 for i in range {
1158 let target = graph.csr.targets[i];
1159 if let Some(&tgt_pat) = node_to_pat.get(&target) {
1160 let relation = graph.strings.resolve(graph.csr.relations[i]).to_string();
1161 let edge_exists = pattern_edges
1163 .iter()
1164 .any(|e| e.source_idx == src_pat && e.target_idx == tgt_pat);
1165 if !edge_exists {
1166 pattern_edges.push(PatternEdge {
1167 source_idx: src_pat,
1168 target_idx: tgt_pat,
1169 relation: Some(relation),
1170 });
1171 }
1172 }
1173 }
1174 }
1175 }
1176
1177 if pattern_nodes.len() > MAX_PATTERN_NODES {
1179 pattern_nodes.truncate(MAX_PATTERN_NODES);
1180 }
1181 if pattern_edges.len() > MAX_PATTERN_EDGES {
1182 pattern_edges.truncate(MAX_PATTERN_EDGES);
1183 }
1184
1185 let pattern = AntibodyPattern {
1186 nodes: pattern_nodes,
1187 edges: pattern_edges,
1188 negative_edges: Vec::new(),
1189 };
1190
1191 let specificity = compute_specificity(&pattern);
1192 if specificity < MIN_AUTO_EXTRACT_SPECIFICITY {
1193 return None;
1194 }
1195
1196 let id = ab_generate_id();
1197
1198 Some(Antibody {
1199 id,
1200 name: name.to_string(),
1201 description: format!("Auto-extracted from learn() query: {}", query),
1202 pattern,
1203 severity: AntibodySeverity::Info,
1204 match_count: 0,
1205 created_at: ab_now(),
1206 last_match_at: None,
1207 created_by: agent_id.to_string(),
1208 source_query: query.to_string(),
1209 source_nodes,
1210 enabled: true,
1211 specificity,
1212 })
1213}
1214
1215fn ab_extract_discriminating_substring(graph: &Graph, label: &str) -> Option<String> {
1218 let delimiters = &[':', '_', '.', '/', '\\'];
1219 let segments: Vec<&str> = label
1220 .split(|c: char| delimiters.contains(&c))
1221 .filter(|s| s.len() >= 2)
1222 .collect();
1223
1224 if segments.is_empty() {
1225 if label.len() >= 2 {
1226 return Some(label.to_string());
1227 }
1228 return None;
1229 }
1230
1231 let n = graph.nodes.count as usize;
1232
1233 let mut best_segment: Option<&str> = None;
1235 let mut best_count = usize::MAX;
1236
1237 for segment in &segments {
1238 let lower = segment.to_lowercase();
1239 let count = (0..n)
1240 .filter(|&i| {
1241 let lbl = graph.strings.resolve(graph.nodes.label[i]);
1242 lbl.to_lowercase().contains(&lower)
1243 })
1244 .count();
1245
1246 if count < best_count {
1247 best_count = count;
1248 best_segment = Some(segment);
1249 }
1250 }
1251
1252 best_segment.map(|s| s.to_string())
1253}
1254
/// Generates an antibody id of the form `ab-xxxxxxxx-xxxx-4xxx-Nxxx-xxxxxxxxxxxx`
/// (lower-case hex; `N` is one of `8`, `9`, `a`, `b`).
///
/// The id is built from the current time in nanoseconds since the Unix epoch
/// plus a process-wide sequence number. The sequence number keeps ids
/// distinct when several are generated within one clock tick, which a
/// timestamp alone cannot guarantee on clocks with coarse resolution.
fn ab_generate_id() -> String {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Incremented on every call; only its low 14 bits go into the id.
    static SEQUENCE: AtomicUsize = AtomicUsize::new(0);

    let ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_nanos())
        .unwrap_or(0);
    let seq = SEQUENCE.fetch_add(1, Ordering::Relaxed);

    format!(
        "ab-{:08x}-{:04x}-{:04x}-{:04x}-{:012x}",
        (ts >> 32) as u32,
        (ts >> 16) as u16,
        (((ts >> 8) & 0x0FFF) | 0x4000) as u16,
        // This field used to repeat the low timestamp bits already present in
        // the last field; it now carries the sequence number instead.
        ((seq & 0x3FFF) | 0x8000) as u16,
        (ts & 0xFFFF_FFFF_FFFF) as u64
    )
}
1272
1273pub fn pattern_similarity(a: &AntibodyPattern, b: &AntibodyPattern) -> f32 {
1278 if a.nodes.is_empty() && b.nodes.is_empty() {
1279 return 1.0;
1280 }
1281 if a.nodes.is_empty() || b.nodes.is_empty() {
1282 return 0.0;
1283 }
1284
1285 let a_types: HashSet<Option<&str>> = a.nodes.iter().map(|n| n.node_type.as_deref()).collect();
1287 let b_types: HashSet<Option<&str>> = b.nodes.iter().map(|n| n.node_type.as_deref()).collect();
1288 let type_intersection = a_types.intersection(&b_types).count();
1289 let type_union = a_types.union(&b_types).count();
1290 let type_sim = if type_union > 0 {
1291 type_intersection as f32 / type_union as f32
1292 } else {
1293 0.0
1294 };
1295
1296 let a_rels: HashSet<Option<&str>> = a.edges.iter().map(|e| e.relation.as_deref()).collect();
1298 let b_rels: HashSet<Option<&str>> = b.edges.iter().map(|e| e.relation.as_deref()).collect();
1299 let rel_intersection = a_rels.intersection(&b_rels).count();
1300 let rel_union = a_rels.union(&b_rels).count();
1301 let rel_sim = if rel_union > 0 {
1302 rel_intersection as f32 / rel_union as f32
1303 } else {
1304 1.0 };
1306
1307 let size_a = a.nodes.len() as f32;
1309 let size_b = b.nodes.len() as f32;
1310 let size_sim = size_a.min(size_b) / size_a.max(size_b);
1311
1312 let neg_sim = if a.negative_edges.is_empty() && b.negative_edges.is_empty() {
1314 1.0
1315 } else {
1316 let max_neg = a.negative_edges.len().max(b.negative_edges.len()) as f32;
1317 let min_neg = a.negative_edges.len().min(b.negative_edges.len()) as f32;
1318 min_neg / max_neg.max(1.0)
1319 };
1320
1321 0.4 * type_sim + 0.3 * rel_sim + 0.2 * size_sim + 0.1 * neg_sim
1323}
1324
/// On-disk JSON envelope for the antibody store.
#[derive(Serialize, Deserialize)]
struct AntibodyPersistence {
    /// Format version; `save_antibodies` always writes `1`.
    version: u32,
    /// The persisted antibodies.
    antibodies: Vec<Antibody>,
}
1333
1334pub fn save_antibodies(antibodies: &[Antibody], path: &Path) -> M1ndResult<()> {
1339 let data = AntibodyPersistence {
1340 version: 1,
1341 antibodies: antibodies.to_vec(),
1342 };
1343
1344 let json = serde_json::to_string_pretty(&data)
1345 .map_err(|e| M1ndError::PersistenceFailed(format!("antibody serialization: {}", e)))?;
1346
1347 let tmp_path = path.with_extension("json.tmp");
1349
1350 if path.exists() {
1352 let bak_path = path.with_extension("json.bak");
1353 let _ = std::fs::copy(path, &bak_path);
1354 }
1355
1356 let file = std::fs::File::create(&tmp_path)
1357 .map_err(|e| M1ndError::PersistenceFailed(format!("antibody temp file create: {}", e)))?;
1358 let mut writer = std::io::BufWriter::new(file);
1359 writer
1360 .write_all(json.as_bytes())
1361 .map_err(|e| M1ndError::PersistenceFailed(format!("antibody write: {}", e)))?;
1362 writer
1363 .flush()
1364 .map_err(|e| M1ndError::PersistenceFailed(format!("antibody flush: {}", e)))?;
1365 drop(writer);
1366
1367 std::fs::rename(&tmp_path, path)
1368 .map_err(|e| M1ndError::PersistenceFailed(format!("antibody rename: {}", e)))?;
1369
1370 Ok(())
1371}
1372
1373pub fn load_antibodies(path: &Path) -> M1ndResult<Vec<Antibody>> {
1381 if !path.exists() {
1382 return Ok(Vec::new());
1383 }
1384
1385 let content = std::fs::read_to_string(path)
1386 .map_err(|e| M1ndError::PersistenceFailed(format!("antibody read: {}", e)))?;
1387
1388 match serde_json::from_str::<AntibodyPersistence>(&content) {
1389 Ok(data) => Ok(data.antibodies),
1390 Err(e) => {
1391 eprintln!(
1392 "[m1nd] WARNING: antibodies.json parse failed: {}. Trying backup.",
1393 e
1394 );
1395 let bak_path = path.with_extension("json.bak");
1397 if bak_path.exists() {
1398 let bak_content = std::fs::read_to_string(&bak_path).map_err(|e2| {
1399 M1ndError::PersistenceFailed(format!("antibody backup read: {}", e2))
1400 })?;
1401 match serde_json::from_str::<AntibodyPersistence>(&bak_content) {
1402 Ok(data) => Ok(data.antibodies),
1403 Err(_) => {
1404 eprintln!("[m1nd] WARNING: antibody backup also failed. Starting empty.");
1405 Ok(Vec::new())
1406 }
1407 }
1408 } else {
1409 eprintln!("[m1nd] WARNING: no antibody backup found. Starting empty.");
1410 Ok(Vec::new())
1411 }
1412 }
1413 }
1414}
1415
1416#[cfg(test)]
1417mod tests {
1418 use super::*;
1419 use crate::graph::Graph;
1420 use crate::types::*;
1421
1422 fn make_antibody(pattern: AntibodyPattern) -> Antibody {
1425 Antibody {
1426 id: "test-ab-001".to_string(),
1427 name: "Test Antibody".to_string(),
1428 description: "unit test antibody".to_string(),
1429 pattern,
1430 severity: AntibodySeverity::Warning,
1431 match_count: 0,
1432 created_at: 0.0,
1433 last_match_at: None,
1434 created_by: "test".to_string(),
1435 source_query: "test".to_string(),
1436 source_nodes: Vec::new(),
1437 enabled: true,
1438 specificity: 0.5,
1439 }
1440 }
1441
1442 fn build_two_node_graph(label_a: &str, label_b: &str, relation: &str) -> Graph {
1443 let mut g = Graph::new();
1444 g.add_node("node_a", label_a, NodeType::Function, &[], 1.0, 0.5)
1445 .unwrap();
1446 g.add_node("node_b", label_b, NodeType::Module, &[], 0.8, 0.3)
1447 .unwrap();
1448 g.add_edge(
1449 NodeId::new(0),
1450 NodeId::new(1),
1451 relation,
1452 FiniteF32::new(0.9),
1453 EdgeDirection::Forward,
1454 false,
1455 FiniteF32::new(0.5),
1456 )
1457 .unwrap();
1458 g.finalize().unwrap();
1459 g
1460 }
1461
/// An antibody built from a one-node, zero-edge pattern keeps the helper's defaults
/// (enabled, `Warning` severity) and exposes the pattern it was given.
#[test]
fn create_antibody_single_node_pattern() {
    let only_node = PatternNode {
        role: String::from("target"),
        node_type: Some(String::from("function")),
        required_tags: vec![],
        label_contains: Some(String::from("handle")),
    };
    let antibody = make_antibody(AntibodyPattern {
        nodes: vec![only_node],
        edges: vec![],
        negative_edges: vec![],
    });

    assert!(antibody.enabled);
    assert_eq!(antibody.severity, AntibodySeverity::Warning);
    assert_eq!(antibody.pattern.nodes.len(), 1);
    assert_eq!(antibody.pattern.edges.len(), 0);
}
1481
/// Matching a single-node pattern against a finalized graph that has no nodes must
/// produce no matches.
#[test]
fn scan_empty_graph_returns_no_matches() {
    let mut empty_graph = Graph::new();
    empty_graph.finalize().unwrap();

    let wildcard_type_node = PatternNode {
        role: String::from("a"),
        node_type: None,
        required_tags: vec![],
        label_contains: Some(String::from("anything")),
    };
    let antibody = make_antibody(AntibodyPattern {
        nodes: vec![wildcard_type_node],
        edges: vec![],
        negative_edges: vec![],
    });

    let found = match_antibody(&empty_graph, &antibody, 100);
    assert!(found.is_empty());
}
1501
/// A pattern whose only constraint is `label_contains: "handle"` must bind the node
/// labelled `handle_request` when run through `match_antibody`.
#[test]
fn scan_substring_match_finds_node() {
    let graph = build_two_node_graph("handle_request", "router_module", "calls");

    let entry_node = PatternNode {
        role: String::from("entry"),
        node_type: None,
        required_tags: vec![],
        label_contains: Some(String::from("handle")),
    };
    let antibody = make_antibody(AntibodyPattern {
        nodes: vec![entry_node],
        edges: vec![],
        negative_edges: vec![],
    });

    let found = match_antibody(&graph, &antibody, 500);
    assert!(
        !found.is_empty(),
        "should match handle_request via substring"
    );
    // The first bound node of the first match is the one we asked for.
    assert_eq!(found[0].bound_nodes[0].label, "handle_request");
}
1524
/// With the `"exact"` mode string, a pattern asking for `handle_request` must yield at
/// least one match and must never bind the longer label `handle_request_extra`, even
/// though that label contains the requested text.
#[test]
fn scan_exact_mode_matches_only_exact_label() {
    let graph = build_two_node_graph("handle_request", "handle_request_extra", "calls");

    let exact_node = PatternNode {
        role: String::from("fn"),
        node_type: None,
        required_tags: vec![],
        label_contains: Some(String::from("handle_request")),
    };
    let antibody = make_antibody(AntibodyPattern {
        nodes: vec![exact_node],
        edges: vec![],
        negative_edges: vec![],
    });

    let found = match_antibody_with_options(&graph, &antibody, 500, 10, "exact", 0.8);
    assert!(!found.is_empty());

    // Walk every bound node of every match in one flat pass.
    for bound in found.iter().flat_map(|hit| hit.bound_nodes.iter()) {
        assert_ne!(
            bound.label, "handle_request_extra",
            "exact mode should not match handle_request_extra"
        );
    }
}
1554
/// A pattern with every node constraint set (type, one tag, label), one related edge and
/// one negative edge is expected to score a specificity of 1.0 (within 0.01).
#[test]
fn specificity_all_constraints_filled() {
    // Both the positive and the negative edge are self-loops on the single node.
    let self_loop = |relation: Option<String>| PatternEdge {
        source_idx: 0,
        target_idx: 0,
        relation,
    };

    let fully_constrained = AntibodyPattern {
        nodes: vec![PatternNode {
            role: String::from("fn"),
            node_type: Some(String::from("function")),
            required_tags: vec![String::from("hot")],
            label_contains: Some(String::from("init")),
        }],
        edges: vec![self_loop(Some(String::from("calls")))],
        negative_edges: vec![self_loop(None)],
    };

    let s = compute_specificity(&fully_constrained);
    assert!((s - 1.0).abs() < 0.01, "expected 1.0 but got {}", s);
}
1585
/// A pattern with no nodes, edges or negative edges must score exactly 0.0.
#[test]
fn specificity_empty_pattern_returns_zero() {
    let score = compute_specificity(&AntibodyPattern {
        nodes: vec![],
        edges: vec![],
        negative_edges: vec![],
    });
    assert_eq!(score, 0.0);
}
1595
/// Saving a one-antibody list and loading it back yields the same antibody.
///
/// The target file lives in a scratch directory whose name includes the process id and a
/// nanosecond timestamp, so test runs that share the system temp dir cannot overwrite each
/// other's files. The whole directory is removed before any assertion runs, so a failing
/// assertion does not leave the target file (or any sibling file the save may have
/// created beside it) behind.
#[test]
fn save_and_load_antibodies_round_trip() {
    let stamp = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|elapsed| elapsed.as_nanos())
        .unwrap_or(0);
    let scratch = std::env::temp_dir().join(format!(
        "m1nd_antibody_round_trip_{}_{}",
        std::process::id(),
        stamp
    ));
    std::fs::create_dir_all(&scratch).expect("scratch dir should be creatable");
    let path = scratch.join("test_antibodies.json");

    let pat = AntibodyPattern {
        nodes: vec![PatternNode {
            role: "r".to_string(),
            node_type: Some("module".to_string()),
            required_tags: Vec::new(),
            label_contains: Some("router".to_string()),
        }],
        edges: Vec::new(),
        negative_edges: Vec::new(),
    };
    let antibodies = vec![make_antibody(pat)];

    // Capture both outcomes first, then clean up, then assert: a panic below must not
    // leak files into the shared temp dir.
    let saved = save_antibodies(&antibodies, &path);
    let loaded = load_antibodies(&path);
    let _ = std::fs::remove_dir_all(&scratch);

    saved.expect("save should succeed");
    let loaded = loaded.expect("load should succeed");

    assert_eq!(loaded.len(), 1);
    assert_eq!(loaded[0].id, "test-ab-001");
    assert_eq!(loaded[0].name, "Test Antibody");
    assert!(loaded[0].enabled);
    // The pattern and severity must survive serialization too, not just the metadata.
    assert_eq!(loaded[0].severity, AntibodySeverity::Warning);
    assert_eq!(loaded[0].pattern.nodes.len(), 1);
    assert_eq!(
        loaded[0].pattern.nodes[0].label_contains.as_deref(),
        Some("router")
    );
}
1625
/// An antibody with `enabled = false` must not be counted in `antibodies_checked` and
/// must contribute no matches, even though its pattern would fit the graph's
/// `handle_request` node.
#[test]
fn disabled_antibody_skipped_in_scan() {
    let graph = build_two_node_graph("handle_request", "router", "calls");

    let disabled = {
        let mut antibody = make_antibody(AntibodyPattern {
            nodes: vec![PatternNode {
                role: String::from("n"),
                node_type: None,
                required_tags: vec![],
                label_contains: Some(String::from("handle")),
            }],
            edges: vec![],
            negative_edges: vec![],
        });
        antibody.enabled = false;
        antibody
    };
    let mut registry = vec![disabled];

    let outcome = scan_antibodies(
        &graph,
        &mut registry,
        "all",
        0,
        100,
        AntibodySeverity::Info,
        None,
        10,
        "substring",
        0.5,
    );

    assert_eq!(
        outcome.antibodies_checked, 0,
        "disabled antibody should be skipped"
    );
    assert!(outcome.matches.is_empty());
}
1662
/// The graph holds an `alpha` -> `beta` edge. The pattern selects those two nodes by label
/// and declares a negative edge from pattern node 0 to pattern node 1 with
/// `relation: None`, so the match must be rejected.
#[test]
fn negative_edge_prevents_match_when_edge_exists() {
    let graph = build_two_node_graph("alpha", "beta", "calls");

    // Pattern nodes constrained by label only.
    let labelled = |role: &str, needle: &str| PatternNode {
        role: role.to_string(),
        node_type: None,
        required_tags: vec![],
        label_contains: Some(needle.to_string()),
    };

    let forbidden_link = PatternEdge {
        source_idx: 0,
        target_idx: 1,
        relation: None,
    };
    let antibody = make_antibody(AntibodyPattern {
        nodes: vec![labelled("src", "alpha"), labelled("tgt", "beta")],
        edges: vec![],
        negative_edges: vec![forbidden_link],
    });

    let found = match_antibody(&graph, &antibody, 500);
    assert!(
        found.is_empty(),
        "negative edge should block match when edge exists"
    );
}
1702
/// Comparing a pattern with itself must give a similarity of 1.0 (within 0.01).
#[test]
fn pattern_similarity_identical_patterns() {
    let function_node = PatternNode {
        role: String::from("r"),
        node_type: Some(String::from("function")),
        required_tags: vec![],
        label_contains: None,
    };
    let pattern = AntibodyPattern {
        nodes: vec![function_node],
        edges: vec![],
        negative_edges: vec![],
    };

    let sim = pattern_similarity(&pattern, &pattern);
    assert!(
        (sim - 1.0).abs() < 0.01,
        "identical patterns should have similarity ~1.0, got {}",
        sim
    );
}
1724
/// Two single-node patterns that differ only in `node_type` (`function` vs `file`) must
/// score strictly below 1.0 while staying non-negative.
#[test]
fn pattern_similarity_disjoint_node_types() {
    // Everything except the node type is shared between the two patterns.
    let single_typed_node = |node_type: &str| AntibodyPattern {
        nodes: vec![PatternNode {
            role: String::from("r"),
            node_type: Some(node_type.to_string()),
            required_tags: vec![],
            label_contains: None,
        }],
        edges: vec![],
        negative_edges: vec![],
    };

    let function_pattern = single_typed_node("function");
    let file_pattern = single_typed_node("file");

    let sim = pattern_similarity(&function_pattern, &file_pattern);
    assert!(
        sim < 1.0,
        "disjoint node types should reduce similarity; got {}",
        sim
    );
    assert!(sim >= 0.0);
}
1760}