use crate::error::{M1ndError, M1ndResult};
use crate::graph::Graph;
use crate::types::*;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::io::Write;
use std::path::Path;
use std::time::Instant;
/// Maximum number of nodes kept in a pattern; `extract_antibody_from_learn`
/// truncates auto-extracted patterns to this size.
pub const MAX_PATTERN_NODES: usize = 12;
/// Maximum number of edges kept in a pattern; `extract_antibody_from_learn`
/// truncates auto-extracted patterns to this size.
pub const MAX_PATTERN_EDGES: usize = 20;
/// Not referenced in the visible part of this file; presumably the cap on the
/// number of stored antibodies — confirm against the caller.
pub const MAX_ANTIBODIES: usize = 500;
/// Cap on matches collected for one antibody: always used by `match_antibody`,
/// and by `scan_antibodies` when its `max_matches_per_antibody` argument is 0.
pub const MAX_MATCHES_PER_ANTIBODY: usize = 100;
/// Not referenced in the visible part of this file; presumably the minimum
/// specificity for accepting a hand-written antibody — confirm against the caller.
pub const MIN_SPECIFICITY: f32 = 0.15;
/// `extract_antibody_from_learn` returns `None` when the extracted pattern's
/// specificity is below this value.
pub const MIN_AUTO_EXTRACT_SPECIFICITY: f32 = 0.4;
/// Time budget in milliseconds that `scan_antibodies` gives to each antibody.
pub const PATTERN_MATCH_TIMEOUT_MS: u64 = 10;
/// Time budget in milliseconds for one `scan_antibodies` call; it is checked
/// before each antibody is started.
pub const TOTAL_SCAN_TIMEOUT_MS: u64 = 100;
/// Not referenced in the visible part of this file; presumably the age after
/// which an antibody without matches counts as stale — confirm against the caller.
pub const STALE_THRESHOLD_DAYS: u64 = 30;
/// Not referenced in the visible part of this file; presumably compared with
/// the result of `pattern_similarity` to detect duplicates — confirm.
pub const DUPLICATE_SIMILARITY_THRESHOLD: f32 = 0.9;
/// Severity attached to an antibody and copied onto each of its matches.
///
/// `scan_antibodies` ranks the variants `Info < Warning < Critical` when it
/// applies its `min_severity` filter.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum AntibodySeverity {
/// Lowest rank; also the severity given to auto-extracted antibodies.
Info,
/// Middle rank.
Warning,
/// Highest rank.
Critical,
}
/// A stored graph pattern plus the bookkeeping kept about it.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Antibody {
/// Identifier; auto-extracted antibodies get one from `ab_generate_id`.
pub id: String,
/// Display name, copied onto every match.
pub name: String,
/// Free-form description.
pub description: String,
/// The subgraph pattern this antibody looks for.
pub pattern: AntibodyPattern,
/// Severity copied onto every match and used by the scan's severity filter.
pub severity: AntibodySeverity,
/// Running total of matches; `scan_antibodies` adds the number found per scan.
pub match_count: u32,
/// Creation time as seconds since the Unix epoch (see `ab_now`).
pub created_at: f64,
/// Time of the most recent scan that produced a match, in the same unit as
/// `created_at`; `None` until the first match.
pub last_match_at: Option<f64>,
/// Creator; auto-extraction stores the agent id here.
pub created_by: String,
/// Query the antibody was derived from (set by auto-extraction).
pub source_query: String,
/// External ids of the graph nodes the pattern was extracted from.
pub source_nodes: Vec<String>,
/// Disabled antibodies are skipped by `scan_antibodies`, which also clears
/// this flag when an antibody reaches its per-antibody match cap.
pub enabled: bool,
/// Specificity score; auto-extraction stores the result of `compute_specificity`.
pub specificity: f32,
}
/// A small subgraph template: node constraints, edges that must exist and
/// edges that must not exist.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AntibodyPattern {
/// Node constraints; edges refer to these by index.
pub nodes: Vec<PatternNode>,
/// Edges that must exist between the bound nodes.
pub edges: Vec<PatternEdge>,
/// Edges that must NOT exist between the bound nodes. Defaults to empty
/// when missing from the serialized form.
#[serde(default)]
pub negative_edges: Vec<PatternEdge>,
}
/// Constraints one graph node must satisfy to be bound to a pattern slot.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PatternNode {
/// Name of the slot; copied into `BoundNode::role` for reporting.
pub role: String,
/// Optional node-type name, parsed case-insensitively by
/// `ab_str_to_node_type`. A name that does not parse is ignored by the matcher.
pub node_type: Option<String>,
/// Tags the node must all carry (compared ASCII-case-insensitively).
/// Defaults to empty when missing from the serialized form.
#[serde(default)]
pub required_tags: Vec<String>,
/// Optional label constraint: a case-insensitive substring, or the whole
/// label when the matcher runs in `"exact"` mode.
pub label_contains: Option<String>,
}
/// A directed edge between two pattern slots.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PatternEdge {
/// Index of the source slot in `AntibodyPattern::nodes`.
pub source_idx: usize,
/// Index of the target slot in `AntibodyPattern::nodes`.
pub target_idx: usize,
/// Required relation name (compared ASCII-case-insensitively); `None`
/// accepts any relation.
pub relation: Option<String>,
}
/// One place in the graph where an antibody's pattern was found.
#[derive(Clone, Debug, Serialize)]
pub struct AntibodyMatch {
/// Id of the antibody that matched.
pub antibody_id: String,
/// Name of the antibody that matched.
pub antibody_name: String,
/// Severity copied from the antibody.
pub severity: AntibodySeverity,
/// The graph nodes bound to the pattern slots, in slot order.
pub bound_nodes: Vec<BoundNode>,
/// Heuristic score from `ab_compute_confidence`, clamped to `[0.1, 1.0]`.
pub confidence: f32,
/// Source path of the first bound node, when it has one.
pub location: Option<String>,
}
/// A graph node bound to one pattern slot, in reportable form.
#[derive(Clone, Debug, Serialize)]
pub struct BoundNode {
/// External id of the node, or `node_<index>` when no external id is found.
pub node_id: String,
/// The node's label.
pub label: String,
/// Role of the pattern slot the node was bound to.
pub role: String,
/// Source path from the node's provenance, if any.
pub source_path: Option<String>,
/// First line from the node's provenance, if any.
pub line_start: Option<u32>,
/// Last line from the node's provenance, if any.
pub line_end: Option<u32>,
}
/// Summary returned by `scan_antibodies`.
#[derive(Clone, Debug, Serialize)]
pub struct AntibodyScanResult {
/// All matches collected, capped at the scan's `max_matches`.
pub matches: Vec<AntibodyMatch>,
/// Number of antibodies that passed the filters and were actually matched.
pub antibodies_checked: u32,
/// Node count reported for the scan (an estimate for the `"changed"` scope).
pub nodes_scanned: u32,
/// Wall-clock duration of the scan in milliseconds.
pub elapsed_ms: f64,
/// The scope string the scan was called with.
pub scan_scope: String,
/// Ids of antibodies whose matching took at least `PATTERN_MATCH_TIMEOUT_MS`.
pub timed_out_antibodies: Vec<String>,
/// Ids of antibodies disabled during this scan for reaching their match cap.
pub auto_disabled_antibodies: Vec<String>,
}
/// Parses a node-type name (case-insensitive) into a [`NodeType`].
///
/// Accepts the short aliases `dir`, `func` and `ref`. Returns `None` for any
/// other spelling, including `"custom"`.
fn ab_str_to_node_type(s: &str) -> Option<NodeType> {
    let normalized = s.to_lowercase();
    let parsed = match normalized.as_str() {
        "file" => NodeType::File,
        "directory" | "dir" => NodeType::Directory,
        "function" | "func" => NodeType::Function,
        "class" => NodeType::Class,
        "struct" => NodeType::Struct,
        "enum" => NodeType::Enum,
        "type" => NodeType::Type,
        "module" => NodeType::Module,
        "reference" | "ref" => NodeType::Reference,
        "concept" => NodeType::Concept,
        "material" => NodeType::Material,
        "process" => NodeType::Process,
        "product" => NodeType::Product,
        "supplier" => NodeType::Supplier,
        "regulatory" => NodeType::Regulatory,
        "system" => NodeType::System,
        "cost" => NodeType::Cost,
        _ => return None,
    };
    Some(parsed)
}
/// Returns the display name of a node type.
///
/// Every `Custom(_)` value maps to the single name `"Custom"`, which
/// `ab_str_to_node_type` does not parse back.
fn ab_node_type_to_str(nt: NodeType) -> &'static str {
match nt {
NodeType::File => "File",
NodeType::Directory => "Directory",
NodeType::Function => "Function",
NodeType::Class => "Class",
NodeType::Struct => "Struct",
NodeType::Enum => "Enum",
NodeType::Type => "Type",
NodeType::Module => "Module",
NodeType::Reference => "Reference",
NodeType::Concept => "Concept",
NodeType::Material => "Material",
NodeType::Process => "Process",
NodeType::Product => "Product",
NodeType::Supplier => "Supplier",
NodeType::Regulatory => "Regulatory",
NodeType::System => "System",
NodeType::Cost => "Cost",
// The payload is dropped: all custom types share one name.
NodeType::Custom(_) => "Custom",
}
}
/// Counts how many of the three optional constraints (type, tags, label)
/// are set on a pattern node. The result is in `0..=3`.
fn ab_node_constraint_count(node: &PatternNode) -> u32 {
    let has_type = u32::from(node.node_type.is_some());
    let has_tags = u32::from(!node.required_tags.is_empty());
    let has_label = u32::from(node.label_contains.is_some());
    has_type + has_tags + has_label
}
fn ab_matches_node_constraints(graph: &Graph, node_id: NodeId, pattern_node: &PatternNode) -> bool {
let idx = node_id.as_usize();
if idx >= graph.nodes.count as usize {
return false;
}
if let Some(ref type_str) = pattern_node.node_type {
if let Some(expected_type) = ab_str_to_node_type(type_str) {
if graph.nodes.node_type[idx] != expected_type {
return false;
}
}
}
if let Some(ref substring) = pattern_node.label_contains {
let label = graph.strings.resolve(graph.nodes.label[idx]);
if !label.to_lowercase().contains(&substring.to_lowercase()) {
return false;
}
}
if !pattern_node.required_tags.is_empty() {
let node_tags = &graph.nodes.tags[idx];
for required_tag in &pattern_node.required_tags {
let tag_found = node_tags
.iter()
.any(|&t| graph.strings.resolve(t).eq_ignore_ascii_case(required_tag));
if !tag_found {
return false;
}
}
}
true
}
/// Returns `true` when the finalized graph has an edge `source -> target`
/// that satisfies the optional relation filter.
///
/// With `relation == None` any edge between the two nodes counts; otherwise
/// the edge's relation must equal the filter, ignoring ASCII case. A graph
/// that is not finalized never reports an edge.
fn ab_edge_exists(
    graph: &Graph,
    source: NodeId,
    target: NodeId,
    relation: &Option<String>,
) -> bool {
    if !graph.finalized {
        return false;
    }
    let wanted = relation.as_deref();
    for slot in graph.csr.out_range(source) {
        if graph.csr.targets[slot] != target {
            continue;
        }
        match wanted {
            None => return true,
            Some(rel) => {
                let actual = graph.strings.resolve(graph.csr.relations[slot]);
                if actual.eq_ignore_ascii_case(rel) {
                    return true;
                }
            }
        }
    }
    false
}
fn ab_any_edge_exists(graph: &Graph, source: NodeId, target: NodeId) -> bool {
if !graph.finalized {
return false;
}
let range = graph.csr.out_range(source);
for i in range {
if graph.csr.targets[i] == target {
return true;
}
}
false
}
/// Picks the pattern node to start matching from: the first node with the
/// highest constraint count. Returns index 0 when the pattern is empty or no
/// node has any constraint.
fn ab_pick_anchor(pattern: &AntibodyPattern) -> usize {
    let (anchor, _) = pattern.nodes.iter().enumerate().fold(
        (0usize, 0u32),
        |(best_idx, best_count), (idx, node)| {
            let count = ab_node_constraint_count(node);
            // Strictly greater, so ties keep the earliest node.
            if count > best_count {
                (idx, count)
            } else {
                (best_idx, best_count)
            }
        },
    );
    anchor
}
/// Lists the targets of all outgoing edges of `node`, one entry per edge in
/// CSR order. Returns an empty list when the graph is not finalized.
fn ab_outgoing_neighbors(graph: &Graph, node: NodeId) -> Vec<NodeId> {
    let mut targets = Vec::new();
    if graph.finalized {
        let slots = graph.csr.out_range(node);
        targets.reserve_exact(slots.len());
        for slot in slots {
            targets.push(graph.csr.targets[slot]);
        }
    }
    targets
}
/// Lists the sources of all incoming edges of `node`, one entry per edge in
/// reverse-CSR order. Returns an empty list when the graph is not finalized.
fn ab_incoming_neighbors(graph: &Graph, node: NodeId) -> Vec<NodeId> {
    let mut sources = Vec::new();
    if graph.finalized {
        let slots = graph.csr.in_range(node);
        sources.reserve_exact(slots.len());
        for slot in slots {
            sources.push(graph.csr.rev_sources[slot]);
        }
    }
    sources
}
/// Collects candidate graph nodes for pattern slot `target_idx`.
///
/// Candidates are the neighbours of already-bound slots along the pattern's
/// positive edges: targets of a bound source for edges pointing at the slot,
/// sources of a bound target for edges leaving it. Duplicates are dropped and
/// first-seen order is kept. When that yields nothing, every node in the graph
/// becomes a candidate.
fn ab_connected_candidates(
    graph: &Graph,
    pattern: &AntibodyPattern,
    binding: &[Option<NodeId>],
    target_idx: usize,
) -> Vec<NodeId> {
    let mut ordered: Vec<NodeId> = Vec::new();
    let mut already: HashSet<NodeId> = HashSet::new();

    for edge in &pattern.edges {
        if edge.target_idx == target_idx {
            if let Some(bound_source) = binding[edge.source_idx] {
                let reachable = ab_outgoing_neighbors(graph, bound_source);
                ordered.extend(reachable.into_iter().filter(|n| already.insert(*n)));
            }
        }
        if edge.source_idx == target_idx {
            if let Some(bound_target) = binding[edge.target_idx] {
                let reaching = ab_incoming_neighbors(graph, bound_target);
                ordered.extend(reaching.into_iter().filter(|n| already.insert(*n)));
            }
        }
    }

    if ordered.is_empty() {
        // Nothing constrains this slot yet: fall back to the whole graph.
        let node_total = graph.nodes.count as usize;
        ordered = (0..node_total).map(|i| NodeId::new(i as u32)).collect();
    }
    ordered
}
/// Returns `true` when every pattern edge has both endpoints bound and a
/// matching edge exists in the graph. An unbound endpoint fails the check.
fn ab_verify_edges(graph: &Graph, edges: &[PatternEdge], binding: &[Option<NodeId>]) -> bool {
    edges.iter().all(|edge| {
        binding[edge.source_idx]
            // The target slot is only looked up once the source is known to be bound.
            .and_then(|src| binding[edge.target_idx].map(|tgt| (src, tgt)))
            .map_or(false, |(src, tgt)| ab_edge_exists(graph, src, tgt, &edge.relation))
    })
}
/// Returns `true` when none of the forbidden edges is present in the graph.
///
/// A negative edge with an unbound endpoint is skipped. With a relation set,
/// only an edge with that relation violates the pattern; without one, any
/// edge between the two bound nodes does.
fn ab_verify_negative_edges(
    graph: &Graph,
    negative_edges: &[PatternEdge],
    binding: &[Option<NodeId>],
) -> bool {
    let violated = negative_edges.iter().any(|edge| {
        let src = match binding[edge.source_idx] {
            Some(node) => node,
            None => return false,
        };
        let tgt = match binding[edge.target_idx] {
            Some(node) => node,
            None => return false,
        };
        match edge.relation {
            Some(_) => ab_edge_exists(graph, src, tgt, &edge.relation),
            None => ab_any_edge_exists(graph, src, tgt),
        }
    });
    !violated
}
/// Backtracking search with the default label matching mode.
///
/// Forwards all arguments to `ab_dfs_match_mode` with the mode fixed to
/// `"substring"`.
fn ab_dfs_match(
    graph: &Graph,
    pattern: &AntibodyPattern,
    binding: &mut Vec<Option<NodeId>>,
    order: &[usize],
    depth: usize,
    used_nodes: &mut HashSet<NodeId>,
    deadline: &Instant,
) -> bool {
    const DEFAULT_MATCH_MODE: &str = "substring";
    ab_dfs_match_mode(
        graph,
        pattern,
        binding,
        order,
        depth,
        used_nodes,
        deadline,
        DEFAULT_MATCH_MODE,
    )
}
/// Backtracking search that tries to bind every pattern slot to a distinct
/// graph node.
///
/// * `binding` — current slot-to-node assignment, indexed like
///   `pattern.nodes`; on success it holds the complete match, on failure the
///   slots tried at this depth and below are reset to `None`.
/// * `order` — the sequence in which slots are bound; `depth` is the position
///   in that sequence.
/// * `used_nodes` — graph nodes already bound, so no node fills two slots.
/// * `deadline` — the search gives up (returns `false`) once this instant is
///   reached.
/// * `match_mode` — label matching mode, passed on to
///   `ab_matches_node_constraints_mode`.
///
/// Returns `true` when a complete binding was found that satisfies all
/// positive edges and violates no negative edge.
// The argument list mirrors the recursion state; bundling it into a struct
// would change every caller.
#[allow(clippy::too_many_arguments)]
fn ab_dfs_match_mode(
    graph: &Graph,
    pattern: &AntibodyPattern,
    binding: &mut Vec<Option<NodeId>>,
    order: &[usize],
    depth: usize,
    used_nodes: &mut HashSet<NodeId>,
    deadline: &Instant,
    match_mode: &str,
) -> bool {
    // All slots bound: the candidate assignment is checked as a whole.
    if depth >= order.len() {
        return ab_verify_edges(graph, &pattern.edges, binding)
            && ab_verify_negative_edges(graph, &pattern.negative_edges, binding);
    }
    // Plain comparison against the deadline. The previous form also required
    // `deadline.elapsed()` to be at least 1 ms, which delayed the timeout and
    // called `Instant::elapsed` on an instant that is usually in the future.
    if Instant::now() >= *deadline {
        return false;
    }
    let pat_idx = order[depth];
    // Slot already fixed by the caller (the anchor): move on.
    if binding[pat_idx].is_some() {
        return ab_dfs_match_mode(
            graph,
            pattern,
            binding,
            order,
            depth + 1,
            used_nodes,
            deadline,
            match_mode,
        );
    }
    for candidate in ab_connected_candidates(graph, pattern, binding, pat_idx) {
        // A graph node may fill only one slot.
        if used_nodes.contains(&candidate) {
            continue;
        }
        if !ab_matches_node_constraints_mode(graph, candidate, &pattern.nodes[pat_idx], match_mode)
        {
            continue;
        }
        binding[pat_idx] = Some(candidate);
        used_nodes.insert(candidate);
        if ab_dfs_match_mode(
            graph,
            pattern,
            binding,
            order,
            depth + 1,
            used_nodes,
            deadline,
            match_mode,
        ) {
            return true;
        }
        // Undo the tentative binding before trying the next candidate.
        binding[pat_idx] = None;
        used_nodes.remove(&candidate);
    }
    false
}
/// Builds the reportable description of a graph node bound to `role`.
///
/// The external id is found by scanning `graph.id_to_node` for an entry that
/// maps to `node_id`; when there is none, `node_<index>` is used. Path and
/// line range come from `graph.resolve_node_provenance`.
fn ab_build_bound_node(graph: &Graph, node_id: NodeId, role: &str) -> BoundNode {
    let idx = node_id.as_usize();
    let external_id = graph
        .id_to_node
        .iter()
        .find_map(|(interned, &mapped)| {
            if mapped == node_id {
                Some(graph.strings.resolve(*interned).to_string())
            } else {
                None
            }
        })
        .unwrap_or_else(|| format!("node_{}", idx));
    let provenance = graph.resolve_node_provenance(node_id);
    BoundNode {
        node_id: external_id,
        label: graph.strings.resolve(graph.nodes.label[idx]).to_string(),
        role: role.to_string(),
        source_path: provenance.source_path,
        line_start: provenance.line_start,
        line_end: provenance.line_end,
    }
}
fn ab_compute_confidence(
graph: &Graph,
binding: &[Option<NodeId>],
pattern: &AntibodyPattern,
) -> f32 {
let mut confidence: f32 = 1.0;
let n = graph.nodes.count as usize;
for (i, pat_node) in pattern.nodes.iter().enumerate() {
if let Some(node_id) = binding[i] {
let idx = node_id.as_usize();
if let Some(ref substring) = pat_node.label_contains {
let lower = substring.to_lowercase();
let match_count = (0..n)
.filter(|&j| {
let lbl = graph.strings.resolve(graph.nodes.label[j]);
lbl.to_lowercase().contains(&lower)
})
.count();
let ratio = match_count as f32 / n.max(1) as f32;
if ratio > 0.1 {
confidence -= 0.1;
}
}
if pat_node.node_type.is_some() {
let nt = graph.nodes.node_type[idx];
let type_count = (0..n).filter(|&j| graph.nodes.node_type[j] == nt).count();
let ratio = type_count as f32 / n.max(1) as f32;
if ratio < 0.01 {
confidence += 0.1;
}
}
}
}
confidence.clamp(0.1, 1.0)
}
/// Current wall-clock time as fractional seconds since the Unix epoch, or
/// `0.0` when the system clock reads earlier than the epoch.
fn ab_now() -> f64 {
    use std::time::{SystemTime, UNIX_EPOCH};
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs_f64(),
        Err(_) => 0.0,
    }
}
pub fn compute_specificity(pattern: &AntibodyPattern) -> f32 {
if pattern.nodes.is_empty() {
return 0.0;
}
let mut constraints: u32 = 0;
for node in &pattern.nodes {
constraints += ab_node_constraint_count(node);
}
for edge in &pattern.edges {
if edge.relation.is_some() {
constraints += 1;
}
}
constraints += pattern.negative_edges.len() as u32;
let max_constraints =
(pattern.nodes.len() * 3 + pattern.edges.len() + pattern.negative_edges.len()) as u32;
if max_constraints == 0 {
return 0.0;
}
constraints as f32 / max_constraints as f32
}
/// Finds occurrences of an antibody's pattern in the graph using
/// case-insensitive substring label matching.
///
/// Every graph node that satisfies the anchor slot (the most constrained
/// pattern node) is tried as a starting point, and at most one binding is
/// searched per starting node. Bindings that cover the same set of graph nodes
/// are reported once. The search stops after `timeout_ms` milliseconds or
/// after `MAX_MATCHES_PER_ANTIBODY` matches.
///
/// Returns an empty list when the graph is not finalized, has no nodes, or the
/// pattern has no nodes.
pub fn match_antibody(graph: &Graph, antibody: &Antibody, timeout_ms: u64) -> Vec<AntibodyMatch> {
    let pattern = &antibody.pattern;
    if !graph.finalized || graph.nodes.count == 0 || pattern.nodes.is_empty() {
        return Vec::new();
    }
    let deadline = Instant::now() + std::time::Duration::from_millis(timeout_ms);
    let slot_count = pattern.nodes.len();
    let anchor_idx = ab_pick_anchor(pattern);

    // Binding order: the anchor first, then the remaining slots from most to
    // least constrained (stable, so ties keep pattern order).
    let mut remaining: Vec<usize> = (0..slot_count).filter(|&i| i != anchor_idx).collect();
    remaining.sort_by_key(|&i| std::cmp::Reverse(ab_node_constraint_count(&pattern.nodes[i])));
    let mut order: Vec<usize> = Vec::with_capacity(slot_count);
    order.push(anchor_idx);
    order.extend(remaining);

    let mut found: Vec<AntibodyMatch> = Vec::new();
    let mut reported_sets: HashSet<Vec<u32>> = HashSet::new();

    for raw_id in 0..graph.nodes.count {
        if Instant::now() >= deadline {
            break;
        }
        let anchor = NodeId::new(raw_id);
        if !ab_matches_node_constraints(graph, anchor, &pattern.nodes[anchor_idx]) {
            continue;
        }

        let mut binding: Vec<Option<NodeId>> = vec![None; slot_count];
        binding[anchor_idx] = Some(anchor);
        let mut used_nodes = HashSet::new();
        used_nodes.insert(anchor);

        let complete = ab_dfs_match(
            graph,
            pattern,
            &mut binding,
            &order,
            0,
            &mut used_nodes,
            &deadline,
        );
        if !complete {
            continue;
        }

        // The sorted node ids identify a match regardless of slot assignment.
        let mut node_set: Vec<u32> = binding.iter().flatten().map(|n| n.0).collect();
        node_set.sort_unstable();
        if !reported_sets.insert(node_set) {
            continue;
        }

        let confidence = ab_compute_confidence(graph, &binding, pattern);
        let bound_nodes: Vec<BoundNode> = pattern
            .nodes
            .iter()
            .zip(binding.iter())
            .filter_map(|(slot, bound)| {
                bound.map(|nid| ab_build_bound_node(graph, nid, &slot.role))
            })
            .collect();
        let location = bound_nodes.first().and_then(|bn| bn.source_path.clone());

        found.push(AntibodyMatch {
            antibody_id: antibody.id.clone(),
            antibody_name: antibody.name.clone(),
            severity: antibody.severity,
            bound_nodes,
            confidence,
            location,
        });
        if found.len() >= MAX_MATCHES_PER_ANTIBODY {
            break;
        }
    }
    found
}
/// Finds occurrences of an antibody's pattern in the graph with a caller
/// chosen match cap and label matching mode.
///
/// * `timeout_ms` — time budget for the whole search.
/// * `max_matches` — upper bound on returned matches; `0` returns no matches
///   (previously one match could still be returned because the cap was only
///   checked after a match had been pushed).
/// * `match_mode` — `"exact"` compares the whole label (ASCII
///   case-insensitive); every other value, including `"regex"`, is treated as
///   a case-insensitive substring by `ab_matches_node_constraints_mode`.
/// * `_similarity_threshold` — accepted for API compatibility; not used.
///
/// Every graph node that satisfies the anchor slot is tried as a starting
/// point, at most one binding is searched per starting node, and bindings that
/// cover the same set of graph nodes are reported once.
///
/// Returns an empty list when the graph is not finalized, has no nodes, or the
/// pattern has no nodes.
pub fn match_antibody_with_options(
    graph: &Graph,
    antibody: &Antibody,
    timeout_ms: u64,
    max_matches: usize,
    match_mode: &str,
    _similarity_threshold: f32,
) -> Vec<AntibodyMatch> {
    if !graph.finalized || graph.nodes.count == 0 || antibody.pattern.nodes.is_empty() {
        return Vec::new();
    }
    // A zero cap means the caller wants nothing; skip the search entirely.
    if max_matches == 0 {
        return Vec::new();
    }
    let deadline = Instant::now() + std::time::Duration::from_millis(timeout_ms);
    let pattern = &antibody.pattern;
    let anchor_idx = ab_pick_anchor(pattern);

    // Binding order: the anchor first, then the remaining slots from most to
    // least constrained (stable sort keeps pattern order for ties).
    let mut order: Vec<usize> = (0..pattern.nodes.len()).collect();
    order.retain(|&i| i != anchor_idx);
    order.sort_by(|a, b| {
        ab_node_constraint_count(&pattern.nodes[*b])
            .cmp(&ab_node_constraint_count(&pattern.nodes[*a]))
    });
    order.insert(0, anchor_idx);

    let n = graph.nodes.count;
    let mut matches = Vec::new();
    let mut seen_bindings: HashSet<Vec<u32>> = HashSet::new();
    for node_idx in 0..n {
        if Instant::now() >= deadline {
            break;
        }
        let candidate = NodeId::new(node_idx);
        if !ab_matches_node_constraints_mode(
            graph,
            candidate,
            &pattern.nodes[anchor_idx],
            match_mode,
        ) {
            continue;
        }
        let mut binding: Vec<Option<NodeId>> = vec![None; pattern.nodes.len()];
        binding[anchor_idx] = Some(candidate);
        let mut used_nodes = HashSet::new();
        used_nodes.insert(candidate);
        if !ab_dfs_match_mode(
            graph,
            pattern,
            &mut binding,
            &order,
            0,
            &mut used_nodes,
            &deadline,
            match_mode,
        ) {
            continue;
        }
        // The sorted node ids identify a match regardless of slot assignment.
        let mut key: Vec<u32> = binding.iter().filter_map(|b| b.map(|n| n.0)).collect();
        key.sort();
        if !seen_bindings.insert(key) {
            continue;
        }
        let confidence = ab_compute_confidence(graph, &binding, pattern);
        let bound_nodes: Vec<BoundNode> = binding
            .iter()
            .enumerate()
            .filter_map(|(i, b)| {
                b.map(|nid| ab_build_bound_node(graph, nid, &pattern.nodes[i].role))
            })
            .collect();
        let location = bound_nodes.first().and_then(|bn| bn.source_path.clone());
        matches.push(AntibodyMatch {
            antibody_id: antibody.id.clone(),
            antibody_name: antibody.name.clone(),
            severity: antibody.severity,
            bound_nodes,
            confidence,
            location,
        });
        if matches.len() >= max_matches {
            break;
        }
    }
    matches
}
fn ab_matches_node_constraints_mode(
graph: &Graph,
node_id: NodeId,
pattern_node: &PatternNode,
match_mode: &str,
) -> bool {
let idx = node_id.as_usize();
if idx >= graph.nodes.count as usize {
return false;
}
if let Some(ref type_str) = pattern_node.node_type {
if let Some(expected_type) = ab_str_to_node_type(type_str) {
if graph.nodes.node_type[idx] != expected_type {
return false;
}
}
}
if let Some(ref substring) = pattern_node.label_contains {
let label = graph.strings.resolve(graph.nodes.label[idx]);
match match_mode {
"exact" => {
if !label.eq_ignore_ascii_case(substring) {
return false;
}
}
"regex" => {
if !label.to_lowercase().contains(&substring.to_lowercase()) {
return false;
}
}
_ => {
if !label.to_lowercase().contains(&substring.to_lowercase()) {
return false;
}
}
}
}
if !pattern_node.required_tags.is_empty() {
let node_tags = &graph.nodes.tags[idx];
for required_tag in &pattern_node.required_tags {
let tag_found = node_tags
.iter()
.any(|&t| graph.strings.resolve(t).eq_ignore_ascii_case(required_tag));
if !tag_found {
return false;
}
}
}
true
}
/// Runs every eligible antibody against the graph and collects the matches.
///
/// * `antibodies` — updated in place: `match_count` and `last_match_at` are
///   refreshed for antibodies that matched, and an antibody that reaches its
///   per-antibody cap is disabled and listed in `auto_disabled_antibodies`.
/// * `scope` / `last_scan_generation` — only affect the reported
///   `nodes_scanned` figure; matching always runs over the whole graph.
/// * `max_matches` — cap on the total number of returned matches.
/// * `min_severity` — antibodies ranked below it are skipped.
/// * `antibody_ids` — when given, only antibodies with these ids are run.
/// * `max_matches_per_antibody` — per-antibody cap; `0` selects
///   `MAX_MATCHES_PER_ANTIBODY`.
/// * `match_mode`, `similarity_threshold` — passed on to
///   `match_antibody_with_options`.
///
/// Each antibody gets `PATTERN_MATCH_TIMEOUT_MS`; no new antibody is started
/// once `TOTAL_SCAN_TIMEOUT_MS` has elapsed.
// The signature is part of the public API, so the long argument list stays.
#[allow(clippy::too_many_arguments)]
pub fn scan_antibodies(
    graph: &Graph,
    antibodies: &mut [Antibody],
    scope: &str,
    last_scan_generation: u64,
    max_matches: usize,
    min_severity: AntibodySeverity,
    antibody_ids: Option<&[String]>,
    max_matches_per_antibody: usize,
    match_mode: &str,
    similarity_threshold: f32,
) -> AntibodyScanResult {
    let start = Instant::now();
    let total_deadline = start + std::time::Duration::from_millis(TOTAL_SCAN_TIMEOUT_MS);
    let mut all_matches: Vec<AntibodyMatch> = Vec::new();
    let mut antibodies_checked: u32 = 0;
    let mut timed_out_antibodies: Vec<String> = Vec::new();
    let mut auto_disabled_antibodies: Vec<String> = Vec::new();

    // For the "changed" scope the figure is the generation delta, capped at
    // the node count; otherwise it is the node count itself.
    let nodes_scanned = if scope == "changed" {
        let gen_val = graph.generation.0;
        if gen_val > last_scan_generation {
            (gen_val - last_scan_generation).min(graph.nodes.count as u64) as u32
        } else {
            graph.nodes.count
        }
    } else {
        graph.nodes.count
    };

    let severity_rank = |s: AntibodySeverity| -> u8 {
        match s {
            AntibodySeverity::Info => 0,
            AntibodySeverity::Warning => 1,
            AntibodySeverity::Critical => 2,
        }
    };
    let min_sev_rank = severity_rank(min_severity);
    let effective_max = if max_matches_per_antibody > 0 {
        max_matches_per_antibody
    } else {
        MAX_MATCHES_PER_ANTIBODY
    };
    let now = ab_now();

    for antibody in antibodies.iter_mut() {
        if Instant::now() >= total_deadline {
            break;
        }
        if !antibody.enabled {
            continue;
        }
        if severity_rank(antibody.severity) < min_sev_rank {
            continue;
        }
        if let Some(ids) = antibody_ids {
            if !ids.contains(&antibody.id) {
                continue;
            }
        }
        antibodies_checked += 1;

        let before = Instant::now();
        let mut matches = match_antibody_with_options(
            graph,
            antibody,
            PATTERN_MATCH_TIMEOUT_MS,
            effective_max,
            match_mode,
            similarity_threshold,
        );
        let elapsed = before.elapsed().as_millis() as u64;
        if elapsed >= PATTERN_MATCH_TIMEOUT_MS {
            timed_out_antibodies.push(antibody.id.clone());
        }
        // Hitting the cap is taken as a sign the pattern is too generic.
        if matches.len() >= effective_max {
            antibody.enabled = false;
            auto_disabled_antibodies.push(antibody.id.clone());
        }
        if !matches.is_empty() {
            // The counter is persisted and grows across scans; saturate
            // instead of overflowing (which panics in debug builds).
            let found = u32::try_from(matches.len()).unwrap_or(u32::MAX);
            antibody.match_count = antibody.match_count.saturating_add(found);
            antibody.last_match_at = Some(now);
        }

        // Keep only as many matches as still fit under the global cap.
        let remaining = max_matches.saturating_sub(all_matches.len());
        matches.truncate(remaining);
        all_matches.extend(matches);
        if all_matches.len() >= max_matches {
            break;
        }
    }

    let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
    AntibodyScanResult {
        matches: all_matches,
        antibodies_checked,
        nodes_scanned,
        elapsed_ms,
        scan_scope: scope.to_string(),
        timed_out_antibodies,
        auto_disabled_antibodies,
    }
}
/// Builds an antibody from a set of graph nodes and the edges between them.
///
/// Each distinct, in-range node id becomes one pattern node constrained by its
/// type, all of its tags and the rarest segment of its label. Every graph edge
/// whose two endpoints are both pattern nodes becomes a pattern edge with its
/// relation (one edge per ordered node pair).
///
/// The pattern is limited to `MAX_PATTERN_NODES` nodes and
/// `MAX_PATTERN_EDGES` edges. The node limit is applied while collecting, so
/// no edge can refer to a dropped node; the earlier truncate-afterwards
/// approach could leave edge indices past the end of the node list, which
/// panics during matching. Duplicate ids in `node_ids` are ignored, because
/// two identical pattern nodes would have to bind to two different graph
/// nodes.
///
/// `source_nodes` records the external id of every distinct valid input node,
/// including those beyond the node limit.
///
/// Returns `None` when `node_ids` is empty, the graph is not finalized, or the
/// pattern's specificity is below `MIN_AUTO_EXTRACT_SPECIFICITY`.
pub fn extract_antibody_from_learn(
    graph: &Graph,
    node_ids: &[NodeId],
    name: &str,
    query: &str,
    agent_id: &str,
) -> Option<Antibody> {
    if node_ids.is_empty() || !graph.finalized {
        return None;
    }
    let node_count = graph.nodes.count as usize;
    let mut pattern_nodes: Vec<PatternNode> = Vec::new();
    // Graph node behind each pattern node, in pattern order.
    let mut pattern_node_ids: Vec<NodeId> = Vec::new();
    let mut source_nodes: Vec<String> = Vec::new();
    let mut node_to_pat: std::collections::HashMap<NodeId, usize> =
        std::collections::HashMap::new();
    let mut seen: HashSet<NodeId> = HashSet::new();

    for &nid in node_ids {
        let idx = nid.as_usize();
        if idx >= node_count || !seen.insert(nid) {
            continue;
        }
        let ext_id = graph
            .id_to_node
            .iter()
            .find(|(_, &n)| n == nid)
            .map(|(interned, _)| graph.strings.resolve(*interned).to_string())
            .unwrap_or_default();
        source_nodes.push(ext_id);

        // Past the limit the node is still recorded as a source, but it gets
        // no pattern slot, so no edge can refer to it.
        if pattern_nodes.len() >= MAX_PATTERN_NODES {
            continue;
        }
        let pat_idx = pattern_nodes.len();
        node_to_pat.insert(nid, pat_idx);
        pattern_node_ids.push(nid);

        let type_name = ab_node_type_to_str(graph.nodes.node_type[idx]);
        let label = graph.strings.resolve(graph.nodes.label[idx]);
        let tags: Vec<String> = graph.nodes.tags[idx]
            .iter()
            .map(|&t| graph.strings.resolve(t).to_string())
            .collect();
        pattern_nodes.push(PatternNode {
            role: format!("{}_{}", type_name.to_lowercase(), pat_idx),
            node_type: Some(type_name.to_string()),
            required_tags: tags,
            label_contains: ab_extract_discriminating_substring(graph, label),
        });
    }

    let mut pattern_edges: Vec<PatternEdge> = Vec::new();
    for (src_pat, &nid) in pattern_node_ids.iter().enumerate() {
        for i in graph.csr.out_range(nid) {
            let target = graph.csr.targets[i];
            if let Some(&tgt_pat) = node_to_pat.get(&target) {
                // Parallel edges collapse to the first relation seen.
                let edge_exists = pattern_edges
                    .iter()
                    .any(|e| e.source_idx == src_pat && e.target_idx == tgt_pat);
                if !edge_exists {
                    let relation = graph.strings.resolve(graph.csr.relations[i]).to_string();
                    pattern_edges.push(PatternEdge {
                        source_idx: src_pat,
                        target_idx: tgt_pat,
                        relation: Some(relation),
                    });
                }
            }
        }
    }
    pattern_edges.truncate(MAX_PATTERN_EDGES);

    let pattern = AntibodyPattern {
        nodes: pattern_nodes,
        edges: pattern_edges,
        negative_edges: Vec::new(),
    };
    let specificity = compute_specificity(&pattern);
    if specificity < MIN_AUTO_EXTRACT_SPECIFICITY {
        return None;
    }
    Some(Antibody {
        id: ab_generate_id(),
        name: name.to_string(),
        description: format!("Auto-extracted from learn() query: {}", query),
        pattern,
        severity: AntibodySeverity::Info,
        match_count: 0,
        created_at: ab_now(),
        last_match_at: None,
        created_by: agent_id.to_string(),
        source_query: query.to_string(),
        source_nodes,
        enabled: true,
        specificity,
    })
}
/// Picks the part of `label` that occurs in the fewest graph labels.
///
/// The label is split on `:`, `_`, `.`, `/` and `\`; segments shorter than two
/// bytes are dropped. Each remaining segment is counted as a case-insensitive
/// substring across all node labels, and the rarest wins (the earliest on a
/// tie). When no segment survives, the whole label is returned if it is at
/// least two bytes long, otherwise `None`.
fn ab_extract_discriminating_substring(graph: &Graph, label: &str) -> Option<String> {
    const DELIMITERS: [char; 5] = [':', '_', '.', '/', '\\'];
    let segments: Vec<&str> = label
        .split(|c: char| DELIMITERS.contains(&c))
        .filter(|part| part.len() >= 2)
        .collect();

    if segments.is_empty() {
        return if label.len() >= 2 {
            Some(label.to_string())
        } else {
            None
        };
    }

    let node_total = graph.nodes.count as usize;
    let occurrences = |segment: &str| -> usize {
        let needle = segment.to_lowercase();
        (0..node_total)
            .filter(|&i| {
                graph
                    .strings
                    .resolve(graph.nodes.label[i])
                    .to_lowercase()
                    .contains(&needle)
            })
            .count()
    };

    // `min_by_key` returns the first of several equally small elements.
    segments
        .into_iter()
        .min_by_key(|segment| occurrences(*segment))
        .map(|segment| segment.to_string())
}
/// Generates an antibody id of the form `ab-xxxxxxxx-xxxx-4xxx-[89ab]xxx-xxxxxxxxxxxx`.
///
/// The id is built from the current time in nanoseconds since the Unix epoch
/// plus a process-wide sequence number in the fourth group. The sequence keeps
/// ids distinct when several are generated within one clock tick; previously
/// that group repeated timestamp bits already present in the last group, so
/// two such calls produced the same id.
fn ab_generate_id() -> String {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Only uniqueness matters, not ordering with other memory operations.
    static SEQUENCE: AtomicUsize = AtomicUsize::new(0);

    let ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_nanos())
        .unwrap_or(0);
    let seq = SEQUENCE.fetch_add(1, Ordering::Relaxed);
    format!(
        "ab-{:08x}-{:04x}-{:04x}-{:04x}-{:012x}",
        (ts >> 32) as u32,
        (ts >> 16) as u16,
        (((ts >> 8) & 0x0FFF) | 0x4000) as u16,
        ((seq & 0x3FFF) | 0x8000) as u16,
        (ts & 0xFFFF_FFFF_FFFF) as u64
    )
}
/// Structural similarity of two patterns in `[0.0, 1.0]`.
///
/// Two empty patterns score `1.0`; an empty and a non-empty one score `0.0`.
/// Otherwise the score is a weighted sum of:
/// * 0.4 × Jaccard overlap of the sets of node-type constraints,
/// * 0.3 × Jaccard overlap of the sets of edge relations (`1.0` when neither
///   pattern has edges),
/// * 0.2 × ratio of the smaller to the larger node count,
/// * 0.1 × ratio of the smaller to the larger negative-edge count (`1.0` when
///   neither pattern has negative edges).
pub fn pattern_similarity(a: &AntibodyPattern, b: &AntibodyPattern) -> f32 {
    /// Intersection-over-union of two sets, with a fallback for an empty union.
    fn jaccard<'s>(
        left: &HashSet<Option<&'s str>>,
        right: &HashSet<Option<&'s str>>,
        when_both_empty: f32,
    ) -> f32 {
        let union = left.union(right).count();
        if union == 0 {
            return when_both_empty;
        }
        left.intersection(right).count() as f32 / union as f32
    }

    match (a.nodes.is_empty(), b.nodes.is_empty()) {
        (true, true) => return 1.0,
        (true, false) | (false, true) => return 0.0,
        (false, false) => {}
    }

    let types_of = |p: &AntibodyPattern| -> HashSet<Option<String>> {
        p.nodes.iter().map(|n| n.node_type.clone()).collect()
    };
    let relations_of = |p: &AntibodyPattern| -> HashSet<Option<String>> {
        p.edges.iter().map(|e| e.relation.clone()).collect()
    };
    let (a_type_names, b_type_names) = (types_of(a), types_of(b));
    let (a_rel_names, b_rel_names) = (relations_of(a), relations_of(b));
    let a_types: HashSet<Option<&str>> = a_type_names.iter().map(|t| t.as_deref()).collect();
    let b_types: HashSet<Option<&str>> = b_type_names.iter().map(|t| t.as_deref()).collect();
    let a_rels: HashSet<Option<&str>> = a_rel_names.iter().map(|r| r.as_deref()).collect();
    let b_rels: HashSet<Option<&str>> = b_rel_names.iter().map(|r| r.as_deref()).collect();

    let type_sim = jaccard(&a_types, &b_types, 0.0);
    let rel_sim = jaccard(&a_rels, &b_rels, 1.0);

    let (nodes_a, nodes_b) = (a.nodes.len(), b.nodes.len());
    let size_sim = nodes_a.min(nodes_b) as f32 / nodes_a.max(nodes_b) as f32;

    let (neg_a, neg_b) = (a.negative_edges.len(), b.negative_edges.len());
    let neg_sim = if neg_a == 0 && neg_b == 0 {
        1.0
    } else {
        neg_a.min(neg_b) as f32 / (neg_a.max(neg_b) as f32).max(1.0)
    };

    0.4 * type_sim + 0.3 * rel_sim + 0.2 * size_sim + 0.1 * neg_sim
}
/// On-disk envelope for the antibody list, serialized as JSON.
#[derive(Serialize, Deserialize)]
struct AntibodyPersistence {
// Written as 1 by `save_antibodies`; `load_antibodies` does not inspect it.
version: u32,
// The stored antibodies.
antibodies: Vec<Antibody>,
}
/// Writes the antibodies to `path` as pretty-printed JSON.
///
/// The data goes to a sibling file with the extension `json.tmp` first and is
/// then renamed over `path`. When `path` already exists, it is first copied to
/// a sibling with the extension `json.bak`; a failure of that copy is ignored.
///
/// # Errors
///
/// Returns `M1ndError::PersistenceFailed` when serialization, creating,
/// writing or flushing the temporary file, or the final rename fails.
pub fn save_antibodies(antibodies: &[Antibody], path: &Path) -> M1ndResult<()> {
    let envelope = AntibodyPersistence {
        version: 1,
        antibodies: antibodies.to_vec(),
    };
    let payload = serde_json::to_string_pretty(&envelope)
        .map_err(|e| M1ndError::PersistenceFailed(format!("antibody serialization: {}", e)))?;

    let staging_path = path.with_extension("json.tmp");
    if path.exists() {
        // Best-effort backup of the previous file; its result is deliberately ignored.
        let _ = std::fs::copy(path, path.with_extension("json.bak"));
    }

    {
        // Scoped so the file handle is closed before the rename.
        let staging_file = std::fs::File::create(&staging_path).map_err(|e| {
            M1ndError::PersistenceFailed(format!("antibody temp file create: {}", e))
        })?;
        let mut out = std::io::BufWriter::new(staging_file);
        out.write_all(payload.as_bytes())
            .map_err(|e| M1ndError::PersistenceFailed(format!("antibody write: {}", e)))?;
        out.flush()
            .map_err(|e| M1ndError::PersistenceFailed(format!("antibody flush: {}", e)))?;
    }

    std::fs::rename(&staging_path, path)
        .map_err(|e| M1ndError::PersistenceFailed(format!("antibody rename: {}", e)))?;
    Ok(())
}
/// Loads the antibodies stored at `path`.
///
/// A missing file yields an empty list. When the file does not parse, a
/// warning is printed to stderr and the sibling with the extension `json.bak`
/// is tried; when that is missing or does not parse either, another warning is
/// printed and an empty list is returned.
///
/// # Errors
///
/// Returns `M1ndError::PersistenceFailed` only when an existing main or backup
/// file cannot be read.
pub fn load_antibodies(path: &Path) -> M1ndResult<Vec<Antibody>> {
    if !path.exists() {
        return Ok(Vec::new());
    }
    let content = std::fs::read_to_string(path)
        .map_err(|e| M1ndError::PersistenceFailed(format!("antibody read: {}", e)))?;

    let parse_error = match serde_json::from_str::<AntibodyPersistence>(&content) {
        Ok(data) => return Ok(data.antibodies),
        Err(e) => e,
    };
    eprintln!(
        "[m1nd] WARNING: antibodies.json parse failed: {}. Trying backup.",
        parse_error
    );

    let bak_path = path.with_extension("json.bak");
    if !bak_path.exists() {
        eprintln!("[m1nd] WARNING: no antibody backup found. Starting empty.");
        return Ok(Vec::new());
    }
    let bak_content = std::fs::read_to_string(&bak_path)
        .map_err(|e2| M1ndError::PersistenceFailed(format!("antibody backup read: {}", e2)))?;
    match serde_json::from_str::<AntibodyPersistence>(&bak_content) {
        Ok(data) => Ok(data.antibodies),
        Err(_) => {
            eprintln!("[m1nd] WARNING: antibody backup also failed. Starting empty.");
            Ok(Vec::new())
        }
    }
}
// Unit tests for antibody construction, matching, specificity, similarity
// and persistence.
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::Graph;
use crate::types::*;
// Wraps a pattern in an enabled, Warning-severity antibody with fixed metadata.
fn make_antibody(pattern: AntibodyPattern) -> Antibody {
Antibody {
id: "test-ab-001".to_string(),
name: "Test Antibody".to_string(),
description: "unit test antibody".to_string(),
pattern,
severity: AntibodySeverity::Warning,
match_count: 0,
created_at: 0.0,
last_match_at: None,
created_by: "test".to_string(),
source_query: "test".to_string(),
source_nodes: Vec::new(),
enabled: true,
specificity: 0.5,
}
}
// Builds a finalized graph with a Function node (index 0) and a Module node
// (index 1) joined by one forward edge 0 -> 1 carrying `relation`.
fn build_two_node_graph(label_a: &str, label_b: &str, relation: &str) -> Graph {
let mut g = Graph::new();
g.add_node("node_a", label_a, NodeType::Function, &[], 1.0, 0.5)
.unwrap();
g.add_node("node_b", label_b, NodeType::Module, &[], 0.8, 0.3)
.unwrap();
g.add_edge(
NodeId::new(0),
NodeId::new(1),
relation,
FiniteF32::new(0.9),
EdgeDirection::Forward,
false,
FiniteF32::new(0.5),
)
.unwrap();
g.finalize().unwrap();
g
}
// The helper produces an enabled Warning antibody that keeps the given pattern.
#[test]
fn create_antibody_single_node_pattern() {
let pat = AntibodyPattern {
nodes: vec![PatternNode {
role: "target".to_string(),
node_type: Some("function".to_string()),
required_tags: Vec::new(),
label_contains: Some("handle".to_string()),
}],
edges: Vec::new(),
negative_edges: Vec::new(),
};
let ab = make_antibody(pat);
assert!(ab.enabled);
assert_eq!(ab.severity, AntibodySeverity::Warning);
assert_eq!(ab.pattern.nodes.len(), 1);
assert_eq!(ab.pattern.edges.len(), 0);
}
// A finalized graph without nodes yields no matches.
#[test]
fn scan_empty_graph_returns_no_matches() {
let mut g = Graph::new();
g.finalize().unwrap();
let pat = AntibodyPattern {
nodes: vec![PatternNode {
role: "a".to_string(),
node_type: None,
required_tags: Vec::new(),
label_contains: Some("anything".to_string()),
}],
edges: Vec::new(),
negative_edges: Vec::new(),
};
let ab = make_antibody(pat);
let matches = match_antibody(&g, &ab, 100);
assert!(matches.is_empty());
}
// The default matcher finds a node whose label contains the pattern text.
#[test]
fn scan_substring_match_finds_node() {
let g = build_two_node_graph("handle_request", "router_module", "calls");
let pat = AntibodyPattern {
nodes: vec![PatternNode {
role: "entry".to_string(),
node_type: None,
required_tags: Vec::new(),
label_contains: Some("handle".to_string()),
}],
edges: Vec::new(),
negative_edges: Vec::new(),
};
let ab = make_antibody(pat);
let matches = match_antibody(&g, &ab, 500);
assert!(
!matches.is_empty(),
"should match handle_request via substring"
);
assert_eq!(matches[0].bound_nodes[0].label, "handle_request");
}
// In "exact" mode a label that merely contains the pattern text is rejected.
#[test]
fn scan_exact_mode_matches_only_exact_label() {
let g = build_two_node_graph("handle_request", "handle_request_extra", "calls");
let pat = AntibodyPattern {
nodes: vec![PatternNode {
role: "fn".to_string(),
node_type: None,
required_tags: Vec::new(),
label_contains: Some("handle_request".to_string()),
}],
edges: Vec::new(),
negative_edges: Vec::new(),
};
let ab = make_antibody(pat);
let matches = match_antibody_with_options(&g, &ab, 500, 10, "exact", 0.8);
assert!(!matches.is_empty());
for m in &matches {
for bn in &m.bound_nodes {
assert_ne!(
bn.label, "handle_request_extra",
"exact mode should not match handle_request_extra"
);
}
}
}
// All three node constraints, a typed edge and a negative edge give 5/5.
#[test]
fn specificity_all_constraints_filled() {
let pat = AntibodyPattern {
nodes: vec![PatternNode {
role: "fn".to_string(),
node_type: Some("function".to_string()),
required_tags: vec!["hot".to_string()],
label_contains: Some("init".to_string()),
}],
edges: vec![PatternEdge {
source_idx: 0,
target_idx: 0,
relation: Some("calls".to_string()),
}],
negative_edges: vec![PatternEdge {
source_idx: 0,
target_idx: 0,
relation: None,
}],
};
let s = compute_specificity(&pat);
assert!((s - 1.0).abs() < 0.01, "expected 1.0 but got {}", s);
}
// A pattern without nodes has specificity 0.
#[test]
fn specificity_empty_pattern_returns_zero() {
let pat = AntibodyPattern {
nodes: Vec::new(),
edges: Vec::new(),
negative_edges: Vec::new(),
};
assert_eq!(compute_specificity(&pat), 0.0);
}
// Saving and loading returns the same antibody.
// NOTE(review): the file name in the temp directory is fixed, so concurrent
// runs of this test share it; only the main file is removed afterwards.
#[test]
fn save_and_load_antibodies_round_trip() {
let tmpdir = std::env::temp_dir();
let path = tmpdir.join("test_antibodies.json");
let pat = AntibodyPattern {
nodes: vec![PatternNode {
role: "r".to_string(),
node_type: Some("module".to_string()),
required_tags: Vec::new(),
label_contains: Some("router".to_string()),
}],
edges: Vec::new(),
negative_edges: Vec::new(),
};
let ab = make_antibody(pat);
let antibodies = vec![ab];
save_antibodies(&antibodies, &path).expect("save should succeed");
let loaded = load_antibodies(&path).expect("load should succeed");
assert_eq!(loaded.len(), 1);
assert_eq!(loaded[0].id, "test-ab-001");
assert_eq!(loaded[0].name, "Test Antibody");
assert!(loaded[0].enabled);
let _ = std::fs::remove_file(&path);
}
// A disabled antibody is neither counted as checked nor matched.
#[test]
fn disabled_antibody_skipped_in_scan() {
let g = build_two_node_graph("handle_request", "router", "calls");
let pat = AntibodyPattern {
nodes: vec![PatternNode {
role: "n".to_string(),
node_type: None,
required_tags: Vec::new(),
label_contains: Some("handle".to_string()),
}],
edges: Vec::new(),
negative_edges: Vec::new(),
};
let mut ab = make_antibody(pat);
ab.enabled = false;
let mut antibodies = vec![ab];
let result = scan_antibodies(
&g,
&mut antibodies,
"all",
0,
100,
AntibodySeverity::Info,
None,
10,
"substring",
0.5,
);
assert_eq!(
result.antibodies_checked, 0,
"disabled antibody should be skipped"
);
assert!(result.matches.is_empty());
}
// The graph has an edge alpha -> beta, so forbidding any edge 0 -> 1 must
// suppress the match.
#[test]
fn negative_edge_prevents_match_when_edge_exists() {
let g = build_two_node_graph("alpha", "beta", "calls");
let pat = AntibodyPattern {
nodes: vec![
PatternNode {
role: "src".to_string(),
node_type: None,
required_tags: Vec::new(),
label_contains: Some("alpha".to_string()),
},
PatternNode {
role: "tgt".to_string(),
node_type: None,
required_tags: Vec::new(),
label_contains: Some("beta".to_string()),
},
],
edges: Vec::new(),
negative_edges: vec![PatternEdge {
source_idx: 0,
target_idx: 1,
relation: None,
}],
};
let ab = make_antibody(pat);
let matches = match_antibody(&g, &ab, 500);
assert!(
matches.is_empty(),
"negative edge should block match when edge exists"
);
}
// Comparing a pattern with itself gives a similarity of about 1.0.
#[test]
fn pattern_similarity_identical_patterns() {
let pat = AntibodyPattern {
nodes: vec![PatternNode {
role: "r".to_string(),
node_type: Some("function".to_string()),
required_tags: Vec::new(),
label_contains: None,
}],
edges: Vec::new(),
negative_edges: Vec::new(),
};
let sim = pattern_similarity(&pat, &pat);
assert!(
(sim - 1.0).abs() < 0.01,
"identical patterns should have similarity ~1.0, got {}",
sim
);
}
// Patterns that differ only in node type score below 1.0 but not below 0.
#[test]
fn pattern_similarity_disjoint_node_types() {
let pat_a = AntibodyPattern {
nodes: vec![PatternNode {
role: "r".to_string(),
node_type: Some("function".to_string()),
required_tags: Vec::new(),
label_contains: None,
}],
edges: Vec::new(),
negative_edges: Vec::new(),
};
let pat_b = AntibodyPattern {
nodes: vec![PatternNode {
role: "r".to_string(),
node_type: Some("file".to_string()),
required_tags: Vec::new(),
label_contains: None,
}],
edges: Vec::new(),
negative_edges: Vec::new(),
};
let sim = pattern_similarity(&pat_a, &pat_b);
assert!(
sim < 1.0,
"disjoint node types should reduce similarity; got {}",
sim
);
assert!(sim >= 0.0);
}
}