Skip to main content

engram/graph/
mod.rs

1//! Knowledge graph visualization (RML-894 improvements)
2//!
3//! Provides:
4//! - Interactive graph visualization with vis.js
5//! - Graph clustering and community detection
6//! - Graph statistics and metrics
7//! - Export to multiple formats (HTML, DOT, JSON)
8//! - Filtering and traversal utilities
9//! - Temporal knowledge graph with validity periods (RML-1235)
10
11pub mod coactivation;
12pub mod conflicts;
13pub mod temporal;
14pub mod triplets;
15
16use chrono::{DateTime, Utc};
17use serde::{Deserialize, Serialize};
18use std::collections::{HashMap, HashSet, VecDeque};
19
20use crate::types::{CrossReference, Memory, MemoryId};
21
22/// Graph node
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct GraphNode {
25    pub id: MemoryId,
26    pub label: String,
27    pub memory_type: String,
28    pub importance: f32,
29    pub tags: Vec<String>,
30}
31
32/// Graph edge
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct GraphEdge {
35    pub from: MemoryId,
36    pub to: MemoryId,
37    pub edge_type: String,
38    pub score: f32,
39    pub confidence: f32,
40}
41
42/// Knowledge graph structure
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct KnowledgeGraph {
45    pub nodes: Vec<GraphNode>,
46    pub edges: Vec<GraphEdge>,
47}
48
49impl KnowledgeGraph {
50    /// Create graph from memories and cross-references
51    pub fn from_data(memories: &[Memory], crossrefs: &[CrossReference]) -> Self {
52        let nodes: Vec<GraphNode> = memories
53            .iter()
54            .map(|m| GraphNode {
55                id: m.id,
56                label: truncate_label(&m.content, 50),
57                memory_type: m.memory_type.as_str().to_string(),
58                importance: m.importance,
59                tags: m.tags.clone(),
60            })
61            .collect();
62
63        let memory_ids: std::collections::HashSet<MemoryId> =
64            memories.iter().map(|m| m.id).collect();
65
66        let edges: Vec<GraphEdge> = crossrefs
67            .iter()
68            .filter(|cr| memory_ids.contains(&cr.from_id) && memory_ids.contains(&cr.to_id))
69            .map(|cr| GraphEdge {
70                from: cr.from_id,
71                to: cr.to_id,
72                edge_type: cr.edge_type.as_str().to_string(),
73                score: cr.score,
74                confidence: cr.confidence,
75            })
76            .collect();
77
78        Self { nodes, edges }
79    }
80
81    /// Export as vis.js compatible JSON
82    pub fn to_visjs_json(&self) -> serde_json::Value {
83        let nodes: Vec<serde_json::Value> = self
84            .nodes
85            .iter()
86            .map(|n| {
87                serde_json::json!({
88                    "id": n.id,
89                    "label": n.label,
90                    "group": n.memory_type,
91                    "value": (n.importance * 10.0) as i32 + 5,
92                    "title": format!("Type: {}\nTags: {}", n.memory_type, n.tags.join(", "))
93                })
94            })
95            .collect();
96
97        let edges: Vec<serde_json::Value> = self
98            .edges
99            .iter()
100            .map(|e| {
101                serde_json::json!({
102                    "from": e.from,
103                    "to": e.to,
104                    "label": e.edge_type,
105                    "value": (e.score * e.confidence * 5.0) as i32 + 1,
106                    "title": format!("Score: {:.2}, Confidence: {:.2}", e.score, e.confidence)
107                })
108            })
109            .collect();
110
111        serde_json::json!({
112            "nodes": nodes,
113            "edges": edges
114        })
115    }
116
117    /// Export as standalone HTML with vis.js
118    pub fn to_html(&self) -> String {
119        let graph_data = self.to_visjs_json();
120
121        format!(
122            r#"<!DOCTYPE html>
123<html>
124<head>
125    <title>Engram Knowledge Graph</title>
126    <script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
127    <style>
128        body {{ margin: 0; padding: 0; font-family: system-ui, sans-serif; }}
129        #graph {{ width: 100vw; height: 100vh; }}
130        #controls {{
131            position: absolute;
132            top: 10px;
133            left: 10px;
134            background: white;
135            padding: 10px;
136            border-radius: 8px;
137            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
138        }}
139        #search {{ padding: 8px; width: 200px; border: 1px solid #ddd; border-radius: 4px; }}
140        .legend {{ display: flex; gap: 10px; margin-top: 10px; flex-wrap: wrap; }}
141        .legend-item {{ display: flex; align-items: center; gap: 5px; font-size: 12px; }}
142        .legend-dot {{ width: 12px; height: 12px; border-radius: 50%; }}
143    </style>
144</head>
145<body>
146    <div id="controls">
147        <input type="text" id="search" placeholder="Search nodes...">
148        <div class="legend">
149            <div class="legend-item"><span class="legend-dot" style="background: #97C2FC;"></span> note</div>
150            <div class="legend-item"><span class="legend-dot" style="background: #FFFF00;"></span> todo</div>
151            <div class="legend-item"><span class="legend-dot" style="background: #FB7E81;"></span> issue</div>
152            <div class="legend-item"><span class="legend-dot" style="background: #7BE141;"></span> decision</div>
153            <div class="legend-item"><span class="legend-dot" style="background: #FFA807;"></span> preference</div>
154            <div class="legend-item"><span class="legend-dot" style="background: #6E6EFD;"></span> learning</div>
155        </div>
156    </div>
157    <div id="graph"></div>
158    <script>
159        const data = {graph_data};
160
161        const options = {{
162            nodes: {{
163                shape: 'dot',
164                scaling: {{ min: 10, max: 30 }},
165                font: {{ size: 12, face: 'system-ui' }}
166            }},
167            edges: {{
168                arrows: 'to',
169                scaling: {{ min: 1, max: 5 }},
170                font: {{ size: 10, align: 'middle' }}
171            }},
172            groups: {{
173                note: {{ color: '#97C2FC' }},
174                todo: {{ color: '#FFFF00' }},
175                issue: {{ color: '#FB7E81' }},
176                decision: {{ color: '#7BE141' }},
177                preference: {{ color: '#FFA807' }},
178                learning: {{ color: '#6E6EFD' }},
179                context: {{ color: '#C2FABC' }},
180                credential: {{ color: '#FD6A6A' }}
181            }},
182            physics: {{
183                stabilization: {{ iterations: 100 }},
184                barnesHut: {{
185                    gravitationalConstant: -2000,
186                    springLength: 100
187                }}
188            }},
189            interaction: {{
190                hover: true,
191                tooltipDelay: 100
192            }}
193        }};
194
195        const container = document.getElementById('graph');
196        const network = new vis.Network(container, data, options);
197
198        // Search functionality
199        const searchInput = document.getElementById('search');
200        searchInput.addEventListener('input', function() {{
201            const query = this.value.toLowerCase();
202            if (query) {{
203                const matchingNodes = data.nodes.filter(n =>
204                    n.label.toLowerCase().includes(query)
205                ).map(n => n.id);
206                network.selectNodes(matchingNodes);
207                if (matchingNodes.length > 0) {{
208                    network.focus(matchingNodes[0], {{ scale: 1.5, animation: true }});
209                }}
210            }} else {{
211                network.unselectAll();
212            }}
213        }});
214
215        // Click to focus
216        network.on('click', function(params) {{
217            if (params.nodes.length > 0) {{
218                network.focus(params.nodes[0], {{ scale: 1.5, animation: true }});
219            }}
220        }});
221    </script>
222</body>
223</html>"#,
224            graph_data = serde_json::to_string(&graph_data).unwrap_or_default()
225        )
226    }
227}
228
229/// Truncate content for display as node label
230fn truncate_label(content: &str, max_len: usize) -> String {
231    let first_line = content.lines().next().unwrap_or(content);
232    if first_line.len() <= max_len {
233        first_line.to_string()
234    } else {
235        format!("{}...", &first_line[..max_len - 3])
236    }
237}
238
239// =============================================================================
240// Graph Statistics (RML-894)
241// =============================================================================
242
243/// Graph statistics and metrics
244#[derive(Debug, Clone, Serialize, Deserialize)]
245pub struct GraphStats {
246    /// Total number of nodes
247    pub node_count: usize,
248    /// Total number of edges
249    pub edge_count: usize,
250    /// Average degree (edges per node)
251    pub avg_degree: f32,
252    /// Graph density (actual edges / possible edges)
253    pub density: f32,
254    /// Number of connected components
255    pub component_count: usize,
256    /// Size of largest component
257    pub largest_component_size: usize,
258    /// Nodes by memory type
259    pub nodes_by_type: HashMap<String, usize>,
260    /// Edges by type
261    pub edges_by_type: HashMap<String, usize>,
262    /// Most connected nodes (top 10 by degree)
263    pub hub_nodes: Vec<(MemoryId, usize)>,
264    /// Isolated nodes (degree 0)
265    pub isolated_count: usize,
266}
267
268impl KnowledgeGraph {
269    /// Calculate graph statistics
270    pub fn stats(&self) -> GraphStats {
271        let node_count = self.nodes.len();
272        let edge_count = self.edges.len();
273
274        // Build adjacency for degree calculation
275        let mut degree: HashMap<MemoryId, usize> = HashMap::new();
276        for node in &self.nodes {
277            degree.insert(node.id, 0);
278        }
279        for edge in &self.edges {
280            *degree.entry(edge.from).or_insert(0) += 1;
281            *degree.entry(edge.to).or_insert(0) += 1;
282        }
283
284        let avg_degree = if node_count > 0 {
285            degree.values().sum::<usize>() as f32 / node_count as f32
286        } else {
287            0.0
288        };
289
290        // Density: edges / (n * (n-1) / 2) for undirected, edges / (n * (n-1)) for directed
291        let density = if node_count > 1 {
292            edge_count as f32 / (node_count * (node_count - 1)) as f32
293        } else {
294            0.0
295        };
296
297        // Count by type
298        let mut nodes_by_type: HashMap<String, usize> = HashMap::new();
299        for node in &self.nodes {
300            *nodes_by_type.entry(node.memory_type.clone()).or_insert(0) += 1;
301        }
302
303        let mut edges_by_type: HashMap<String, usize> = HashMap::new();
304        for edge in &self.edges {
305            *edges_by_type.entry(edge.edge_type.clone()).or_insert(0) += 1;
306        }
307
308        // Find hub nodes (top 10 by degree)
309        let mut degree_list: Vec<(MemoryId, usize)> =
310            degree.iter().map(|(&k, &v)| (k, v)).collect();
311        degree_list.sort_by(|a, b| b.1.cmp(&a.1));
312        let hub_nodes: Vec<(MemoryId, usize)> = degree_list.into_iter().take(10).collect();
313
314        // Count isolated nodes
315        let isolated_count = degree.values().filter(|&&d| d == 0).count();
316
317        // Find connected components using BFS
318        let components = self.find_connected_components();
319        let component_count = components.len();
320        let largest_component_size = components.iter().map(|c| c.len()).max().unwrap_or(0);
321
322        GraphStats {
323            node_count,
324            edge_count,
325            avg_degree,
326            density,
327            component_count,
328            largest_component_size,
329            nodes_by_type,
330            edges_by_type,
331            hub_nodes,
332            isolated_count,
333        }
334    }
335
336    /// Find connected components using BFS
337    fn find_connected_components(&self) -> Vec<Vec<MemoryId>> {
338        let node_ids: HashSet<MemoryId> = self.nodes.iter().map(|n| n.id).collect();
339
340        // Build adjacency list (undirected)
341        let mut adj: HashMap<MemoryId, Vec<MemoryId>> = HashMap::new();
342        for id in &node_ids {
343            adj.insert(*id, Vec::new());
344        }
345        for edge in &self.edges {
346            if let Some(list) = adj.get_mut(&edge.from) {
347                list.push(edge.to);
348            }
349            if let Some(list) = adj.get_mut(&edge.to) {
350                list.push(edge.from);
351            }
352        }
353
354        let mut visited: HashSet<MemoryId> = HashSet::new();
355        let mut components = Vec::new();
356
357        for &start in &node_ids {
358            if visited.contains(&start) {
359                continue;
360            }
361
362            let mut component = Vec::new();
363            let mut queue = VecDeque::new();
364            queue.push_back(start);
365            visited.insert(start);
366
367            while let Some(node) = queue.pop_front() {
368                component.push(node);
369                if let Some(neighbors) = adj.get(&node) {
370                    for &neighbor in neighbors {
371                        if !visited.contains(&neighbor) {
372                            visited.insert(neighbor);
373                            queue.push_back(neighbor);
374                        }
375                    }
376                }
377            }
378
379            components.push(component);
380        }
381
382        components
383    }
384
385    /// Calculate centrality scores for nodes
386    pub fn centrality(&self) -> HashMap<MemoryId, CentralityScores> {
387        let mut results: HashMap<MemoryId, CentralityScores> = HashMap::new();
388
389        // Build adjacency
390        let mut in_degree: HashMap<MemoryId, usize> = HashMap::new();
391        let mut out_degree: HashMap<MemoryId, usize> = HashMap::new();
392
393        for node in &self.nodes {
394            in_degree.insert(node.id, 0);
395            out_degree.insert(node.id, 0);
396        }
397
398        for edge in &self.edges {
399            *out_degree.entry(edge.from).or_insert(0) += 1;
400            *in_degree.entry(edge.to).or_insert(0) += 1;
401        }
402
403        let max_degree = self.nodes.len().saturating_sub(1).max(1) as f32;
404
405        for node in &self.nodes {
406            let in_d = *in_degree.get(&node.id).unwrap_or(&0) as f32;
407            let out_d = *out_degree.get(&node.id).unwrap_or(&0) as f32;
408
409            results.insert(
410                node.id,
411                CentralityScores {
412                    in_degree: in_d / max_degree,
413                    out_degree: out_d / max_degree,
414                    degree: (in_d + out_d) / (2.0 * max_degree),
415                    // Simplified closeness based on direct connections
416                    closeness: (in_d + out_d) / (2.0 * max_degree),
417                },
418            );
419        }
420
421        results
422    }
423}
424
425/// Centrality scores for a node
426#[derive(Debug, Clone, Serialize, Deserialize)]
427pub struct CentralityScores {
428    /// Normalized in-degree centrality
429    pub in_degree: f32,
430    /// Normalized out-degree centrality
431    pub out_degree: f32,
432    /// Combined degree centrality
433    pub degree: f32,
434    /// Closeness centrality (simplified)
435    pub closeness: f32,
436}
437
438// =============================================================================
439// Graph Filtering (RML-894)
440// =============================================================================
441
442/// Filter options for graph queries
443#[derive(Debug, Clone, Default)]
444pub struct GraphFilter {
445    /// Filter by memory types
446    pub memory_types: Option<Vec<String>>,
447    /// Filter by tags (any match)
448    pub tags: Option<Vec<String>>,
449    /// Filter by edge types
450    pub edge_types: Option<Vec<String>>,
451    /// Minimum importance threshold
452    pub min_importance: Option<f32>,
453    /// Maximum importance threshold
454    pub max_importance: Option<f32>,
455    /// Created after this date
456    pub created_after: Option<DateTime<Utc>>,
457    /// Created before this date
458    pub created_before: Option<DateTime<Utc>>,
459    /// Minimum edge confidence
460    pub min_confidence: Option<f32>,
461    /// Minimum edge score
462    pub min_score: Option<f32>,
463    /// Maximum number of nodes
464    pub limit: Option<usize>,
465}
466
467impl GraphFilter {
468    pub fn new() -> Self {
469        Self::default()
470    }
471
472    pub fn with_types(mut self, types: Vec<String>) -> Self {
473        self.memory_types = Some(types);
474        self
475    }
476
477    pub fn with_tags(mut self, tags: Vec<String>) -> Self {
478        self.tags = Some(tags);
479        self
480    }
481
482    pub fn with_min_importance(mut self, min: f32) -> Self {
483        self.min_importance = Some(min);
484        self
485    }
486
487    pub fn with_min_confidence(mut self, min: f32) -> Self {
488        self.min_confidence = Some(min);
489        self
490    }
491
492    pub fn with_limit(mut self, limit: usize) -> Self {
493        self.limit = Some(limit);
494        self
495    }
496}
497
498impl KnowledgeGraph {
499    /// Apply filter to create a subgraph
500    pub fn filter(&self, filter: &GraphFilter) -> KnowledgeGraph {
501        // Filter nodes
502        let mut filtered_nodes: Vec<GraphNode> = self
503            .nodes
504            .iter()
505            .filter(|n| {
506                // Type filter
507                if let Some(ref types) = filter.memory_types {
508                    if !types.contains(&n.memory_type) {
509                        return false;
510                    }
511                }
512
513                // Tag filter (any match)
514                if let Some(ref tags) = filter.tags {
515                    if !n.tags.iter().any(|t| tags.contains(t)) {
516                        return false;
517                    }
518                }
519
520                // Importance filter
521                if let Some(min) = filter.min_importance {
522                    if n.importance < min {
523                        return false;
524                    }
525                }
526                if let Some(max) = filter.max_importance {
527                    if n.importance > max {
528                        return false;
529                    }
530                }
531
532                true
533            })
534            .cloned()
535            .collect();
536
537        // Apply limit
538        if let Some(limit) = filter.limit {
539            filtered_nodes.truncate(limit);
540        }
541
542        // Get set of valid node IDs
543        let valid_ids: HashSet<MemoryId> = filtered_nodes.iter().map(|n| n.id).collect();
544
545        // Filter edges
546        let filtered_edges: Vec<GraphEdge> = self
547            .edges
548            .iter()
549            .filter(|e| {
550                // Both endpoints must be in filtered nodes
551                if !valid_ids.contains(&e.from) || !valid_ids.contains(&e.to) {
552                    return false;
553                }
554
555                // Edge type filter
556                if let Some(ref types) = filter.edge_types {
557                    if !types.contains(&e.edge_type) {
558                        return false;
559                    }
560                }
561
562                // Confidence filter
563                if let Some(min) = filter.min_confidence {
564                    if e.confidence < min {
565                        return false;
566                    }
567                }
568
569                // Score filter
570                if let Some(min) = filter.min_score {
571                    if e.score < min {
572                        return false;
573                    }
574                }
575
576                true
577            })
578            .cloned()
579            .collect();
580
581        KnowledgeGraph {
582            nodes: filtered_nodes,
583            edges: filtered_edges,
584        }
585    }
586
587    /// Get subgraph centered on a node with given depth
588    pub fn neighborhood(&self, center: MemoryId, depth: usize) -> KnowledgeGraph {
589        let mut visited: HashSet<MemoryId> = HashSet::new();
590        let mut current_level: HashSet<MemoryId> = HashSet::new();
591        current_level.insert(center);
592        visited.insert(center);
593
594        // Build adjacency
595        let mut adj: HashMap<MemoryId, Vec<MemoryId>> = HashMap::new();
596        for edge in &self.edges {
597            adj.entry(edge.from).or_default().push(edge.to);
598            adj.entry(edge.to).or_default().push(edge.from);
599        }
600
601        // BFS to depth
602        for _ in 0..depth {
603            let mut next_level: HashSet<MemoryId> = HashSet::new();
604            for &node in &current_level {
605                if let Some(neighbors) = adj.get(&node) {
606                    for &neighbor in neighbors {
607                        if !visited.contains(&neighbor) {
608                            visited.insert(neighbor);
609                            next_level.insert(neighbor);
610                        }
611                    }
612                }
613            }
614            current_level = next_level;
615        }
616
617        // Filter to visited nodes
618        let nodes: Vec<GraphNode> = self
619            .nodes
620            .iter()
621            .filter(|n| visited.contains(&n.id))
622            .cloned()
623            .collect();
624
625        let edges: Vec<GraphEdge> = self
626            .edges
627            .iter()
628            .filter(|e| visited.contains(&e.from) && visited.contains(&e.to))
629            .cloned()
630            .collect();
631
632        KnowledgeGraph { nodes, edges }
633    }
634}
635
636// =============================================================================
637// DOT Export (RML-894)
638// =============================================================================
639
640impl KnowledgeGraph {
641    /// Export as DOT format for Graphviz
642    pub fn to_dot(&self) -> String {
643        let mut dot = String::from("digraph knowledge_graph {\n");
644        dot.push_str("    rankdir=LR;\n");
645        dot.push_str("    node [shape=box, style=rounded];\n\n");
646
647        // Color mapping for memory types
648        let colors: HashMap<&str, &str> = [
649            ("note", "#97C2FC"),
650            ("todo", "#FFFF00"),
651            ("issue", "#FB7E81"),
652            ("decision", "#7BE141"),
653            ("preference", "#FFA807"),
654            ("learning", "#6E6EFD"),
655            ("context", "#C2FABC"),
656            ("credential", "#FD6A6A"),
657        ]
658        .into_iter()
659        .collect();
660
661        // Write nodes
662        for node in &self.nodes {
663            let color = colors.get(node.memory_type.as_str()).unwrap_or(&"#CCCCCC");
664            let label = node.label.replace('"', "\\\"");
665            dot.push_str(&format!(
666                "    \"{}\" [label=\"{}\", fillcolor=\"{}\", style=\"filled,rounded\"];\n",
667                node.id, label, color
668            ));
669        }
670
671        dot.push('\n');
672
673        // Write edges
674        for edge in &self.edges {
675            let style = match edge.edge_type.as_str() {
676                "related_to" => "solid",
677                "part_of" => "dashed",
678                "depends_on" => "bold",
679                "contradicts" => "dotted",
680                "supports" => "solid",
681                "references" => "dashed",
682                _ => "solid",
683            };
684            dot.push_str(&format!(
685                "    \"{}\" -> \"{}\" [label=\"{}\", style={}, penwidth={}];\n",
686                edge.from,
687                edge.to,
688                edge.edge_type,
689                style,
690                (edge.score * 2.0 + 0.5).min(3.0)
691            ));
692        }
693
694        dot.push_str("}\n");
695        dot
696    }
697
698    /// Export as GEXF format for Gephi
699    pub fn to_gexf(&self) -> String {
700        let mut gexf = String::from(
701            r#"<?xml version="1.0" encoding="UTF-8"?>
702<gexf xmlns="http://gexf.net/1.3" version="1.3">
703  <meta>
704    <creator>Engram</creator>
705    <description>Knowledge Graph Export</description>
706  </meta>
707  <graph mode="static" defaultedgetype="directed">
708    <attributes class="node">
709      <attribute id="0" title="type" type="string"/>
710      <attribute id="1" title="importance" type="float"/>
711    </attributes>
712    <attributes class="edge">
713      <attribute id="0" title="score" type="float"/>
714      <attribute id="1" title="confidence" type="float"/>
715    </attributes>
716    <nodes>
717"#,
718        );
719
720        for node in &self.nodes {
721            let label = node
722                .label
723                .replace('&', "&amp;")
724                .replace('<', "&lt;")
725                .replace('>', "&gt;")
726                .replace('"', "&quot;");
727            gexf.push_str(&format!(
728                r#"      <node id="{}" label="{}">
729        <attvalues>
730          <attvalue for="0" value="{}"/>
731          <attvalue for="1" value="{}"/>
732        </attvalues>
733      </node>
734"#,
735                node.id, label, node.memory_type, node.importance
736            ));
737        }
738
739        gexf.push_str("    </nodes>\n    <edges>\n");
740
741        for (i, edge) in self.edges.iter().enumerate() {
742            gexf.push_str(&format!(
743                r#"      <edge id="{}" source="{}" target="{}" label="{}">
744        <attvalues>
745          <attvalue for="0" value="{}"/>
746          <attvalue for="1" value="{}"/>
747        </attvalues>
748      </edge>
749"#,
750                i, edge.from, edge.to, edge.edge_type, edge.score, edge.confidence
751            ));
752        }
753
754        gexf.push_str("    </edges>\n  </graph>\n</gexf>\n");
755        gexf
756    }
757}
758
759// =============================================================================
760// Community Detection (RML-894)
761// =============================================================================
762
763/// A cluster/community of nodes
764#[derive(Debug, Clone, Serialize, Deserialize)]
765pub struct GraphCluster {
766    /// Cluster identifier
767    pub id: usize,
768    /// Node IDs in this cluster
769    pub members: Vec<MemoryId>,
770    /// Dominant memory type in cluster
771    pub dominant_type: Option<String>,
772    /// Common tags across cluster
773    pub common_tags: Vec<String>,
774    /// Internal edge count
775    pub internal_edges: usize,
776    /// Cluster cohesion score
777    pub cohesion: f32,
778}
779
780impl KnowledgeGraph {
781    /// Detect communities using label propagation algorithm
782    pub fn detect_communities(&self, max_iterations: usize) -> Vec<GraphCluster> {
783        if self.nodes.is_empty() {
784            return Vec::new();
785        }
786
787        // Initialize: each node in its own community
788        let mut labels: HashMap<MemoryId, usize> = self
789            .nodes
790            .iter()
791            .enumerate()
792            .map(|(i, n)| (n.id, i))
793            .collect();
794
795        // Build adjacency
796        let mut adj: HashMap<MemoryId, Vec<(MemoryId, f32)>> = HashMap::new();
797        for node in &self.nodes {
798            adj.insert(node.id, Vec::new());
799        }
800        for edge in &self.edges {
801            let weight = edge.score * edge.confidence;
802            adj.entry(edge.from).or_default().push((edge.to, weight));
803            adj.entry(edge.to).or_default().push((edge.from, weight));
804        }
805
806        // Label propagation
807        let node_ids: Vec<MemoryId> = self.nodes.iter().map(|n| n.id).collect();
808
809        for _ in 0..max_iterations {
810            let mut changed = false;
811
812            for &node_id in &node_ids {
813                if let Some(neighbors) = adj.get(&node_id) {
814                    if neighbors.is_empty() {
815                        continue;
816                    }
817
818                    // Count weighted votes for each label
819                    let mut votes: HashMap<usize, f32> = HashMap::new();
820                    for &(neighbor, weight) in neighbors {
821                        if let Some(&label) = labels.get(&neighbor) {
822                            *votes.entry(label).or_insert(0.0) += weight;
823                        }
824                    }
825
826                    // Pick label with most votes
827                    if let Some((&best_label, _)) = votes.iter().max_by(|a, b| a.1.total_cmp(b.1)) {
828                        let current = labels.get(&node_id).copied().unwrap_or(0);
829                        if best_label != current {
830                            labels.insert(node_id, best_label);
831                            changed = true;
832                        }
833                    }
834                }
835            }
836
837            if !changed {
838                break;
839            }
840        }
841
842        // Group nodes by label
843        let mut clusters_map: HashMap<usize, Vec<MemoryId>> = HashMap::new();
844        for (node_id, label) in &labels {
845            clusters_map.entry(*label).or_default().push(*node_id);
846        }
847
848        // Build cluster objects
849        let node_map: HashMap<MemoryId, &GraphNode> =
850            self.nodes.iter().map(|n| (n.id, n)).collect();
851
852        let mut clusters: Vec<GraphCluster> = clusters_map
853            .into_iter()
854            .enumerate()
855            .map(|(new_id, (_, members))| {
856                // Find dominant type
857                let mut type_counts: HashMap<&str, usize> = HashMap::new();
858                let mut all_tags: HashMap<&str, usize> = HashMap::new();
859
860                for &member_id in &members {
861                    if let Some(node) = node_map.get(&member_id) {
862                        *type_counts.entry(node.memory_type.as_str()).or_insert(0) += 1;
863                        for tag in &node.tags {
864                            *all_tags.entry(tag.as_str()).or_insert(0) += 1;
865                        }
866                    }
867                }
868
869                let dominant_type = type_counts
870                    .into_iter()
871                    .max_by_key(|(_, count)| *count)
872                    .map(|(t, _)| t.to_string());
873
874                // Common tags (present in > 50% of members)
875                let threshold = members.len() / 2;
876                let common_tags: Vec<String> = all_tags
877                    .into_iter()
878                    .filter(|(_, count)| *count > threshold)
879                    .map(|(tag, _)| tag.to_string())
880                    .collect();
881
882                // Count internal edges
883                let member_set: HashSet<MemoryId> = members.iter().copied().collect();
884                let internal_edges = self
885                    .edges
886                    .iter()
887                    .filter(|e| member_set.contains(&e.from) && member_set.contains(&e.to))
888                    .count();
889
890                // Cohesion: internal edges / possible internal edges
891                let n = members.len();
892                let possible = if n > 1 { n * (n - 1) } else { 1 };
893                let cohesion = internal_edges as f32 / possible as f32;
894
895                GraphCluster {
896                    id: new_id,
897                    members,
898                    dominant_type,
899                    common_tags,
900                    internal_edges,
901                    cohesion,
902                }
903            })
904            .collect();
905
906        // Sort by size (largest first)
907        clusters.sort_by(|a, b| b.members.len().cmp(&a.members.len()));
908
909        // Renumber IDs
910        for (i, cluster) in clusters.iter_mut().enumerate() {
911            cluster.id = i;
912        }
913
914        clusters
915    }
916}
917
918#[cfg(test)]
919mod tests {
920    use super::*;
921
922    fn make_node(id: MemoryId, memory_type: &str, tags: Vec<&str>) -> GraphNode {
923        GraphNode {
924            id,
925            label: format!("Node {}", id),
926            memory_type: memory_type.to_string(),
927            importance: 0.5,
928            tags: tags.into_iter().map(String::from).collect(),
929        }
930    }
931
932    fn make_edge(from: MemoryId, to: MemoryId, edge_type: &str) -> GraphEdge {
933        GraphEdge {
934            from,
935            to,
936            edge_type: edge_type.to_string(),
937            score: 0.8,
938            confidence: 0.9,
939        }
940    }
941
942    #[test]
943    fn test_truncate_label() {
944        assert_eq!(truncate_label("short", 50), "short");
945        assert_eq!(
946            truncate_label("this is a very long label that should be truncated", 20),
947            "this is a very lo..."
948        );
949    }
950
951    #[test]
952    fn test_graph_stats() {
953        let id1: MemoryId = 1;
954        let id2: MemoryId = 2;
955        let id3: MemoryId = 3;
956
957        let graph = KnowledgeGraph {
958            nodes: vec![
959                make_node(id1, "note", vec!["rust"]),
960                make_node(id2, "note", vec!["rust"]),
961                make_node(id3, "todo", vec!["python"]),
962            ],
963            edges: vec![
964                make_edge(id1, id2, "related_to"),
965                make_edge(id2, id3, "depends_on"),
966            ],
967        };
968
969        let stats = graph.stats();
970        assert_eq!(stats.node_count, 3);
971        assert_eq!(stats.edge_count, 2);
972        assert_eq!(stats.nodes_by_type.get("note"), Some(&2));
973        assert_eq!(stats.nodes_by_type.get("todo"), Some(&1));
974        assert_eq!(stats.isolated_count, 0);
975        assert_eq!(stats.component_count, 1);
976    }
977
978    #[test]
979    fn test_graph_filter() {
980        let id1: MemoryId = 1;
981        let id2: MemoryId = 2;
982        let id3: MemoryId = 3;
983
984        let graph = KnowledgeGraph {
985            nodes: vec![
986                make_node(id1, "note", vec!["rust"]),
987                make_node(id2, "note", vec!["python"]),
988                make_node(id3, "todo", vec!["rust"]),
989            ],
990            edges: vec![
991                make_edge(id1, id2, "related_to"),
992                make_edge(id2, id3, "depends_on"),
993            ],
994        };
995
996        // Filter by type
997        let filter = GraphFilter::new().with_types(vec!["note".to_string()]);
998        let filtered = graph.filter(&filter);
999        assert_eq!(filtered.nodes.len(), 2);
1000        assert_eq!(filtered.edges.len(), 1); // Only edge between notes
1001
1002        // Filter by tag
1003        let filter = GraphFilter::new().with_tags(vec!["rust".to_string()]);
1004        let filtered = graph.filter(&filter);
1005        assert_eq!(filtered.nodes.len(), 2); // id1 and id3 have "rust"
1006    }
1007
1008    #[test]
1009    fn test_neighborhood() {
1010        let id1: MemoryId = 1;
1011        let id2: MemoryId = 2;
1012        let id3: MemoryId = 3;
1013        let id4: MemoryId = 4;
1014
1015        let graph = KnowledgeGraph {
1016            nodes: vec![
1017                make_node(id1, "note", vec![]),
1018                make_node(id2, "note", vec![]),
1019                make_node(id3, "note", vec![]),
1020                make_node(id4, "note", vec![]),
1021            ],
1022            edges: vec![
1023                make_edge(id1, id2, "related_to"),
1024                make_edge(id2, id3, "related_to"),
1025                make_edge(id3, id4, "related_to"),
1026            ],
1027        };
1028
1029        // Depth 1 from id1 should include id1, id2
1030        let subgraph = graph.neighborhood(id1, 1);
1031        assert_eq!(subgraph.nodes.len(), 2);
1032
1033        // Depth 2 from id1 should include id1, id2, id3
1034        let subgraph = graph.neighborhood(id1, 2);
1035        assert_eq!(subgraph.nodes.len(), 3);
1036    }
1037
1038    #[test]
1039    fn test_to_dot() {
1040        let id1: MemoryId = 1;
1041        let id2: MemoryId = 2;
1042
1043        let graph = KnowledgeGraph {
1044            nodes: vec![
1045                make_node(id1, "note", vec![]),
1046                make_node(id2, "todo", vec![]),
1047            ],
1048            edges: vec![make_edge(id1, id2, "related_to")],
1049        };
1050
1051        let dot = graph.to_dot();
1052        assert!(dot.contains("digraph knowledge_graph"));
1053        assert!(dot.contains(&id1.to_string()));
1054        assert!(dot.contains(&id2.to_string()));
1055        assert!(dot.contains("related_to"));
1056    }
1057
1058    #[test]
1059    fn test_community_detection() {
1060        // Create two clusters
1061        let a1: MemoryId = 1;
1062        let a2: MemoryId = 2;
1063        let a3: MemoryId = 3;
1064        let b1: MemoryId = 4;
1065        let b2: MemoryId = 5;
1066
1067        let graph = KnowledgeGraph {
1068            nodes: vec![
1069                make_node(a1, "note", vec!["cluster-a"]),
1070                make_node(a2, "note", vec!["cluster-a"]),
1071                make_node(a3, "note", vec!["cluster-a"]),
1072                make_node(b1, "todo", vec!["cluster-b"]),
1073                make_node(b2, "todo", vec!["cluster-b"]),
1074            ],
1075            edges: vec![
1076                // Cluster A - densely connected
1077                make_edge(a1, a2, "related_to"),
1078                make_edge(a2, a3, "related_to"),
1079                make_edge(a1, a3, "related_to"),
1080                // Cluster B - connected
1081                make_edge(b1, b2, "related_to"),
1082                // Weak link between clusters
1083                GraphEdge {
1084                    from: a3,
1085                    to: b1,
1086                    edge_type: "related_to".to_string(),
1087                    score: 0.1, // weak
1088                    confidence: 0.1,
1089                },
1090            ],
1091        };
1092
1093        let communities = graph.detect_communities(10);
1094        // Should detect at least the general structure
1095        assert!(!communities.is_empty());
1096        // Largest community should have at least 2 members
1097        assert!(communities[0].members.len() >= 2);
1098    }
1099}