Skip to main content

engram/graph/
mod.rs

1//! Knowledge graph visualization (RML-894 improvements)
2//!
3//! Provides:
4//! - Interactive graph visualization with vis.js
5//! - Graph clustering and community detection
6//! - Graph statistics and metrics
7//! - Export to multiple formats (HTML, DOT, JSON)
8//! - Filtering and traversal utilities
9
10use chrono::{DateTime, Utc};
11use serde::{Deserialize, Serialize};
12use std::collections::{HashMap, HashSet, VecDeque};
13
14use crate::types::{CrossReference, Memory, MemoryId};
15
16/// Graph node
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct GraphNode {
19    pub id: MemoryId,
20    pub label: String,
21    pub memory_type: String,
22    pub importance: f32,
23    pub tags: Vec<String>,
24}
25
26/// Graph edge
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct GraphEdge {
29    pub from: MemoryId,
30    pub to: MemoryId,
31    pub edge_type: String,
32    pub score: f32,
33    pub confidence: f32,
34}
35
36/// Knowledge graph structure
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct KnowledgeGraph {
39    pub nodes: Vec<GraphNode>,
40    pub edges: Vec<GraphEdge>,
41}
42
43impl KnowledgeGraph {
44    /// Create graph from memories and cross-references
45    pub fn from_data(memories: &[Memory], crossrefs: &[CrossReference]) -> Self {
46        let nodes: Vec<GraphNode> = memories
47            .iter()
48            .map(|m| GraphNode {
49                id: m.id,
50                label: truncate_label(&m.content, 50),
51                memory_type: m.memory_type.as_str().to_string(),
52                importance: m.importance,
53                tags: m.tags.clone(),
54            })
55            .collect();
56
57        let memory_ids: std::collections::HashSet<MemoryId> =
58            memories.iter().map(|m| m.id).collect();
59
60        let edges: Vec<GraphEdge> = crossrefs
61            .iter()
62            .filter(|cr| memory_ids.contains(&cr.from_id) && memory_ids.contains(&cr.to_id))
63            .map(|cr| GraphEdge {
64                from: cr.from_id,
65                to: cr.to_id,
66                edge_type: cr.edge_type.as_str().to_string(),
67                score: cr.score,
68                confidence: cr.confidence,
69            })
70            .collect();
71
72        Self { nodes, edges }
73    }
74
75    /// Export as vis.js compatible JSON
76    pub fn to_visjs_json(&self) -> serde_json::Value {
77        let nodes: Vec<serde_json::Value> = self
78            .nodes
79            .iter()
80            .map(|n| {
81                serde_json::json!({
82                    "id": n.id,
83                    "label": n.label,
84                    "group": n.memory_type,
85                    "value": (n.importance * 10.0) as i32 + 5,
86                    "title": format!("Type: {}\nTags: {}", n.memory_type, n.tags.join(", "))
87                })
88            })
89            .collect();
90
91        let edges: Vec<serde_json::Value> = self
92            .edges
93            .iter()
94            .map(|e| {
95                serde_json::json!({
96                    "from": e.from,
97                    "to": e.to,
98                    "label": e.edge_type,
99                    "value": (e.score * e.confidence * 5.0) as i32 + 1,
100                    "title": format!("Score: {:.2}, Confidence: {:.2}", e.score, e.confidence)
101                })
102            })
103            .collect();
104
105        serde_json::json!({
106            "nodes": nodes,
107            "edges": edges
108        })
109    }
110
111    /// Export as standalone HTML with vis.js
112    pub fn to_html(&self) -> String {
113        let graph_data = self.to_visjs_json();
114
115        format!(
116            r#"<!DOCTYPE html>
117<html>
118<head>
119    <title>Engram Knowledge Graph</title>
120    <script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
121    <style>
122        body {{ margin: 0; padding: 0; font-family: system-ui, sans-serif; }}
123        #graph {{ width: 100vw; height: 100vh; }}
124        #controls {{
125            position: absolute;
126            top: 10px;
127            left: 10px;
128            background: white;
129            padding: 10px;
130            border-radius: 8px;
131            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
132        }}
133        #search {{ padding: 8px; width: 200px; border: 1px solid #ddd; border-radius: 4px; }}
134        .legend {{ display: flex; gap: 10px; margin-top: 10px; flex-wrap: wrap; }}
135        .legend-item {{ display: flex; align-items: center; gap: 5px; font-size: 12px; }}
136        .legend-dot {{ width: 12px; height: 12px; border-radius: 50%; }}
137    </style>
138</head>
139<body>
140    <div id="controls">
141        <input type="text" id="search" placeholder="Search nodes...">
142        <div class="legend">
143            <div class="legend-item"><span class="legend-dot" style="background: #97C2FC;"></span> note</div>
144            <div class="legend-item"><span class="legend-dot" style="background: #FFFF00;"></span> todo</div>
145            <div class="legend-item"><span class="legend-dot" style="background: #FB7E81;"></span> issue</div>
146            <div class="legend-item"><span class="legend-dot" style="background: #7BE141;"></span> decision</div>
147            <div class="legend-item"><span class="legend-dot" style="background: #FFA807;"></span> preference</div>
148            <div class="legend-item"><span class="legend-dot" style="background: #6E6EFD;"></span> learning</div>
149        </div>
150    </div>
151    <div id="graph"></div>
152    <script>
153        const data = {graph_data};
154
155        const options = {{
156            nodes: {{
157                shape: 'dot',
158                scaling: {{ min: 10, max: 30 }},
159                font: {{ size: 12, face: 'system-ui' }}
160            }},
161            edges: {{
162                arrows: 'to',
163                scaling: {{ min: 1, max: 5 }},
164                font: {{ size: 10, align: 'middle' }}
165            }},
166            groups: {{
167                note: {{ color: '#97C2FC' }},
168                todo: {{ color: '#FFFF00' }},
169                issue: {{ color: '#FB7E81' }},
170                decision: {{ color: '#7BE141' }},
171                preference: {{ color: '#FFA807' }},
172                learning: {{ color: '#6E6EFD' }},
173                context: {{ color: '#C2FABC' }},
174                credential: {{ color: '#FD6A6A' }}
175            }},
176            physics: {{
177                stabilization: {{ iterations: 100 }},
178                barnesHut: {{
179                    gravitationalConstant: -2000,
180                    springLength: 100
181                }}
182            }},
183            interaction: {{
184                hover: true,
185                tooltipDelay: 100
186            }}
187        }};
188
189        const container = document.getElementById('graph');
190        const network = new vis.Network(container, data, options);
191
192        // Search functionality
193        const searchInput = document.getElementById('search');
194        searchInput.addEventListener('input', function() {{
195            const query = this.value.toLowerCase();
196            if (query) {{
197                const matchingNodes = data.nodes.filter(n =>
198                    n.label.toLowerCase().includes(query)
199                ).map(n => n.id);
200                network.selectNodes(matchingNodes);
201                if (matchingNodes.length > 0) {{
202                    network.focus(matchingNodes[0], {{ scale: 1.5, animation: true }});
203                }}
204            }} else {{
205                network.unselectAll();
206            }}
207        }});
208
209        // Click to focus
210        network.on('click', function(params) {{
211            if (params.nodes.length > 0) {{
212                network.focus(params.nodes[0], {{ scale: 1.5, animation: true }});
213            }}
214        }});
215    </script>
216</body>
217</html>"#,
218            graph_data = serde_json::to_string(&graph_data).unwrap_or_default()
219        )
220    }
221}
222
223/// Truncate content for display as node label
224fn truncate_label(content: &str, max_len: usize) -> String {
225    let first_line = content.lines().next().unwrap_or(content);
226    if first_line.len() <= max_len {
227        first_line.to_string()
228    } else {
229        format!("{}...", &first_line[..max_len - 3])
230    }
231}
232
233// =============================================================================
234// Graph Statistics (RML-894)
235// =============================================================================
236
237/// Graph statistics and metrics
238#[derive(Debug, Clone, Serialize, Deserialize)]
239pub struct GraphStats {
240    /// Total number of nodes
241    pub node_count: usize,
242    /// Total number of edges
243    pub edge_count: usize,
244    /// Average degree (edges per node)
245    pub avg_degree: f32,
246    /// Graph density (actual edges / possible edges)
247    pub density: f32,
248    /// Number of connected components
249    pub component_count: usize,
250    /// Size of largest component
251    pub largest_component_size: usize,
252    /// Nodes by memory type
253    pub nodes_by_type: HashMap<String, usize>,
254    /// Edges by type
255    pub edges_by_type: HashMap<String, usize>,
256    /// Most connected nodes (top 10 by degree)
257    pub hub_nodes: Vec<(MemoryId, usize)>,
258    /// Isolated nodes (degree 0)
259    pub isolated_count: usize,
260}
261
262impl KnowledgeGraph {
263    /// Calculate graph statistics
264    pub fn stats(&self) -> GraphStats {
265        let node_count = self.nodes.len();
266        let edge_count = self.edges.len();
267
268        // Build adjacency for degree calculation
269        let mut degree: HashMap<MemoryId, usize> = HashMap::new();
270        for node in &self.nodes {
271            degree.insert(node.id, 0);
272        }
273        for edge in &self.edges {
274            *degree.entry(edge.from).or_insert(0) += 1;
275            *degree.entry(edge.to).or_insert(0) += 1;
276        }
277
278        let avg_degree = if node_count > 0 {
279            degree.values().sum::<usize>() as f32 / node_count as f32
280        } else {
281            0.0
282        };
283
284        // Density: edges / (n * (n-1) / 2) for undirected, edges / (n * (n-1)) for directed
285        let density = if node_count > 1 {
286            edge_count as f32 / (node_count * (node_count - 1)) as f32
287        } else {
288            0.0
289        };
290
291        // Count by type
292        let mut nodes_by_type: HashMap<String, usize> = HashMap::new();
293        for node in &self.nodes {
294            *nodes_by_type.entry(node.memory_type.clone()).or_insert(0) += 1;
295        }
296
297        let mut edges_by_type: HashMap<String, usize> = HashMap::new();
298        for edge in &self.edges {
299            *edges_by_type.entry(edge.edge_type.clone()).or_insert(0) += 1;
300        }
301
302        // Find hub nodes (top 10 by degree)
303        let mut degree_list: Vec<(MemoryId, usize)> =
304            degree.iter().map(|(&k, &v)| (k, v)).collect();
305        degree_list.sort_by(|a, b| b.1.cmp(&a.1));
306        let hub_nodes: Vec<(MemoryId, usize)> = degree_list.into_iter().take(10).collect();
307
308        // Count isolated nodes
309        let isolated_count = degree.values().filter(|&&d| d == 0).count();
310
311        // Find connected components using BFS
312        let components = self.find_connected_components();
313        let component_count = components.len();
314        let largest_component_size = components.iter().map(|c| c.len()).max().unwrap_or(0);
315
316        GraphStats {
317            node_count,
318            edge_count,
319            avg_degree,
320            density,
321            component_count,
322            largest_component_size,
323            nodes_by_type,
324            edges_by_type,
325            hub_nodes,
326            isolated_count,
327        }
328    }
329
330    /// Find connected components using BFS
331    fn find_connected_components(&self) -> Vec<Vec<MemoryId>> {
332        let node_ids: HashSet<MemoryId> = self.nodes.iter().map(|n| n.id).collect();
333
334        // Build adjacency list (undirected)
335        let mut adj: HashMap<MemoryId, Vec<MemoryId>> = HashMap::new();
336        for id in &node_ids {
337            adj.insert(*id, Vec::new());
338        }
339        for edge in &self.edges {
340            if let Some(list) = adj.get_mut(&edge.from) {
341                list.push(edge.to);
342            }
343            if let Some(list) = adj.get_mut(&edge.to) {
344                list.push(edge.from);
345            }
346        }
347
348        let mut visited: HashSet<MemoryId> = HashSet::new();
349        let mut components = Vec::new();
350
351        for &start in &node_ids {
352            if visited.contains(&start) {
353                continue;
354            }
355
356            let mut component = Vec::new();
357            let mut queue = VecDeque::new();
358            queue.push_back(start);
359            visited.insert(start);
360
361            while let Some(node) = queue.pop_front() {
362                component.push(node);
363                if let Some(neighbors) = adj.get(&node) {
364                    for &neighbor in neighbors {
365                        if !visited.contains(&neighbor) {
366                            visited.insert(neighbor);
367                            queue.push_back(neighbor);
368                        }
369                    }
370                }
371            }
372
373            components.push(component);
374        }
375
376        components
377    }
378
379    /// Calculate centrality scores for nodes
380    pub fn centrality(&self) -> HashMap<MemoryId, CentralityScores> {
381        let mut results: HashMap<MemoryId, CentralityScores> = HashMap::new();
382
383        // Build adjacency
384        let mut in_degree: HashMap<MemoryId, usize> = HashMap::new();
385        let mut out_degree: HashMap<MemoryId, usize> = HashMap::new();
386
387        for node in &self.nodes {
388            in_degree.insert(node.id, 0);
389            out_degree.insert(node.id, 0);
390        }
391
392        for edge in &self.edges {
393            *out_degree.entry(edge.from).or_insert(0) += 1;
394            *in_degree.entry(edge.to).or_insert(0) += 1;
395        }
396
397        let max_degree = self.nodes.len().saturating_sub(1).max(1) as f32;
398
399        for node in &self.nodes {
400            let in_d = *in_degree.get(&node.id).unwrap_or(&0) as f32;
401            let out_d = *out_degree.get(&node.id).unwrap_or(&0) as f32;
402
403            results.insert(
404                node.id,
405                CentralityScores {
406                    in_degree: in_d / max_degree,
407                    out_degree: out_d / max_degree,
408                    degree: (in_d + out_d) / (2.0 * max_degree),
409                    // Simplified closeness based on direct connections
410                    closeness: (in_d + out_d) / (2.0 * max_degree),
411                },
412            );
413        }
414
415        results
416    }
417}
418
419/// Centrality scores for a node
420#[derive(Debug, Clone, Serialize, Deserialize)]
421pub struct CentralityScores {
422    /// Normalized in-degree centrality
423    pub in_degree: f32,
424    /// Normalized out-degree centrality
425    pub out_degree: f32,
426    /// Combined degree centrality
427    pub degree: f32,
428    /// Closeness centrality (simplified)
429    pub closeness: f32,
430}
431
432// =============================================================================
433// Graph Filtering (RML-894)
434// =============================================================================
435
436/// Filter options for graph queries
437#[derive(Debug, Clone, Default)]
438pub struct GraphFilter {
439    /// Filter by memory types
440    pub memory_types: Option<Vec<String>>,
441    /// Filter by tags (any match)
442    pub tags: Option<Vec<String>>,
443    /// Filter by edge types
444    pub edge_types: Option<Vec<String>>,
445    /// Minimum importance threshold
446    pub min_importance: Option<f32>,
447    /// Maximum importance threshold
448    pub max_importance: Option<f32>,
449    /// Created after this date
450    pub created_after: Option<DateTime<Utc>>,
451    /// Created before this date
452    pub created_before: Option<DateTime<Utc>>,
453    /// Minimum edge confidence
454    pub min_confidence: Option<f32>,
455    /// Minimum edge score
456    pub min_score: Option<f32>,
457    /// Maximum number of nodes
458    pub limit: Option<usize>,
459}
460
461impl GraphFilter {
462    pub fn new() -> Self {
463        Self::default()
464    }
465
466    pub fn with_types(mut self, types: Vec<String>) -> Self {
467        self.memory_types = Some(types);
468        self
469    }
470
471    pub fn with_tags(mut self, tags: Vec<String>) -> Self {
472        self.tags = Some(tags);
473        self
474    }
475
476    pub fn with_min_importance(mut self, min: f32) -> Self {
477        self.min_importance = Some(min);
478        self
479    }
480
481    pub fn with_min_confidence(mut self, min: f32) -> Self {
482        self.min_confidence = Some(min);
483        self
484    }
485
486    pub fn with_limit(mut self, limit: usize) -> Self {
487        self.limit = Some(limit);
488        self
489    }
490}
491
492impl KnowledgeGraph {
493    /// Apply filter to create a subgraph
494    pub fn filter(&self, filter: &GraphFilter) -> KnowledgeGraph {
495        // Filter nodes
496        let mut filtered_nodes: Vec<GraphNode> = self
497            .nodes
498            .iter()
499            .filter(|n| {
500                // Type filter
501                if let Some(ref types) = filter.memory_types {
502                    if !types.contains(&n.memory_type) {
503                        return false;
504                    }
505                }
506
507                // Tag filter (any match)
508                if let Some(ref tags) = filter.tags {
509                    if !n.tags.iter().any(|t| tags.contains(t)) {
510                        return false;
511                    }
512                }
513
514                // Importance filter
515                if let Some(min) = filter.min_importance {
516                    if n.importance < min {
517                        return false;
518                    }
519                }
520                if let Some(max) = filter.max_importance {
521                    if n.importance > max {
522                        return false;
523                    }
524                }
525
526                true
527            })
528            .cloned()
529            .collect();
530
531        // Apply limit
532        if let Some(limit) = filter.limit {
533            filtered_nodes.truncate(limit);
534        }
535
536        // Get set of valid node IDs
537        let valid_ids: HashSet<MemoryId> = filtered_nodes.iter().map(|n| n.id).collect();
538
539        // Filter edges
540        let filtered_edges: Vec<GraphEdge> = self
541            .edges
542            .iter()
543            .filter(|e| {
544                // Both endpoints must be in filtered nodes
545                if !valid_ids.contains(&e.from) || !valid_ids.contains(&e.to) {
546                    return false;
547                }
548
549                // Edge type filter
550                if let Some(ref types) = filter.edge_types {
551                    if !types.contains(&e.edge_type) {
552                        return false;
553                    }
554                }
555
556                // Confidence filter
557                if let Some(min) = filter.min_confidence {
558                    if e.confidence < min {
559                        return false;
560                    }
561                }
562
563                // Score filter
564                if let Some(min) = filter.min_score {
565                    if e.score < min {
566                        return false;
567                    }
568                }
569
570                true
571            })
572            .cloned()
573            .collect();
574
575        KnowledgeGraph {
576            nodes: filtered_nodes,
577            edges: filtered_edges,
578        }
579    }
580
581    /// Get subgraph centered on a node with given depth
582    pub fn neighborhood(&self, center: MemoryId, depth: usize) -> KnowledgeGraph {
583        let mut visited: HashSet<MemoryId> = HashSet::new();
584        let mut current_level: HashSet<MemoryId> = HashSet::new();
585        current_level.insert(center);
586        visited.insert(center);
587
588        // Build adjacency
589        let mut adj: HashMap<MemoryId, Vec<MemoryId>> = HashMap::new();
590        for edge in &self.edges {
591            adj.entry(edge.from).or_default().push(edge.to);
592            adj.entry(edge.to).or_default().push(edge.from);
593        }
594
595        // BFS to depth
596        for _ in 0..depth {
597            let mut next_level: HashSet<MemoryId> = HashSet::new();
598            for &node in &current_level {
599                if let Some(neighbors) = adj.get(&node) {
600                    for &neighbor in neighbors {
601                        if !visited.contains(&neighbor) {
602                            visited.insert(neighbor);
603                            next_level.insert(neighbor);
604                        }
605                    }
606                }
607            }
608            current_level = next_level;
609        }
610
611        // Filter to visited nodes
612        let nodes: Vec<GraphNode> = self
613            .nodes
614            .iter()
615            .filter(|n| visited.contains(&n.id))
616            .cloned()
617            .collect();
618
619        let edges: Vec<GraphEdge> = self
620            .edges
621            .iter()
622            .filter(|e| visited.contains(&e.from) && visited.contains(&e.to))
623            .cloned()
624            .collect();
625
626        KnowledgeGraph { nodes, edges }
627    }
628}
629
630// =============================================================================
631// DOT Export (RML-894)
632// =============================================================================
633
634impl KnowledgeGraph {
635    /// Export as DOT format for Graphviz
636    pub fn to_dot(&self) -> String {
637        let mut dot = String::from("digraph knowledge_graph {\n");
638        dot.push_str("    rankdir=LR;\n");
639        dot.push_str("    node [shape=box, style=rounded];\n\n");
640
641        // Color mapping for memory types
642        let colors: HashMap<&str, &str> = [
643            ("note", "#97C2FC"),
644            ("todo", "#FFFF00"),
645            ("issue", "#FB7E81"),
646            ("decision", "#7BE141"),
647            ("preference", "#FFA807"),
648            ("learning", "#6E6EFD"),
649            ("context", "#C2FABC"),
650            ("credential", "#FD6A6A"),
651        ]
652        .into_iter()
653        .collect();
654
655        // Write nodes
656        for node in &self.nodes {
657            let color = colors.get(node.memory_type.as_str()).unwrap_or(&"#CCCCCC");
658            let label = node.label.replace('"', "\\\"");
659            dot.push_str(&format!(
660                "    \"{}\" [label=\"{}\", fillcolor=\"{}\", style=\"filled,rounded\"];\n",
661                node.id, label, color
662            ));
663        }
664
665        dot.push('\n');
666
667        // Write edges
668        for edge in &self.edges {
669            let style = match edge.edge_type.as_str() {
670                "related_to" => "solid",
671                "part_of" => "dashed",
672                "depends_on" => "bold",
673                "contradicts" => "dotted",
674                "supports" => "solid",
675                "references" => "dashed",
676                _ => "solid",
677            };
678            dot.push_str(&format!(
679                "    \"{}\" -> \"{}\" [label=\"{}\", style={}, penwidth={}];\n",
680                edge.from,
681                edge.to,
682                edge.edge_type,
683                style,
684                (edge.score * 2.0 + 0.5).min(3.0)
685            ));
686        }
687
688        dot.push_str("}\n");
689        dot
690    }
691
692    /// Export as GEXF format for Gephi
693    pub fn to_gexf(&self) -> String {
694        let mut gexf = String::from(
695            r#"<?xml version="1.0" encoding="UTF-8"?>
696<gexf xmlns="http://gexf.net/1.3" version="1.3">
697  <meta>
698    <creator>Engram</creator>
699    <description>Knowledge Graph Export</description>
700  </meta>
701  <graph mode="static" defaultedgetype="directed">
702    <attributes class="node">
703      <attribute id="0" title="type" type="string"/>
704      <attribute id="1" title="importance" type="float"/>
705    </attributes>
706    <attributes class="edge">
707      <attribute id="0" title="score" type="float"/>
708      <attribute id="1" title="confidence" type="float"/>
709    </attributes>
710    <nodes>
711"#,
712        );
713
714        for node in &self.nodes {
715            let label = node
716                .label
717                .replace('&', "&amp;")
718                .replace('<', "&lt;")
719                .replace('>', "&gt;")
720                .replace('"', "&quot;");
721            gexf.push_str(&format!(
722                r#"      <node id="{}" label="{}">
723        <attvalues>
724          <attvalue for="0" value="{}"/>
725          <attvalue for="1" value="{}"/>
726        </attvalues>
727      </node>
728"#,
729                node.id, label, node.memory_type, node.importance
730            ));
731        }
732
733        gexf.push_str("    </nodes>\n    <edges>\n");
734
735        for (i, edge) in self.edges.iter().enumerate() {
736            gexf.push_str(&format!(
737                r#"      <edge id="{}" source="{}" target="{}" label="{}">
738        <attvalues>
739          <attvalue for="0" value="{}"/>
740          <attvalue for="1" value="{}"/>
741        </attvalues>
742      </edge>
743"#,
744                i, edge.from, edge.to, edge.edge_type, edge.score, edge.confidence
745            ));
746        }
747
748        gexf.push_str("    </edges>\n  </graph>\n</gexf>\n");
749        gexf
750    }
751}
752
753// =============================================================================
754// Community Detection (RML-894)
755// =============================================================================
756
757/// A cluster/community of nodes
758#[derive(Debug, Clone, Serialize, Deserialize)]
759pub struct GraphCluster {
760    /// Cluster identifier
761    pub id: usize,
762    /// Node IDs in this cluster
763    pub members: Vec<MemoryId>,
764    /// Dominant memory type in cluster
765    pub dominant_type: Option<String>,
766    /// Common tags across cluster
767    pub common_tags: Vec<String>,
768    /// Internal edge count
769    pub internal_edges: usize,
770    /// Cluster cohesion score
771    pub cohesion: f32,
772}
773
774impl KnowledgeGraph {
775    /// Detect communities using label propagation algorithm
776    pub fn detect_communities(&self, max_iterations: usize) -> Vec<GraphCluster> {
777        if self.nodes.is_empty() {
778            return Vec::new();
779        }
780
781        // Initialize: each node in its own community
782        let mut labels: HashMap<MemoryId, usize> = self
783            .nodes
784            .iter()
785            .enumerate()
786            .map(|(i, n)| (n.id, i))
787            .collect();
788
789        // Build adjacency
790        let mut adj: HashMap<MemoryId, Vec<(MemoryId, f32)>> = HashMap::new();
791        for node in &self.nodes {
792            adj.insert(node.id, Vec::new());
793        }
794        for edge in &self.edges {
795            let weight = edge.score * edge.confidence;
796            adj.entry(edge.from).or_default().push((edge.to, weight));
797            adj.entry(edge.to).or_default().push((edge.from, weight));
798        }
799
800        // Label propagation
801        let node_ids: Vec<MemoryId> = self.nodes.iter().map(|n| n.id).collect();
802
803        for _ in 0..max_iterations {
804            let mut changed = false;
805
806            for &node_id in &node_ids {
807                if let Some(neighbors) = adj.get(&node_id) {
808                    if neighbors.is_empty() {
809                        continue;
810                    }
811
812                    // Count weighted votes for each label
813                    let mut votes: HashMap<usize, f32> = HashMap::new();
814                    for &(neighbor, weight) in neighbors {
815                        if let Some(&label) = labels.get(&neighbor) {
816                            *votes.entry(label).or_insert(0.0) += weight;
817                        }
818                    }
819
820                    // Pick label with most votes
821                    if let Some((&best_label, _)) = votes.iter().max_by(|a, b| a.1.total_cmp(b.1)) {
822                        let current = labels.get(&node_id).copied().unwrap_or(0);
823                        if best_label != current {
824                            labels.insert(node_id, best_label);
825                            changed = true;
826                        }
827                    }
828                }
829            }
830
831            if !changed {
832                break;
833            }
834        }
835
836        // Group nodes by label
837        let mut clusters_map: HashMap<usize, Vec<MemoryId>> = HashMap::new();
838        for (node_id, label) in &labels {
839            clusters_map.entry(*label).or_default().push(*node_id);
840        }
841
842        // Build cluster objects
843        let node_map: HashMap<MemoryId, &GraphNode> =
844            self.nodes.iter().map(|n| (n.id, n)).collect();
845
846        let mut clusters: Vec<GraphCluster> = clusters_map
847            .into_iter()
848            .enumerate()
849            .map(|(new_id, (_, members))| {
850                // Find dominant type
851                let mut type_counts: HashMap<&str, usize> = HashMap::new();
852                let mut all_tags: HashMap<&str, usize> = HashMap::new();
853
854                for &member_id in &members {
855                    if let Some(node) = node_map.get(&member_id) {
856                        *type_counts.entry(node.memory_type.as_str()).or_insert(0) += 1;
857                        for tag in &node.tags {
858                            *all_tags.entry(tag.as_str()).or_insert(0) += 1;
859                        }
860                    }
861                }
862
863                let dominant_type = type_counts
864                    .into_iter()
865                    .max_by_key(|(_, count)| *count)
866                    .map(|(t, _)| t.to_string());
867
868                // Common tags (present in > 50% of members)
869                let threshold = members.len() / 2;
870                let common_tags: Vec<String> = all_tags
871                    .into_iter()
872                    .filter(|(_, count)| *count > threshold)
873                    .map(|(tag, _)| tag.to_string())
874                    .collect();
875
876                // Count internal edges
877                let member_set: HashSet<MemoryId> = members.iter().copied().collect();
878                let internal_edges = self
879                    .edges
880                    .iter()
881                    .filter(|e| member_set.contains(&e.from) && member_set.contains(&e.to))
882                    .count();
883
884                // Cohesion: internal edges / possible internal edges
885                let n = members.len();
886                let possible = if n > 1 { n * (n - 1) } else { 1 };
887                let cohesion = internal_edges as f32 / possible as f32;
888
889                GraphCluster {
890                    id: new_id,
891                    members,
892                    dominant_type,
893                    common_tags,
894                    internal_edges,
895                    cohesion,
896                }
897            })
898            .collect();
899
900        // Sort by size (largest first)
901        clusters.sort_by(|a, b| b.members.len().cmp(&a.members.len()));
902
903        // Renumber IDs
904        for (i, cluster) in clusters.iter_mut().enumerate() {
905            cluster.id = i;
906        }
907
908        clusters
909    }
910}
911
912#[cfg(test)]
913mod tests {
914    use super::*;
915
916    fn make_node(id: MemoryId, memory_type: &str, tags: Vec<&str>) -> GraphNode {
917        GraphNode {
918            id,
919            label: format!("Node {}", id),
920            memory_type: memory_type.to_string(),
921            importance: 0.5,
922            tags: tags.into_iter().map(String::from).collect(),
923        }
924    }
925
926    fn make_edge(from: MemoryId, to: MemoryId, edge_type: &str) -> GraphEdge {
927        GraphEdge {
928            from,
929            to,
930            edge_type: edge_type.to_string(),
931            score: 0.8,
932            confidence: 0.9,
933        }
934    }
935
936    #[test]
937    fn test_truncate_label() {
938        assert_eq!(truncate_label("short", 50), "short");
939        assert_eq!(
940            truncate_label("this is a very long label that should be truncated", 20),
941            "this is a very lo..."
942        );
943    }
944
945    #[test]
946    fn test_graph_stats() {
947        let id1: MemoryId = 1;
948        let id2: MemoryId = 2;
949        let id3: MemoryId = 3;
950
951        let graph = KnowledgeGraph {
952            nodes: vec![
953                make_node(id1, "note", vec!["rust"]),
954                make_node(id2, "note", vec!["rust"]),
955                make_node(id3, "todo", vec!["python"]),
956            ],
957            edges: vec![
958                make_edge(id1, id2, "related_to"),
959                make_edge(id2, id3, "depends_on"),
960            ],
961        };
962
963        let stats = graph.stats();
964        assert_eq!(stats.node_count, 3);
965        assert_eq!(stats.edge_count, 2);
966        assert_eq!(stats.nodes_by_type.get("note"), Some(&2));
967        assert_eq!(stats.nodes_by_type.get("todo"), Some(&1));
968        assert_eq!(stats.isolated_count, 0);
969        assert_eq!(stats.component_count, 1);
970    }
971
972    #[test]
973    fn test_graph_filter() {
974        let id1: MemoryId = 1;
975        let id2: MemoryId = 2;
976        let id3: MemoryId = 3;
977
978        let graph = KnowledgeGraph {
979            nodes: vec![
980                make_node(id1, "note", vec!["rust"]),
981                make_node(id2, "note", vec!["python"]),
982                make_node(id3, "todo", vec!["rust"]),
983            ],
984            edges: vec![
985                make_edge(id1, id2, "related_to"),
986                make_edge(id2, id3, "depends_on"),
987            ],
988        };
989
990        // Filter by type
991        let filter = GraphFilter::new().with_types(vec!["note".to_string()]);
992        let filtered = graph.filter(&filter);
993        assert_eq!(filtered.nodes.len(), 2);
994        assert_eq!(filtered.edges.len(), 1); // Only edge between notes
995
996        // Filter by tag
997        let filter = GraphFilter::new().with_tags(vec!["rust".to_string()]);
998        let filtered = graph.filter(&filter);
999        assert_eq!(filtered.nodes.len(), 2); // id1 and id3 have "rust"
1000    }
1001
1002    #[test]
1003    fn test_neighborhood() {
1004        let id1: MemoryId = 1;
1005        let id2: MemoryId = 2;
1006        let id3: MemoryId = 3;
1007        let id4: MemoryId = 4;
1008
1009        let graph = KnowledgeGraph {
1010            nodes: vec![
1011                make_node(id1, "note", vec![]),
1012                make_node(id2, "note", vec![]),
1013                make_node(id3, "note", vec![]),
1014                make_node(id4, "note", vec![]),
1015            ],
1016            edges: vec![
1017                make_edge(id1, id2, "related_to"),
1018                make_edge(id2, id3, "related_to"),
1019                make_edge(id3, id4, "related_to"),
1020            ],
1021        };
1022
1023        // Depth 1 from id1 should include id1, id2
1024        let subgraph = graph.neighborhood(id1, 1);
1025        assert_eq!(subgraph.nodes.len(), 2);
1026
1027        // Depth 2 from id1 should include id1, id2, id3
1028        let subgraph = graph.neighborhood(id1, 2);
1029        assert_eq!(subgraph.nodes.len(), 3);
1030    }
1031
1032    #[test]
1033    fn test_to_dot() {
1034        let id1: MemoryId = 1;
1035        let id2: MemoryId = 2;
1036
1037        let graph = KnowledgeGraph {
1038            nodes: vec![
1039                make_node(id1, "note", vec![]),
1040                make_node(id2, "todo", vec![]),
1041            ],
1042            edges: vec![make_edge(id1, id2, "related_to")],
1043        };
1044
1045        let dot = graph.to_dot();
1046        assert!(dot.contains("digraph knowledge_graph"));
1047        assert!(dot.contains(&id1.to_string()));
1048        assert!(dot.contains(&id2.to_string()));
1049        assert!(dot.contains("related_to"));
1050    }
1051
1052    #[test]
1053    fn test_community_detection() {
1054        // Create two clusters
1055        let a1: MemoryId = 1;
1056        let a2: MemoryId = 2;
1057        let a3: MemoryId = 3;
1058        let b1: MemoryId = 4;
1059        let b2: MemoryId = 5;
1060
1061        let graph = KnowledgeGraph {
1062            nodes: vec![
1063                make_node(a1, "note", vec!["cluster-a"]),
1064                make_node(a2, "note", vec!["cluster-a"]),
1065                make_node(a3, "note", vec!["cluster-a"]),
1066                make_node(b1, "todo", vec!["cluster-b"]),
1067                make_node(b2, "todo", vec!["cluster-b"]),
1068            ],
1069            edges: vec![
1070                // Cluster A - densely connected
1071                make_edge(a1, a2, "related_to"),
1072                make_edge(a2, a3, "related_to"),
1073                make_edge(a1, a3, "related_to"),
1074                // Cluster B - connected
1075                make_edge(b1, b2, "related_to"),
1076                // Weak link between clusters
1077                GraphEdge {
1078                    from: a3,
1079                    to: b1,
1080                    edge_type: "related_to".to_string(),
1081                    score: 0.1, // weak
1082                    confidence: 0.1,
1083                },
1084            ],
1085        };
1086
1087        let communities = graph.detect_communities(10);
1088        // Should detect at least the general structure
1089        assert!(!communities.is_empty());
1090        // Largest community should have at least 2 members
1091        assert!(communities[0].members.len() >= 2);
1092    }
1093}