Skip to main content

graphrag_core/graph/
hierarchical_relationships.rs

1//! Hierarchical Relationship Clustering (Phase 3.1)
2//!
3//! This module implements hierarchical clustering of relationships in the knowledge graph,
4//! creating multi-level summaries for efficient retrieval and reasoning.
5//!
6//! Key features:
7//! - Leiden community detection applied recursively to relationships
8//! - LLM-generated summaries for each cluster
9//! - Tree structure with parent-child relationships between levels
10//! - Enables query routing to relevant relationship clusters
11
12use crate::{
13    core::{KnowledgeGraph, Relationship, Result},
14    ollama::OllamaClient,
15};
16use petgraph::visit::EdgeRef;
17use serde::{Deserialize, Serialize};
18use std::collections::HashMap;
19
20/// Complete hierarchical structure of relationship clusters
21///
22/// Organizes relationships into multiple levels of granularity,
23/// from fine-grained (Level 0) to coarse-grained (higher levels).
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct RelationshipHierarchy {
26    /// Hierarchy levels, ordered from most detailed (0) to most abstract
27    pub levels: Vec<HierarchyLevel>,
28}
29
30impl RelationshipHierarchy {
31    /// Create a new empty hierarchy
32    pub fn new() -> Self {
33        Self { levels: Vec::new() }
34    }
35
36    /// Add a new level to the hierarchy
37    pub fn add_level(&mut self, level: HierarchyLevel) {
38        self.levels.push(level);
39    }
40
41    /// Get a specific level by ID
42    pub fn get_level(&self, level_id: usize) -> Option<&HierarchyLevel> {
43        self.levels.iter().find(|l| l.level_id == level_id)
44    }
45
46    /// Get the total number of levels
47    pub fn num_levels(&self) -> usize {
48        self.levels.len()
49    }
50
51    /// Find clusters containing a specific relationship
52    pub fn find_clusters_for_relationship(
53        &self,
54        rel_id: &str,
55    ) -> Vec<(&HierarchyLevel, &RelationshipCluster)> {
56        let mut results = Vec::new();
57
58        for level in &self.levels {
59            for cluster in &level.clusters {
60                if cluster.relationship_ids.contains(&rel_id.to_string()) {
61                    results.push((level, cluster));
62                }
63            }
64        }
65
66        results
67    }
68}
69
70impl Default for RelationshipHierarchy {
71    fn default() -> Self {
72        Self::new()
73    }
74}
75
76/// A single level in the hierarchy
77///
78/// Each level contains clusters of relationships at a specific granularity.
79#[derive(Debug, Clone, Serialize, Deserialize)]
80pub struct HierarchyLevel {
81    /// Unique identifier for this level (0 = most detailed)
82    pub level_id: usize,
83
84    /// Clusters at this level
85    pub clusters: Vec<RelationshipCluster>,
86
87    /// Resolution parameter used for clustering at this level
88    pub resolution: f32,
89}
90
91impl HierarchyLevel {
92    /// Create a new hierarchy level
93    pub fn new(level_id: usize, resolution: f32) -> Self {
94        Self {
95            level_id,
96            clusters: Vec::new(),
97            resolution,
98        }
99    }
100
101    /// Add a cluster to this level
102    pub fn add_cluster(&mut self, cluster: RelationshipCluster) {
103        self.clusters.push(cluster);
104    }
105
106    /// Get total number of relationships across all clusters
107    pub fn total_relationships(&self) -> usize {
108        self.clusters.iter().map(|c| c.relationship_ids.len()).sum()
109    }
110}
111
112/// A cluster of related relationships
113///
114/// Groups semantically or structurally similar relationships together
115/// with an LLM-generated summary.
116#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct RelationshipCluster {
118    /// Unique identifier for this cluster
119    pub cluster_id: String,
120
121    /// IDs of relationships in this cluster (format: "source_target_type")
122    pub relationship_ids: Vec<String>,
123
124    /// LLM-generated summary describing the cluster's theme
125    pub summary: String,
126
127    /// Parent cluster ID in the next level (None for top level)
128    pub parent_cluster: Option<String>,
129
130    /// Cohesion score indicating how tightly related the relationships are (0.0-1.0)
131    pub cohesion_score: f32,
132}
133
134impl RelationshipCluster {
135    /// Create a new relationship cluster
136    pub fn new(cluster_id: String) -> Self {
137        Self {
138            cluster_id,
139            relationship_ids: Vec::new(),
140            summary: String::new(),
141            parent_cluster: None,
142            cohesion_score: 0.0,
143        }
144    }
145
146    /// Add a relationship to this cluster
147    pub fn add_relationship(&mut self, rel_id: String) {
148        if !self.relationship_ids.contains(&rel_id) {
149            self.relationship_ids.push(rel_id);
150        }
151    }
152
153    /// Set the cluster summary
154    pub fn with_summary(mut self, summary: String) -> Self {
155        self.summary = summary;
156        self
157    }
158
159    /// Set the parent cluster
160    pub fn with_parent(mut self, parent_id: String) -> Self {
161        self.parent_cluster = Some(parent_id);
162        self
163    }
164
165    /// Set the cohesion score
166    pub fn with_cohesion(mut self, score: f32) -> Self {
167        self.cohesion_score = score.clamp(0.0, 1.0);
168        self
169    }
170
171    /// Check if cluster is empty
172    pub fn is_empty(&self) -> bool {
173        self.relationship_ids.is_empty()
174    }
175
176    /// Get number of relationships in cluster
177    pub fn size(&self) -> usize {
178        self.relationship_ids.len()
179    }
180}
181
182/// Builder for hierarchical relationship clustering
183pub struct HierarchyBuilder {
184    /// Relationships to cluster
185    relationships: Vec<Relationship>,
186
187    /// Ollama client for generating summaries
188    ollama_client: Option<OllamaClient>,
189
190    /// Number of levels to create
191    num_levels: usize,
192
193    /// Resolution parameters for each level (higher = more clusters)
194    resolutions: Vec<f32>,
195
196    /// Minimum cluster size to keep
197    min_cluster_size: usize,
198}
199
200impl HierarchyBuilder {
201    /// Create a new hierarchy builder
202    ///
203    /// # Arguments
204    ///
205    /// * `relationships` - Relationships to cluster
206    pub fn new(relationships: Vec<Relationship>) -> Self {
207        Self {
208            relationships,
209            ollama_client: None,
210            num_levels: 3,
211            resolutions: vec![1.0, 0.5, 0.2], // High to low resolution
212            min_cluster_size: 2,
213        }
214    }
215
216    /// Create a builder from a knowledge graph
217    pub fn from_graph(graph: &KnowledgeGraph) -> Self {
218        Self::new(graph.get_all_relationships().into_iter().cloned().collect())
219    }
220
221    /// Set Ollama client for summary generation
222    pub fn with_ollama_client(mut self, client: OllamaClient) -> Self {
223        self.ollama_client = Some(client);
224        self
225    }
226
227    /// Set number of hierarchy levels
228    pub fn with_num_levels(mut self, num_levels: usize) -> Self {
229        self.num_levels = num_levels;
230        self
231    }
232
233    /// Set resolution parameters for clustering
234    pub fn with_resolutions(mut self, resolutions: Vec<f32>) -> Self {
235        self.resolutions = resolutions;
236        self
237    }
238
239    /// Set minimum cluster size
240    pub fn with_min_cluster_size(mut self, size: usize) -> Self {
241        self.min_cluster_size = size;
242        self
243    }
244
245    /// Build the hierarchical relationship structure
246    ///
247    /// # Returns
248    ///
249    /// RelationshipHierarchy with multiple levels of clusters
250    #[cfg(feature = "async")]
251    pub async fn build(self) -> Result<RelationshipHierarchy> {
252        let mut hierarchy = RelationshipHierarchy::new();
253
254        // Build each level from fine to coarse
255        for (level_id, resolution) in self.resolutions.iter().enumerate().take(self.num_levels) {
256            #[cfg(feature = "tracing")]
257            tracing::info!(
258                level_id = level_id,
259                resolution = resolution,
260                "Building hierarchy level"
261            );
262
263            let level = self.build_level(level_id, *resolution, &hierarchy).await?;
264            hierarchy.add_level(level);
265        }
266
267        Ok(hierarchy)
268    }
269
270    /// Build a single level of the hierarchy
271    #[cfg(feature = "async")]
272    async fn build_level(
273        &self,
274        level_id: usize,
275        resolution: f32,
276        _existing_hierarchy: &RelationshipHierarchy,
277    ) -> Result<HierarchyLevel> {
278        let mut level = HierarchyLevel::new(level_id, resolution);
279
280        if self.relationships.is_empty() {
281            return Ok(level);
282        }
283
284        // Create relationship graph for clustering
285        // Each relationship becomes a node, edges between similar relationships
286        let rel_graph = self.build_relationship_graph(&self.relationships);
287
288        // Apply Leiden clustering
289        let communities = self.cluster_relationships(&rel_graph, resolution)?;
290
291        // Create clusters from communities
292        for (community_id, rel_indices) in communities {
293            let cluster_id = format!("L{}C{}", level_id, community_id);
294            let mut cluster = RelationshipCluster::new(cluster_id.clone());
295
296            // Add relationships to cluster
297            for idx in rel_indices {
298                if let Some(rel) = self.relationships.get(idx) {
299                    let rel_id = format!("{}_{}_{}", rel.source.0, rel.target.0, rel.relation_type);
300                    cluster.add_relationship(rel_id);
301                }
302            }
303
304            // Skip if cluster is too small
305            if cluster.size() < self.min_cluster_size {
306                continue;
307            }
308
309            // Generate summary if Ollama client available
310            if let Some(ref ollama_client) = self.ollama_client {
311                let summary = self
312                    .generate_cluster_summary(&cluster, &self.relationships, ollama_client)
313                    .await?;
314                cluster.summary = summary;
315            } else {
316                cluster.summary = format!(
317                    "Cluster {} with {} relationships",
318                    cluster_id,
319                    cluster.size()
320                );
321            }
322
323            // Calculate cohesion score
324            cluster.cohesion_score = self.calculate_cohesion(&cluster, &rel_graph);
325
326            level.add_cluster(cluster);
327        }
328
329        #[cfg(feature = "tracing")]
330        tracing::info!(
331            level_id = level_id,
332            num_clusters = level.clusters.len(),
333            total_relationships = level.total_relationships(),
334            "Completed hierarchy level"
335        );
336
337        Ok(level)
338    }
339
340    /// Build a graph where relationships are nodes
341    fn build_relationship_graph(
342        &self,
343        relationships: &[Relationship],
344    ) -> petgraph::Graph<usize, f32> {
345        use petgraph::Graph;
346
347        let mut graph = Graph::new();
348        let mut nodes = Vec::new();
349
350        // Create nodes
351        for (idx, _rel) in relationships.iter().enumerate() {
352            nodes.push(graph.add_node(idx));
353        }
354
355        // Add edges between similar relationships
356        for i in 0..relationships.len() {
357            for j in (i + 1)..relationships.len() {
358                let similarity = self.relationship_similarity(&relationships[i], &relationships[j]);
359
360                // Only connect relationships with sufficient similarity
361                if similarity > 0.3 {
362                    graph.add_edge(nodes[i], nodes[j], similarity);
363                }
364            }
365        }
366
367        graph
368    }
369
370    /// Calculate similarity between two relationships
371    fn relationship_similarity(&self, rel1: &Relationship, rel2: &Relationship) -> f32 {
372        let mut similarity = 0.0;
373
374        // Same relation type (strong signal)
375        if rel1.relation_type == rel2.relation_type {
376            similarity += 0.5;
377        }
378
379        // Share source or target entity
380        if rel1.source == rel2.source || rel1.target == rel2.target {
381            similarity += 0.3;
382        }
383
384        // Temporal proximity (if both have temporal info)
385        if let (Some(range1), Some(range2)) = (&rel1.temporal_range, &rel2.temporal_range) {
386            let overlap = self.temporal_overlap(range1, range2);
387            similarity += overlap * 0.2;
388        }
389
390        similarity.min(1.0)
391    }
392
393    /// Calculate temporal overlap between two ranges
394    fn temporal_overlap(
395        &self,
396        range1: &crate::graph::temporal::TemporalRange,
397        range2: &crate::graph::temporal::TemporalRange,
398    ) -> f32 {
399        let start = range1.start.max(range2.start);
400        let end = range1.end.min(range2.end);
401
402        if start < end {
403            let overlap = (end - start) as f32;
404            let total = ((range1.end - range1.start) + (range2.end - range2.start)) as f32 / 2.0;
405            (overlap / total.max(1.0)).min(1.0)
406        } else {
407            0.0
408        }
409    }
410
411    /// Cluster relationships using Leiden algorithm (when enabled) or SCC fallback
412    fn cluster_relationships(
413        &self,
414        graph: &petgraph::Graph<usize, f32>,
415        resolution: f32,
416    ) -> Result<HashMap<usize, Vec<usize>>> {
417        #[cfg(feature = "leiden")]
418        {
419            // Use proper Leiden algorithm for community detection
420            self.cluster_with_leiden(graph, resolution)
421        }
422        #[cfg(not(feature = "leiden"))]
423        {
424            // Fallback to simple connected components when Leiden feature is disabled
425            self.cluster_with_scc(graph, resolution)
426        }
427    }
428
429    /// Cluster using Leiden algorithm (when leiden feature is enabled)
430    #[cfg(feature = "leiden")]
431    fn cluster_with_leiden(
432        &self,
433        graph: &petgraph::Graph<usize, f32>,
434        resolution: f32,
435    ) -> Result<HashMap<usize, Vec<usize>>> {
436        use crate::graph::leiden::{LeidenCommunityDetector, LeidenConfig};
437        use petgraph::{Graph, Undirected};
438
439        // Convert relationship ID graph to string-labeled graph for Leiden
440        let mut leiden_graph: Graph<String, f32, Undirected> = Graph::new_undirected();
441        let mut node_mapping = HashMap::new(); // Maps relationship ID -> NodeIndex in leiden_graph
442        let mut reverse_mapping = HashMap::new(); // Maps NodeIndex -> relationship ID
443
444        // Add all nodes (relationship IDs) to the Leiden graph
445        for node_idx in graph.node_indices() {
446            if let Some(&rel_id) = graph.node_weight(node_idx) {
447                let leiden_node = leiden_graph.add_node(rel_id.to_string());
448                node_mapping.insert(rel_id, leiden_node);
449                reverse_mapping.insert(leiden_node, rel_id);
450            }
451        }
452
453        // Add all edges with weights
454        for edge in graph.edge_references() {
455            if let (Some(&src_rel_id), Some(&tgt_rel_id)) = (
456                graph.node_weight(edge.source()),
457                graph.node_weight(edge.target()),
458            ) {
459                if let (Some(&src_node), Some(&tgt_node)) =
460                    (node_mapping.get(&src_rel_id), node_mapping.get(&tgt_rel_id))
461                {
462                    leiden_graph.add_edge(src_node, tgt_node, *edge.weight());
463                }
464            }
465        }
466
467        // Configure and run Leiden
468        let config = LeidenConfig {
469            resolution,
470            max_cluster_size: 50, // Allow larger clusters for relationship graphs
471            use_lcc: true,
472            seed: Some(42), // Reproducible results
473            max_levels: 1,  // Single-level clustering for now
474            min_improvement: 0.001,
475        };
476
477        let detector = LeidenCommunityDetector::new(config);
478        let result = detector.detect_communities(&leiden_graph)?;
479
480        // Extract communities from level 0
481        let mut communities = HashMap::new();
482        if let Some(level_0) = result.levels.get(&0) {
483            // Group nodes by community ID
484            let mut temp_communities: HashMap<usize, Vec<usize>> = HashMap::new();
485            for (node_idx, &community_id) in level_0 {
486                if let Some(&rel_id) = reverse_mapping.get(node_idx) {
487                    temp_communities
488                        .entry(community_id)
489                        .or_insert_with(Vec::new)
490                        .push(rel_id);
491                }
492            }
493            communities = temp_communities;
494        }
495
496        Ok(communities)
497    }
498
499    /// Fallback clustering using strongly connected components
500    #[cfg(not(feature = "leiden"))]
501    fn cluster_with_scc(
502        &self,
503        graph: &petgraph::Graph<usize, f32>,
504        resolution: f32,
505    ) -> Result<HashMap<usize, Vec<usize>>> {
506        use petgraph::algo::kosaraju_scc;
507
508        let components = kosaraju_scc(&graph);
509
510        let mut communities = HashMap::new();
511        for (community_id, component) in components.into_iter().enumerate() {
512            let node_indices: Vec<usize> = component
513                .into_iter()
514                .filter_map(|node_idx| graph.node_weight(node_idx).copied())
515                .collect();
516
517            if !node_indices.is_empty() {
518                communities.insert(community_id, node_indices);
519            }
520        }
521
522        // Adjust for resolution by merging/splitting if needed
523        // For now, use as-is
524        let _ = resolution; // Acknowledge parameter
525
526        Ok(communities)
527    }
528
529    /// Generate LLM summary for a relationship cluster
530    #[cfg(feature = "async")]
531    async fn generate_cluster_summary(
532        &self,
533        cluster: &RelationshipCluster,
534        _all_relationships: &[Relationship],
535        ollama_client: &OllamaClient,
536    ) -> Result<String> {
537        // Collect relationship descriptions
538        let mut rel_descriptions = Vec::new();
539
540        for rel_id in &cluster.relationship_ids {
541            // Parse rel_id format: "source_target_type"
542            let parts: Vec<&str> = rel_id.split('_').collect();
543            if parts.len() >= 3 {
544                let rel_type = parts[2..].join("_");
545                rel_descriptions.push(format!("{} --[{}]--> {}", parts[0], rel_type, parts[1]));
546            }
547        }
548
549        // Limit to first 10 relationships for summary
550        let sample: Vec<_> = rel_descriptions.iter().take(10).cloned().collect();
551        let total = rel_descriptions.len();
552
553        let prompt = format!(
554            r#"Summarize the theme of these {} relationships in 1-2 sentences:
555
556{}
557
558Theme:"#,
559            total,
560            sample.join("\n")
561        );
562
563        match ollama_client.generate(&prompt).await {
564            Ok(summary) => Ok(summary.trim().to_string()),
565            Err(e) => {
566                #[cfg(feature = "tracing")]
567                tracing::warn!(
568                    error = %e,
569                    cluster_id = %cluster.cluster_id,
570                    "Failed to generate cluster summary, using fallback"
571                );
572
573                Ok(format!("Cluster of {} relationships", total))
574            },
575        }
576    }
577
578    /// Calculate cohesion score for a cluster
579    ///
580    /// Uses internal edge density to measure how tightly connected
581    /// relationships within the cluster are.
582    ///
583    /// Cohesion = (internal edges) / (possible edges)
584    ///
585    /// Falls back to size-based heuristic if graph metrics unavailable.
586    fn calculate_cohesion(
587        &self,
588        cluster: &RelationshipCluster,
589        rel_graph: &petgraph::Graph<usize, f32>,
590    ) -> f32 {
591        let size = cluster.size();
592
593        // Handle edge cases
594        if size == 0 {
595            return 0.0;
596        }
597        if size == 1 {
598            return 1.0; // Single item is perfectly cohesive
599        }
600
601        // Calculate internal edge density
602        // Count edges between nodes that belong to this cluster
603        let mut internal_edges = 0;
604
605        // Map relationship IDs to their indices for lookup
606        let cluster_rel_ids: std::collections::HashSet<_> =
607            cluster.relationship_ids.iter().cloned().collect();
608
609        // Count edges within the cluster in the relationship graph
610        for node_idx in rel_graph.node_indices() {
611            if let Some(&rel_idx) = rel_graph.node_weight(node_idx) {
612                // Check if this relationship belongs to our cluster
613                if let Some(rel) = self.relationships.get(rel_idx) {
614                    let rel_id = format!("{}_{}_{}", rel.source.0, rel.target.0, rel.relation_type);
615
616                    if cluster_rel_ids.contains(&rel_id) {
617                        // Count outgoing edges to other cluster members
618                        for edge in rel_graph.edges(node_idx) {
619                            if let Some(&target_rel_idx) = rel_graph.node_weight(edge.target()) {
620                                if let Some(target_rel) = self.relationships.get(target_rel_idx) {
621                                    let target_rel_id = format!(
622                                        "{}_{}_{}",
623                                        target_rel.source.0,
624                                        target_rel.target.0,
625                                        target_rel.relation_type
626                                    );
627
628                                    if cluster_rel_ids.contains(&target_rel_id) {
629                                        internal_edges += 1;
630                                    }
631                                }
632                            }
633                        }
634                    }
635                }
636            }
637        }
638
639        // Avoid double-counting (undirected graph assumption)
640        internal_edges /= 2;
641
642        // Calculate maximum possible edges in a complete graph: n*(n-1)/2
643        let max_possible_edges = (size * (size - 1)) / 2;
644
645        if max_possible_edges == 0 {
646            // Fallback to size-based heuristic
647            return (size as f32 / (size as f32 + 10.0)).min(1.0);
648        }
649
650        // Edge density as cohesion score
651        let density = internal_edges as f32 / max_possible_edges as f32;
652
653        // Apply sigmoid-like transformation to make scores more meaningful
654        // Low density clusters still get some credit (0.2-0.8 range)
655        0.2 + (density * 0.6)
656    }
657}
658
659#[cfg(test)]
660mod tests {
661    use super::*;
662    use crate::core::{Entity, EntityId};
663
664    fn create_test_graph() -> KnowledgeGraph {
665        let mut graph = KnowledgeGraph::new();
666
667        // Create test entities
668        for i in 0..5 {
669            let entity = Entity::new(
670                EntityId::new(format!("entity{}", i)),
671                format!("Entity {}", i),
672                "CONCEPT".to_string(),
673                0.9,
674            );
675            graph.add_entity(entity).unwrap();
676        }
677
678        // Create test relationships
679        let relationships = vec![
680            ("entity0", "entity1", "RELATED_TO"),
681            ("entity0", "entity2", "RELATED_TO"),
682            ("entity1", "entity2", "RELATED_TO"),
683            ("entity3", "entity4", "CAUSED"),
684            ("entity3", "entity4", "ENABLED"),
685        ];
686
687        for (src, tgt, rel_type) in relationships {
688            let rel = Relationship::new(
689                EntityId::new(src.to_string()),
690                EntityId::new(tgt.to_string()),
691                rel_type.to_string(),
692                0.8,
693            );
694            graph.add_relationship(rel).unwrap();
695        }
696
697        graph
698    }
699
700    #[test]
701    fn test_relationship_cluster_creation() {
702        let mut cluster = RelationshipCluster::new("test_cluster".to_string());
703        cluster.add_relationship("rel1".to_string());
704        cluster.add_relationship("rel2".to_string());
705
706        assert_eq!(cluster.size(), 2);
707        assert!(!cluster.is_empty());
708        assert_eq!(cluster.cluster_id, "test_cluster");
709    }
710
711    #[test]
712    fn test_hierarchy_level_creation() {
713        let mut level = HierarchyLevel::new(0, 1.0);
714        let cluster = RelationshipCluster::new("cluster1".to_string());
715        level.add_cluster(cluster);
716
717        assert_eq!(level.level_id, 0);
718        assert_eq!(level.clusters.len(), 1);
719    }
720
721    #[test]
722    fn test_relationship_hierarchy_structure() {
723        let mut hierarchy = RelationshipHierarchy::new();
724
725        let mut level0 = HierarchyLevel::new(0, 1.0);
726        let mut cluster = RelationshipCluster::new("L0C0".to_string());
727        cluster.add_relationship("rel1".to_string());
728        level0.add_cluster(cluster);
729
730        hierarchy.add_level(level0);
731
732        assert_eq!(hierarchy.num_levels(), 1);
733        assert!(hierarchy.get_level(0).is_some());
734    }
735
736    #[test]
737    fn test_hierarchy_builder_initialization() {
738        let graph = create_test_graph();
739        let builder = HierarchyBuilder::from_graph(&graph)
740            .with_num_levels(3)
741            .with_min_cluster_size(2);
742
743        assert_eq!(builder.num_levels, 3);
744        assert_eq!(builder.min_cluster_size, 2);
745    }
746
747    #[test]
748    fn test_cohesion_calculation() {
749        // Test cohesion with a properly structured relationship graph
750        // This tests the edge density calculation
751
752        let rel0 = Relationship::new(
753            EntityId::new("e0".to_string()),
754            EntityId::new("e1".to_string()),
755            "REL".to_string(),
756            0.8,
757        );
758        let rel1 = Relationship::new(
759            EntityId::new("e1".to_string()),
760            EntityId::new("e2".to_string()),
761            "REL".to_string(),
762            0.8,
763        );
764        let rel2 = Relationship::new(
765            EntityId::new("e2".to_string()),
766            EntityId::new("e0".to_string()),
767            "REL".to_string(),
768            0.8,
769        );
770
771        let relationships = vec![rel0.clone(), rel1.clone(), rel2.clone()];
772        let builder = HierarchyBuilder::new(relationships);
773
774        // Create a relationship graph where all 3 relationships are connected
775        let mut rel_graph = petgraph::Graph::<usize, f32>::new();
776        let n0 = rel_graph.add_node(0); // rel0
777        let n1 = rel_graph.add_node(1); // rel1
778        let n2 = rel_graph.add_node(2); // rel2
779
780        // Fully connected: each relationship connects to the others
781        rel_graph.add_edge(n0, n1, 1.0);
782        rel_graph.add_edge(n1, n0, 1.0);
783        rel_graph.add_edge(n1, n2, 1.0);
784        rel_graph.add_edge(n2, n1, 1.0);
785        rel_graph.add_edge(n2, n0, 1.0);
786        rel_graph.add_edge(n0, n2, 1.0);
787
788        // Create cluster with all 3 relationships
789        let mut cluster = RelationshipCluster::new("test_cluster".to_string());
790        cluster.add_relationship("e0_e1_REL".to_string());
791        cluster.add_relationship("e1_e2_REL".to_string());
792        cluster.add_relationship("e2_e0_REL".to_string());
793
794        let cohesion = builder.calculate_cohesion(&cluster, &rel_graph);
795
796        // Fully connected triangle should have high cohesion
797        // Exact value depends on implementation but should be > 0.5
798        assert!(
799            cohesion > 0.5,
800            "Expected high cohesion for fully connected cluster, got {}",
801            cohesion
802        );
803    }
804
805    #[test]
806    fn test_cohesion_single_relationship() {
807        let rel_graph = petgraph::Graph::<usize, f32>::new();
808
809        let rel0 = Relationship::new(
810            EntityId::new("e0".to_string()),
811            EntityId::new("e1".to_string()),
812            "REL".to_string(),
813            0.8,
814        );
815
816        let relationships = vec![rel0];
817        let builder = HierarchyBuilder::new(relationships);
818
819        let mut cluster = RelationshipCluster::new("test_cluster".to_string());
820        cluster.add_relationship("e0_e1_REL".to_string());
821
822        let cohesion = builder.calculate_cohesion(&cluster, &rel_graph);
823
824        // Single relationship should have perfect cohesion
825        assert_eq!(
826            cohesion, 1.0,
827            "Single relationship should have cohesion 1.0"
828        );
829    }
830
831    #[test]
832    #[cfg(feature = "leiden")]
833    fn test_leiden_integration() {
834        // Test that Leiden algorithm is used for clustering when feature is enabled
835        // Creates a graph with clear community structure
836
837        use petgraph::Graph;
838
839        let rel0 = Relationship::new(
840            EntityId::new("A".to_string()),
841            EntityId::new("B".to_string()),
842            "CONNECTED".to_string(),
843            0.9,
844        );
845        let rel1 = Relationship::new(
846            EntityId::new("B".to_string()),
847            EntityId::new("C".to_string()),
848            "CONNECTED".to_string(),
849            0.9,
850        );
851        let rel2 = Relationship::new(
852            EntityId::new("C".to_string()),
853            EntityId::new("A".to_string()),
854            "CONNECTED".to_string(),
855            0.9,
856        );
857        // Separate cluster
858        let rel3 = Relationship::new(
859            EntityId::new("X".to_string()),
860            EntityId::new("Y".to_string()),
861            "CONNECTED".to_string(),
862            0.9,
863        );
864        let rel4 = Relationship::new(
865            EntityId::new("Y".to_string()),
866            EntityId::new("Z".to_string()),
867            "CONNECTED".to_string(),
868            0.9,
869        );
870
871        let relationships = vec![
872            rel0.clone(),
873            rel1.clone(),
874            rel2.clone(),
875            rel3.clone(),
876            rel4.clone(),
877        ];
878        let builder = HierarchyBuilder::new(relationships);
879
880        // Build relationship graph with two clear communities
881        // Community 1: rel0-rel1-rel2 (triangle ABC)
882        // Community 2: rel3-rel4 (line XYZ)
883        let mut rel_graph = Graph::<usize, f32>::new();
884
885        // Add nodes for each relationship
886        let n0 = rel_graph.add_node(0); // A-B
887        let n1 = rel_graph.add_node(1); // B-C
888        let n2 = rel_graph.add_node(2); // C-A
889        let n3 = rel_graph.add_node(3); // X-Y
890        let n4 = rel_graph.add_node(4); // Y-Z
891
892        // Community 1 (fully connected triangle)
893        rel_graph.add_edge(n0, n1, 1.0); // A-B connects to B-C via B
894        rel_graph.add_edge(n1, n2, 1.0); // B-C connects to C-A via C
895        rel_graph.add_edge(n2, n0, 1.0); // C-A connects to A-B via A
896
897        // Community 2 (line)
898        rel_graph.add_edge(n3, n4, 1.0); // X-Y connects to Y-Z via Y
899
900        // Test clustering with Leiden
901        let communities = builder.cluster_relationships(&rel_graph, 1.0).unwrap();
902
903        // Should detect 2 communities
904        assert!(
905            communities.len() >= 2,
906            "Leiden should detect at least 2 communities, found {}",
907            communities.len()
908        );
909
910        // Verify that relationships 0,1,2 are in one community
911        // and relationships 3,4 are in another (or similar grouping)
912        let community_sizes: Vec<usize> = communities.values().map(|c| c.len()).collect();
913        assert!(
914            community_sizes.contains(&3) || community_sizes.contains(&2),
915            "Expected communities of size 3 or 2, got {:?}",
916            community_sizes
917        );
918    }
919}