1use crate::{
13 core::{KnowledgeGraph, Relationship, Result},
14 ollama::OllamaClient,
15};
16use petgraph::visit::EdgeRef;
17use serde::{Deserialize, Serialize};
18use std::collections::HashMap;
19
20#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct RelationshipHierarchy {
26 pub levels: Vec<HierarchyLevel>,
28}
29
30impl RelationshipHierarchy {
31 pub fn new() -> Self {
33 Self { levels: Vec::new() }
34 }
35
36 pub fn add_level(&mut self, level: HierarchyLevel) {
38 self.levels.push(level);
39 }
40
41 pub fn get_level(&self, level_id: usize) -> Option<&HierarchyLevel> {
43 self.levels.iter().find(|l| l.level_id == level_id)
44 }
45
46 pub fn num_levels(&self) -> usize {
48 self.levels.len()
49 }
50
51 pub fn find_clusters_for_relationship(
53 &self,
54 rel_id: &str,
55 ) -> Vec<(&HierarchyLevel, &RelationshipCluster)> {
56 let mut results = Vec::new();
57
58 for level in &self.levels {
59 for cluster in &level.clusters {
60 if cluster.relationship_ids.contains(&rel_id.to_string()) {
61 results.push((level, cluster));
62 }
63 }
64 }
65
66 results
67 }
68}
69
70impl Default for RelationshipHierarchy {
71 fn default() -> Self {
72 Self::new()
73 }
74}
75
76#[derive(Debug, Clone, Serialize, Deserialize)]
80pub struct HierarchyLevel {
81 pub level_id: usize,
83
84 pub clusters: Vec<RelationshipCluster>,
86
87 pub resolution: f32,
89}
90
91impl HierarchyLevel {
92 pub fn new(level_id: usize, resolution: f32) -> Self {
94 Self {
95 level_id,
96 clusters: Vec::new(),
97 resolution,
98 }
99 }
100
101 pub fn add_cluster(&mut self, cluster: RelationshipCluster) {
103 self.clusters.push(cluster);
104 }
105
106 pub fn total_relationships(&self) -> usize {
108 self.clusters.iter().map(|c| c.relationship_ids.len()).sum()
109 }
110}
111
112#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct RelationshipCluster {
118 pub cluster_id: String,
120
121 pub relationship_ids: Vec<String>,
123
124 pub summary: String,
126
127 pub parent_cluster: Option<String>,
129
130 pub cohesion_score: f32,
132}
133
134impl RelationshipCluster {
135 pub fn new(cluster_id: String) -> Self {
137 Self {
138 cluster_id,
139 relationship_ids: Vec::new(),
140 summary: String::new(),
141 parent_cluster: None,
142 cohesion_score: 0.0,
143 }
144 }
145
146 pub fn add_relationship(&mut self, rel_id: String) {
148 if !self.relationship_ids.contains(&rel_id) {
149 self.relationship_ids.push(rel_id);
150 }
151 }
152
153 pub fn with_summary(mut self, summary: String) -> Self {
155 self.summary = summary;
156 self
157 }
158
159 pub fn with_parent(mut self, parent_id: String) -> Self {
161 self.parent_cluster = Some(parent_id);
162 self
163 }
164
165 pub fn with_cohesion(mut self, score: f32) -> Self {
167 self.cohesion_score = score.clamp(0.0, 1.0);
168 self
169 }
170
171 pub fn is_empty(&self) -> bool {
173 self.relationship_ids.is_empty()
174 }
175
176 pub fn size(&self) -> usize {
178 self.relationship_ids.len()
179 }
180}
181
182pub struct HierarchyBuilder {
184 relationships: Vec<Relationship>,
186
187 ollama_client: Option<OllamaClient>,
189
190 num_levels: usize,
192
193 resolutions: Vec<f32>,
195
196 min_cluster_size: usize,
198}
199
200impl HierarchyBuilder {
201 pub fn new(relationships: Vec<Relationship>) -> Self {
207 Self {
208 relationships,
209 ollama_client: None,
210 num_levels: 3,
211 resolutions: vec![1.0, 0.5, 0.2], min_cluster_size: 2,
213 }
214 }
215
216 pub fn from_graph(graph: &KnowledgeGraph) -> Self {
218 Self::new(graph.get_all_relationships().into_iter().cloned().collect())
219 }
220
221 pub fn with_ollama_client(mut self, client: OllamaClient) -> Self {
223 self.ollama_client = Some(client);
224 self
225 }
226
227 pub fn with_num_levels(mut self, num_levels: usize) -> Self {
229 self.num_levels = num_levels;
230 self
231 }
232
233 pub fn with_resolutions(mut self, resolutions: Vec<f32>) -> Self {
235 self.resolutions = resolutions;
236 self
237 }
238
239 pub fn with_min_cluster_size(mut self, size: usize) -> Self {
241 self.min_cluster_size = size;
242 self
243 }
244
245 #[cfg(feature = "async")]
251 pub async fn build(self) -> Result<RelationshipHierarchy> {
252 let mut hierarchy = RelationshipHierarchy::new();
253
254 for (level_id, resolution) in self.resolutions.iter().enumerate().take(self.num_levels) {
256 #[cfg(feature = "tracing")]
257 tracing::info!(
258 level_id = level_id,
259 resolution = resolution,
260 "Building hierarchy level"
261 );
262
263 let level = self.build_level(level_id, *resolution, &hierarchy).await?;
264 hierarchy.add_level(level);
265 }
266
267 Ok(hierarchy)
268 }
269
270 #[cfg(feature = "async")]
272 async fn build_level(
273 &self,
274 level_id: usize,
275 resolution: f32,
276 _existing_hierarchy: &RelationshipHierarchy,
277 ) -> Result<HierarchyLevel> {
278 let mut level = HierarchyLevel::new(level_id, resolution);
279
280 if self.relationships.is_empty() {
281 return Ok(level);
282 }
283
284 let rel_graph = self.build_relationship_graph(&self.relationships);
287
288 let communities = self.cluster_relationships(&rel_graph, resolution)?;
290
291 for (community_id, rel_indices) in communities {
293 let cluster_id = format!("L{}C{}", level_id, community_id);
294 let mut cluster = RelationshipCluster::new(cluster_id.clone());
295
296 for idx in rel_indices {
298 if let Some(rel) = self.relationships.get(idx) {
299 let rel_id = format!("{}_{}_{}", rel.source.0, rel.target.0, rel.relation_type);
300 cluster.add_relationship(rel_id);
301 }
302 }
303
304 if cluster.size() < self.min_cluster_size {
306 continue;
307 }
308
309 if let Some(ref ollama_client) = self.ollama_client {
311 let summary = self
312 .generate_cluster_summary(&cluster, &self.relationships, ollama_client)
313 .await?;
314 cluster.summary = summary;
315 } else {
316 cluster.summary = format!(
317 "Cluster {} with {} relationships",
318 cluster_id,
319 cluster.size()
320 );
321 }
322
323 cluster.cohesion_score = self.calculate_cohesion(&cluster, &rel_graph);
325
326 level.add_cluster(cluster);
327 }
328
329 #[cfg(feature = "tracing")]
330 tracing::info!(
331 level_id = level_id,
332 num_clusters = level.clusters.len(),
333 total_relationships = level.total_relationships(),
334 "Completed hierarchy level"
335 );
336
337 Ok(level)
338 }
339
340 fn build_relationship_graph(
342 &self,
343 relationships: &[Relationship],
344 ) -> petgraph::Graph<usize, f32> {
345 use petgraph::Graph;
346
347 let mut graph = Graph::new();
348 let mut nodes = Vec::new();
349
350 for (idx, _rel) in relationships.iter().enumerate() {
352 nodes.push(graph.add_node(idx));
353 }
354
355 for i in 0..relationships.len() {
357 for j in (i + 1)..relationships.len() {
358 let similarity = self.relationship_similarity(&relationships[i], &relationships[j]);
359
360 if similarity > 0.3 {
362 graph.add_edge(nodes[i], nodes[j], similarity);
363 }
364 }
365 }
366
367 graph
368 }
369
370 fn relationship_similarity(&self, rel1: &Relationship, rel2: &Relationship) -> f32 {
372 let mut similarity = 0.0;
373
374 if rel1.relation_type == rel2.relation_type {
376 similarity += 0.5;
377 }
378
379 if rel1.source == rel2.source || rel1.target == rel2.target {
381 similarity += 0.3;
382 }
383
384 if let (Some(range1), Some(range2)) = (&rel1.temporal_range, &rel2.temporal_range) {
386 let overlap = self.temporal_overlap(range1, range2);
387 similarity += overlap * 0.2;
388 }
389
390 similarity.min(1.0)
391 }
392
393 fn temporal_overlap(
395 &self,
396 range1: &crate::graph::temporal::TemporalRange,
397 range2: &crate::graph::temporal::TemporalRange,
398 ) -> f32 {
399 let start = range1.start.max(range2.start);
400 let end = range1.end.min(range2.end);
401
402 if start < end {
403 let overlap = (end - start) as f32;
404 let total = ((range1.end - range1.start) + (range2.end - range2.start)) as f32 / 2.0;
405 (overlap / total.max(1.0)).min(1.0)
406 } else {
407 0.0
408 }
409 }
410
411 fn cluster_relationships(
413 &self,
414 graph: &petgraph::Graph<usize, f32>,
415 resolution: f32,
416 ) -> Result<HashMap<usize, Vec<usize>>> {
417 #[cfg(feature = "leiden")]
418 {
419 self.cluster_with_leiden(graph, resolution)
421 }
422 #[cfg(not(feature = "leiden"))]
423 {
424 self.cluster_with_scc(graph, resolution)
426 }
427 }
428
429 #[cfg(feature = "leiden")]
431 fn cluster_with_leiden(
432 &self,
433 graph: &petgraph::Graph<usize, f32>,
434 resolution: f32,
435 ) -> Result<HashMap<usize, Vec<usize>>> {
436 use crate::graph::leiden::{LeidenCommunityDetector, LeidenConfig};
437 use petgraph::{Graph, Undirected};
438
439 let mut leiden_graph: Graph<String, f32, Undirected> = Graph::new_undirected();
441 let mut node_mapping = HashMap::new(); let mut reverse_mapping = HashMap::new(); for node_idx in graph.node_indices() {
446 if let Some(&rel_id) = graph.node_weight(node_idx) {
447 let leiden_node = leiden_graph.add_node(rel_id.to_string());
448 node_mapping.insert(rel_id, leiden_node);
449 reverse_mapping.insert(leiden_node, rel_id);
450 }
451 }
452
453 for edge in graph.edge_references() {
455 if let (Some(&src_rel_id), Some(&tgt_rel_id)) = (
456 graph.node_weight(edge.source()),
457 graph.node_weight(edge.target()),
458 ) {
459 if let (Some(&src_node), Some(&tgt_node)) =
460 (node_mapping.get(&src_rel_id), node_mapping.get(&tgt_rel_id))
461 {
462 leiden_graph.add_edge(src_node, tgt_node, *edge.weight());
463 }
464 }
465 }
466
467 let config = LeidenConfig {
469 resolution,
470 max_cluster_size: 50, use_lcc: true,
472 seed: Some(42), max_levels: 1, min_improvement: 0.001,
475 };
476
477 let detector = LeidenCommunityDetector::new(config);
478 let result = detector.detect_communities(&leiden_graph)?;
479
480 let mut communities = HashMap::new();
482 if let Some(level_0) = result.levels.get(&0) {
483 let mut temp_communities: HashMap<usize, Vec<usize>> = HashMap::new();
485 for (node_idx, &community_id) in level_0 {
486 if let Some(&rel_id) = reverse_mapping.get(node_idx) {
487 temp_communities
488 .entry(community_id)
489 .or_insert_with(Vec::new)
490 .push(rel_id);
491 }
492 }
493 communities = temp_communities;
494 }
495
496 Ok(communities)
497 }
498
499 #[cfg(not(feature = "leiden"))]
501 fn cluster_with_scc(
502 &self,
503 graph: &petgraph::Graph<usize, f32>,
504 resolution: f32,
505 ) -> Result<HashMap<usize, Vec<usize>>> {
506 use petgraph::algo::kosaraju_scc;
507
508 let components = kosaraju_scc(&graph);
509
510 let mut communities = HashMap::new();
511 for (community_id, component) in components.into_iter().enumerate() {
512 let node_indices: Vec<usize> = component
513 .into_iter()
514 .filter_map(|node_idx| graph.node_weight(node_idx).copied())
515 .collect();
516
517 if !node_indices.is_empty() {
518 communities.insert(community_id, node_indices);
519 }
520 }
521
522 let _ = resolution; Ok(communities)
527 }
528
529 #[cfg(feature = "async")]
531 async fn generate_cluster_summary(
532 &self,
533 cluster: &RelationshipCluster,
534 _all_relationships: &[Relationship],
535 ollama_client: &OllamaClient,
536 ) -> Result<String> {
537 let mut rel_descriptions = Vec::new();
539
540 for rel_id in &cluster.relationship_ids {
541 let parts: Vec<&str> = rel_id.split('_').collect();
543 if parts.len() >= 3 {
544 let rel_type = parts[2..].join("_");
545 rel_descriptions.push(format!("{} --[{}]--> {}", parts[0], rel_type, parts[1]));
546 }
547 }
548
549 let sample: Vec<_> = rel_descriptions.iter().take(10).cloned().collect();
551 let total = rel_descriptions.len();
552
553 let prompt = format!(
554 r#"Summarize the theme of these {} relationships in 1-2 sentences:
555
556{}
557
558Theme:"#,
559 total,
560 sample.join("\n")
561 );
562
563 match ollama_client.generate(&prompt).await {
564 Ok(summary) => Ok(summary.trim().to_string()),
565 Err(e) => {
566 #[cfg(feature = "tracing")]
567 tracing::warn!(
568 error = %e,
569 cluster_id = %cluster.cluster_id,
570 "Failed to generate cluster summary, using fallback"
571 );
572
573 Ok(format!("Cluster of {} relationships", total))
574 },
575 }
576 }
577
578 fn calculate_cohesion(
587 &self,
588 cluster: &RelationshipCluster,
589 rel_graph: &petgraph::Graph<usize, f32>,
590 ) -> f32 {
591 let size = cluster.size();
592
593 if size == 0 {
595 return 0.0;
596 }
597 if size == 1 {
598 return 1.0; }
600
601 let mut internal_edges = 0;
604
605 let cluster_rel_ids: std::collections::HashSet<_> =
607 cluster.relationship_ids.iter().cloned().collect();
608
609 for node_idx in rel_graph.node_indices() {
611 if let Some(&rel_idx) = rel_graph.node_weight(node_idx) {
612 if let Some(rel) = self.relationships.get(rel_idx) {
614 let rel_id = format!("{}_{}_{}", rel.source.0, rel.target.0, rel.relation_type);
615
616 if cluster_rel_ids.contains(&rel_id) {
617 for edge in rel_graph.edges(node_idx) {
619 if let Some(&target_rel_idx) = rel_graph.node_weight(edge.target()) {
620 if let Some(target_rel) = self.relationships.get(target_rel_idx) {
621 let target_rel_id = format!(
622 "{}_{}_{}",
623 target_rel.source.0,
624 target_rel.target.0,
625 target_rel.relation_type
626 );
627
628 if cluster_rel_ids.contains(&target_rel_id) {
629 internal_edges += 1;
630 }
631 }
632 }
633 }
634 }
635 }
636 }
637 }
638
639 internal_edges /= 2;
641
642 let max_possible_edges = (size * (size - 1)) / 2;
644
645 if max_possible_edges == 0 {
646 return (size as f32 / (size as f32 + 10.0)).min(1.0);
648 }
649
650 let density = internal_edges as f32 / max_possible_edges as f32;
652
653 0.2 + (density * 0.6)
656 }
657}
658
659#[cfg(test)]
660mod tests {
661 use super::*;
662 use crate::core::{Entity, EntityId};
663
664 fn create_test_graph() -> KnowledgeGraph {
665 let mut graph = KnowledgeGraph::new();
666
667 for i in 0..5 {
669 let entity = Entity::new(
670 EntityId::new(format!("entity{}", i)),
671 format!("Entity {}", i),
672 "CONCEPT".to_string(),
673 0.9,
674 );
675 graph.add_entity(entity).unwrap();
676 }
677
678 let relationships = vec![
680 ("entity0", "entity1", "RELATED_TO"),
681 ("entity0", "entity2", "RELATED_TO"),
682 ("entity1", "entity2", "RELATED_TO"),
683 ("entity3", "entity4", "CAUSED"),
684 ("entity3", "entity4", "ENABLED"),
685 ];
686
687 for (src, tgt, rel_type) in relationships {
688 let rel = Relationship::new(
689 EntityId::new(src.to_string()),
690 EntityId::new(tgt.to_string()),
691 rel_type.to_string(),
692 0.8,
693 );
694 graph.add_relationship(rel).unwrap();
695 }
696
697 graph
698 }
699
700 #[test]
701 fn test_relationship_cluster_creation() {
702 let mut cluster = RelationshipCluster::new("test_cluster".to_string());
703 cluster.add_relationship("rel1".to_string());
704 cluster.add_relationship("rel2".to_string());
705
706 assert_eq!(cluster.size(), 2);
707 assert!(!cluster.is_empty());
708 assert_eq!(cluster.cluster_id, "test_cluster");
709 }
710
711 #[test]
712 fn test_hierarchy_level_creation() {
713 let mut level = HierarchyLevel::new(0, 1.0);
714 let cluster = RelationshipCluster::new("cluster1".to_string());
715 level.add_cluster(cluster);
716
717 assert_eq!(level.level_id, 0);
718 assert_eq!(level.clusters.len(), 1);
719 }
720
721 #[test]
722 fn test_relationship_hierarchy_structure() {
723 let mut hierarchy = RelationshipHierarchy::new();
724
725 let mut level0 = HierarchyLevel::new(0, 1.0);
726 let mut cluster = RelationshipCluster::new("L0C0".to_string());
727 cluster.add_relationship("rel1".to_string());
728 level0.add_cluster(cluster);
729
730 hierarchy.add_level(level0);
731
732 assert_eq!(hierarchy.num_levels(), 1);
733 assert!(hierarchy.get_level(0).is_some());
734 }
735
736 #[test]
737 fn test_hierarchy_builder_initialization() {
738 let graph = create_test_graph();
739 let builder = HierarchyBuilder::from_graph(&graph)
740 .with_num_levels(3)
741 .with_min_cluster_size(2);
742
743 assert_eq!(builder.num_levels, 3);
744 assert_eq!(builder.min_cluster_size, 2);
745 }
746
747 #[test]
748 fn test_cohesion_calculation() {
749 let rel0 = Relationship::new(
753 EntityId::new("e0".to_string()),
754 EntityId::new("e1".to_string()),
755 "REL".to_string(),
756 0.8,
757 );
758 let rel1 = Relationship::new(
759 EntityId::new("e1".to_string()),
760 EntityId::new("e2".to_string()),
761 "REL".to_string(),
762 0.8,
763 );
764 let rel2 = Relationship::new(
765 EntityId::new("e2".to_string()),
766 EntityId::new("e0".to_string()),
767 "REL".to_string(),
768 0.8,
769 );
770
771 let relationships = vec![rel0.clone(), rel1.clone(), rel2.clone()];
772 let builder = HierarchyBuilder::new(relationships);
773
774 let mut rel_graph = petgraph::Graph::<usize, f32>::new();
776 let n0 = rel_graph.add_node(0); let n1 = rel_graph.add_node(1); let n2 = rel_graph.add_node(2); rel_graph.add_edge(n0, n1, 1.0);
782 rel_graph.add_edge(n1, n0, 1.0);
783 rel_graph.add_edge(n1, n2, 1.0);
784 rel_graph.add_edge(n2, n1, 1.0);
785 rel_graph.add_edge(n2, n0, 1.0);
786 rel_graph.add_edge(n0, n2, 1.0);
787
788 let mut cluster = RelationshipCluster::new("test_cluster".to_string());
790 cluster.add_relationship("e0_e1_REL".to_string());
791 cluster.add_relationship("e1_e2_REL".to_string());
792 cluster.add_relationship("e2_e0_REL".to_string());
793
794 let cohesion = builder.calculate_cohesion(&cluster, &rel_graph);
795
796 assert!(
799 cohesion > 0.5,
800 "Expected high cohesion for fully connected cluster, got {}",
801 cohesion
802 );
803 }
804
805 #[test]
806 fn test_cohesion_single_relationship() {
807 let rel_graph = petgraph::Graph::<usize, f32>::new();
808
809 let rel0 = Relationship::new(
810 EntityId::new("e0".to_string()),
811 EntityId::new("e1".to_string()),
812 "REL".to_string(),
813 0.8,
814 );
815
816 let relationships = vec![rel0];
817 let builder = HierarchyBuilder::new(relationships);
818
819 let mut cluster = RelationshipCluster::new("test_cluster".to_string());
820 cluster.add_relationship("e0_e1_REL".to_string());
821
822 let cohesion = builder.calculate_cohesion(&cluster, &rel_graph);
823
824 assert_eq!(
826 cohesion, 1.0,
827 "Single relationship should have cohesion 1.0"
828 );
829 }
830
831 #[test]
832 #[cfg(feature = "leiden")]
833 fn test_leiden_integration() {
834 use petgraph::Graph;
838
839 let rel0 = Relationship::new(
840 EntityId::new("A".to_string()),
841 EntityId::new("B".to_string()),
842 "CONNECTED".to_string(),
843 0.9,
844 );
845 let rel1 = Relationship::new(
846 EntityId::new("B".to_string()),
847 EntityId::new("C".to_string()),
848 "CONNECTED".to_string(),
849 0.9,
850 );
851 let rel2 = Relationship::new(
852 EntityId::new("C".to_string()),
853 EntityId::new("A".to_string()),
854 "CONNECTED".to_string(),
855 0.9,
856 );
857 let rel3 = Relationship::new(
859 EntityId::new("X".to_string()),
860 EntityId::new("Y".to_string()),
861 "CONNECTED".to_string(),
862 0.9,
863 );
864 let rel4 = Relationship::new(
865 EntityId::new("Y".to_string()),
866 EntityId::new("Z".to_string()),
867 "CONNECTED".to_string(),
868 0.9,
869 );
870
871 let relationships = vec![
872 rel0.clone(),
873 rel1.clone(),
874 rel2.clone(),
875 rel3.clone(),
876 rel4.clone(),
877 ];
878 let builder = HierarchyBuilder::new(relationships);
879
880 let mut rel_graph = Graph::<usize, f32>::new();
884
885 let n0 = rel_graph.add_node(0); let n1 = rel_graph.add_node(1); let n2 = rel_graph.add_node(2); let n3 = rel_graph.add_node(3); let n4 = rel_graph.add_node(4); rel_graph.add_edge(n0, n1, 1.0); rel_graph.add_edge(n1, n2, 1.0); rel_graph.add_edge(n2, n0, 1.0); rel_graph.add_edge(n3, n4, 1.0); let communities = builder.cluster_relationships(&rel_graph, 1.0).unwrap();
902
903 assert!(
905 communities.len() >= 2,
906 "Leiden should detect at least 2 communities, found {}",
907 communities.len()
908 );
909
910 let community_sizes: Vec<usize> = communities.values().map(|c| c.len()).collect();
913 assert!(
914 community_sizes.contains(&3) || community_sizes.contains(&2),
915 "Expected communities of size 3 or 2, got {:?}",
916 community_sizes
917 );
918 }
919}