1use std::collections::HashMap;
44
45mod graph_summarizer_impl {
48 use crate::graph::community::{CommunityConfig, CommunityDetector};
49 use crate::Triple;
50 use rayon::prelude::*;
51 use std::collections::HashMap;
52
53 #[derive(Debug, Clone)]
55 pub struct GraphSummary {
56 pub entities: Vec<String>,
58 pub relations: Vec<(String, String, String)>,
60 pub community_labels: Vec<String>,
62 }
63
64 impl GraphSummary {
65 pub fn to_text(&self) -> String {
67 if self.entities.is_empty() {
68 return "The graph is empty.".to_string();
69 }
70 let n_comm = self.community_labels.len();
71 let n_ent = self.entities.len();
72 let entity_list = self
73 .entities
74 .iter()
75 .take(5)
76 .cloned()
77 .collect::<Vec<_>>()
78 .join(", ");
79 let mut seen_preds: std::collections::HashSet<String> =
81 std::collections::HashSet::new();
82 let mut pred_list: Vec<String> = Vec::new();
83 for (_, p, _) in &self.relations {
84 if seen_preds.insert(p.clone()) {
85 pred_list.push(p.clone());
86 if pred_list.len() >= 5 {
87 break;
88 }
89 }
90 }
91 format!(
92 "The graph contains {} {} across {} {}. \
93 Key entities include: {}. \
94 Primary relationships: {}.",
95 n_ent,
96 if n_ent == 1 { "entity" } else { "entities" },
97 n_comm,
98 if n_comm == 1 {
99 "community"
100 } else {
101 "communities"
102 },
103 entity_list,
104 if pred_list.is_empty() {
105 "none".to_string()
106 } else {
107 pred_list.join(", ")
108 },
109 )
110 }
111 }
112
113 pub struct GraphSummarizer {
122 pub max_nodes: usize,
123 pub max_triples: usize,
124 }
125
126 impl GraphSummarizer {
127 pub fn new(max_nodes: usize, max_triples: usize) -> Self {
128 Self {
129 max_nodes,
130 max_triples,
131 }
132 }
133
134 pub fn summarize(&self, triples: &[(String, String, String)]) -> GraphSummary {
136 if triples.is_empty() {
137 return GraphSummary {
138 entities: Vec::new(),
139 relations: Vec::new(),
140 community_labels: Vec::new(),
141 };
142 }
143
144 let core_triples: Vec<Triple> = triples
146 .iter()
147 .map(|(s, p, o)| Triple::new(s.clone(), p.clone(), o.clone()))
148 .collect();
149
150 let mut in_degree: HashMap<String, usize> = HashMap::new();
152 for (s, _, o) in triples {
153 in_degree.entry(s.clone()).or_insert(0);
155 *in_degree.entry(o.clone()).or_insert(0) += 1;
156 }
157
158 let config = CommunityConfig {
160 min_community_size: 1,
161 ..CommunityConfig::default()
162 };
163 let detector = CommunityDetector::new(config);
164 let communities = detector.detect(&core_triples).unwrap_or_default();
165
166 let representatives: Vec<(String, String)> = communities
169 .par_iter()
170 .enumerate()
171 .filter_map(|(idx, comm)| {
172 let rep = comm
173 .entities
174 .iter()
175 .max_by_key(|e| in_degree.get(e.as_str()).copied().unwrap_or(0))
176 .cloned()?;
177 let label = format!("Community {} ({} entities)", idx, comm.entities.len());
178 Some((rep, label))
179 })
180 .collect();
181
182 let mut pred_freq: HashMap<&str, usize> = HashMap::new();
184 for (_, p, _) in triples {
185 *pred_freq.entry(p.as_str()).or_insert(0) += 1;
186 }
187 let mut pred_ranked: Vec<(&str, usize)> = pred_freq.into_iter().collect();
188 pred_ranked.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(b.0)));
189 let top_predicates: std::collections::HashSet<&str> =
190 pred_ranked.iter().take(10).map(|(p, _)| *p).collect();
191
192 let entities: Vec<String> = representatives
194 .iter()
195 .map(|(rep, _)| rep.clone())
196 .take(self.max_nodes)
197 .collect();
198
199 let rep_set: std::collections::HashSet<&str> =
200 entities.iter().map(|s| s.as_str()).collect();
201
202 let mut selected: Vec<(String, String, String)> = triples
204 .iter()
205 .filter(|(s, p, _)| {
206 rep_set.contains(s.as_str()) && top_predicates.contains(p.as_str())
207 })
208 .map(|(s, p, o)| (s.clone(), p.clone(), o.clone()))
209 .take(self.max_triples)
210 .collect();
211
212 if selected.len() < self.max_triples {
214 for (s, p, o) in triples {
215 if selected.len() >= self.max_triples {
216 break;
217 }
218 if rep_set.contains(s.as_str()) {
219 let candidate = (s.clone(), p.clone(), o.clone());
220 if !selected.contains(&candidate) {
221 selected.push(candidate);
222 }
223 }
224 }
225 }
226
227 let community_labels: Vec<String> = representatives
228 .iter()
229 .take(self.max_nodes)
230 .map(|(_, label)| label.clone())
231 .collect();
232
233 GraphSummary {
234 entities,
235 relations: selected,
236 community_labels,
237 }
238 }
239 }
240} pub use graph_summarizer_impl::{GraphSummarizer, GraphSummary};
243
244#[derive(Debug, Clone)]
248pub struct KgNode {
249 pub id: String,
251 pub label: String,
253 pub node_type: String,
255 pub properties: HashMap<String, String>,
257}
258
259impl KgNode {
260 pub fn simple(
262 id: impl Into<String>,
263 label: impl Into<String>,
264 node_type: impl Into<String>,
265 ) -> Self {
266 Self {
267 id: id.into(),
268 label: label.into(),
269 node_type: node_type.into(),
270 properties: HashMap::new(),
271 }
272 }
273}
274
275#[derive(Debug, Clone)]
277pub struct KgEdge {
278 pub source: String,
280 pub target: String,
282 pub relation: String,
284 pub weight: f64,
286}
287
288impl KgEdge {
289 pub fn unweighted(
291 source: impl Into<String>,
292 target: impl Into<String>,
293 relation: impl Into<String>,
294 ) -> Self {
295 Self {
296 source: source.into(),
297 target: target.into(),
298 relation: relation.into(),
299 weight: 1.0,
300 }
301 }
302}
303
304#[derive(Debug, Clone, Default)]
308pub struct KgSubgraph {
309 pub nodes: Vec<KgNode>,
311 pub edges: Vec<KgEdge>,
313}
314
315impl KgSubgraph {
316 pub fn new() -> Self {
318 Self::default()
319 }
320
321 pub fn add_node(&mut self, node: KgNode) {
323 self.nodes.push(node);
324 }
325
326 pub fn add_edge(&mut self, edge: KgEdge) {
328 self.edges.push(edge);
329 }
330
331 pub fn node_count(&self) -> usize {
333 self.nodes.len()
334 }
335
336 pub fn edge_count(&self) -> usize {
338 self.edges.len()
339 }
340
341 pub fn node(&self, id: &str) -> Option<&KgNode> {
343 self.nodes.iter().find(|n| n.id == id)
344 }
345}
346
347#[derive(Debug, Clone)]
351pub struct SummaryCluster {
352 pub id: usize,
354 pub representative_node: String,
356 pub member_nodes: Vec<String>,
358 pub internal_edges: usize,
360 pub summary_label: String,
362}
363
364impl SummaryCluster {
365 pub fn size(&self) -> usize {
367 self.member_nodes.len()
368 }
369}
370
371pub struct SubgraphSummarizer;
375
376impl SubgraphSummarizer {
377 pub fn new() -> Self {
379 Self
380 }
381
382 pub fn summarize(&self, graph: &KgSubgraph, max_clusters: usize) -> Vec<SummaryCluster> {
393 if graph.nodes.is_empty() || max_clusters == 0 {
394 return Vec::new();
395 }
396
397 let mut type_groups: HashMap<String, Vec<String>> = HashMap::new();
399 for node in &graph.nodes {
400 type_groups
401 .entry(node.node_type.clone())
402 .or_default()
403 .push(node.id.clone());
404 }
405
406 let mut groups: Vec<(String, Vec<String>)> = type_groups.into_iter().collect();
408 groups.sort_by(|a, b| a.0.cmp(&b.0));
409
410 let groups = if groups.len() > max_clusters {
412 let (keep, overflow) = groups.split_at(max_clusters - 1);
413 let mut merged = keep.to_vec();
414 let other_members: Vec<String> = overflow
415 .iter()
416 .flat_map(|(_, ids)| ids.iter().cloned())
417 .collect();
418 if !other_members.is_empty() {
419 merged.push(("Other".to_string(), other_members));
420 }
421 merged
422 } else {
423 groups
424 };
425
426 let degree_map = build_degree_map(graph);
428
429 groups
431 .into_iter()
432 .enumerate()
433 .map(|(cluster_id, (node_type, members))| {
434 let representative = members
436 .iter()
437 .max_by_key(|id| degree_map.get(*id).copied().unwrap_or(0))
438 .cloned()
439 .unwrap_or_default();
440
441 let member_set: std::collections::HashSet<&str> =
443 members.iter().map(|s| s.as_str()).collect();
444 let internal_edges = graph
445 .edges
446 .iter()
447 .filter(|e| {
448 member_set.contains(e.source.as_str())
449 && member_set.contains(e.target.as_str())
450 })
451 .count();
452
453 SummaryCluster {
454 id: cluster_id,
455 representative_node: representative,
456 member_nodes: members,
457 internal_edges,
458 summary_label: format!("{node_type} cluster"),
459 }
460 })
461 .collect()
462 }
463
464 pub fn extract_key_relations(&self, graph: &KgSubgraph, top_n: usize) -> Vec<(String, usize)> {
468 if top_n == 0 {
469 return Vec::new();
470 }
471 let mut counts: HashMap<String, usize> = HashMap::new();
472 for edge in &graph.edges {
473 *counts.entry(edge.relation.clone()).or_insert(0) += 1;
474 }
475 let mut sorted: Vec<(String, usize)> = counts.into_iter().collect();
476 sorted.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
477 sorted.truncate(top_n);
478 sorted
479 }
480
481 pub fn node_degree(&self, graph: &KgSubgraph, node_id: &str) -> usize {
485 graph
486 .edges
487 .iter()
488 .filter(|e| e.source == node_id || e.target == node_id)
489 .count()
490 }
491
492 pub fn generate_text_summary(&self, clusters: &[SummaryCluster]) -> String {
497 if clusters.is_empty() {
498 return "The subgraph contains no identifiable clusters.".to_string();
499 }
500
501 let mut parts: Vec<String> = Vec::new();
502 for cluster in clusters {
503 parts.push(format!(
504 "The {} (representative: {}, {} members, {} internal edges)",
505 cluster.summary_label,
506 cluster.representative_node,
507 cluster.member_nodes.len(),
508 cluster.internal_edges,
509 ));
510 }
511
512 format!(
513 "{}. The subgraph contains {} cluster{}.",
514 parts.join(". "),
515 clusters.len(),
516 if clusters.len() == 1 { "" } else { "s" }
517 )
518 }
519}
520
521impl Default for SubgraphSummarizer {
522 fn default() -> Self {
523 Self::new()
524 }
525}
526
527fn build_degree_map(graph: &KgSubgraph) -> HashMap<String, usize> {
531 let mut map: HashMap<String, usize> = HashMap::new();
532 for edge in &graph.edges {
533 *map.entry(edge.source.clone()).or_insert(0) += 1;
534 if edge.source != edge.target {
535 *map.entry(edge.target.clone()).or_insert(0) += 1;
536 }
537 }
538 map
539}
540
541#[cfg(test)]
544mod tests {
545 use super::*;
546
547 fn node(id: &str, node_type: &str) -> KgNode {
550 KgNode::simple(id, id, node_type)
551 }
552
553 fn edge(src: &str, tgt: &str, rel: &str) -> KgEdge {
554 KgEdge::unweighted(src, tgt, rel)
555 }
556
557 fn make_graph_with_types(specs: &[(&str, &str)], edges: &[(&str, &str, &str)]) -> KgSubgraph {
558 let mut g = KgSubgraph::new();
559 for (id, typ) in specs {
560 g.add_node(node(id, typ));
561 }
562 for (s, t, r) in edges {
563 g.add_edge(edge(s, t, r));
564 }
565 g
566 }
567
568 #[test]
571 fn test_new_subgraph_empty() {
572 let g = KgSubgraph::new();
573 assert_eq!(g.node_count(), 0);
574 assert_eq!(g.edge_count(), 0);
575 }
576
577 #[test]
578 fn test_add_node_increments_count() {
579 let mut g = KgSubgraph::new();
580 g.add_node(node("n1", "Person"));
581 assert_eq!(g.node_count(), 1);
582 }
583
584 #[test]
585 fn test_add_edge_increments_count() {
586 let mut g = KgSubgraph::new();
587 g.add_edge(edge("a", "b", "knows"));
588 assert_eq!(g.edge_count(), 1);
589 }
590
591 #[test]
592 fn test_node_lookup_found() {
593 let mut g = KgSubgraph::new();
594 g.add_node(node("alice", "Person"));
595 assert!(g.node("alice").is_some());
596 assert_eq!(g.node("alice").expect("node").node_type, "Person");
597 }
598
599 #[test]
600 fn test_node_lookup_missing() {
601 let g = KgSubgraph::new();
602 assert!(g.node("nope").is_none());
603 }
604
605 #[test]
608 fn test_kg_node_simple() {
609 let n = KgNode::simple("id1", "Label", "Type");
610 assert_eq!(n.id, "id1");
611 assert_eq!(n.label, "Label");
612 assert_eq!(n.node_type, "Type");
613 assert!(n.properties.is_empty());
614 }
615
616 #[test]
619 fn test_kg_edge_unweighted_weight_one() {
620 let e = KgEdge::unweighted("a", "b", "rel");
621 assert_eq!(e.weight, 1.0);
622 }
623
624 #[test]
627 fn test_summarize_empty_graph() {
628 let g = KgSubgraph::new();
629 let s = SubgraphSummarizer::new();
630 assert!(s.summarize(&g, 10).is_empty());
631 }
632
633 #[test]
634 fn test_summarize_max_clusters_zero() {
635 let g = make_graph_with_types(&[("n1", "Person")], &[]);
636 let s = SubgraphSummarizer::new();
637 assert!(s.summarize(&g, 0).is_empty());
638 }
639
640 #[test]
643 fn test_summarize_single_type_one_cluster() {
644 let g = make_graph_with_types(&[("a", "Person"), ("b", "Person"), ("c", "Person")], &[]);
645 let s = SubgraphSummarizer::new();
646 let clusters = s.summarize(&g, 5);
647 assert_eq!(clusters.len(), 1);
648 assert_eq!(clusters[0].member_nodes.len(), 3);
649 }
650
651 #[test]
652 fn test_summarize_cluster_label_contains_type() {
653 let g = make_graph_with_types(&[("a", "Organization")], &[]);
654 let s = SubgraphSummarizer::new();
655 let clusters = s.summarize(&g, 5);
656 assert!(
657 clusters[0].summary_label.contains("Organization"),
658 "label should mention the type: {}",
659 clusters[0].summary_label
660 );
661 }
662
663 #[test]
666 fn test_summarize_two_types_two_clusters() {
667 let g = make_graph_with_types(&[("a", "Person"), ("b", "Person"), ("c", "Company")], &[]);
668 let s = SubgraphSummarizer::new();
669 let clusters = s.summarize(&g, 10);
670 assert_eq!(clusters.len(), 2);
671 }
672
673 #[test]
674 fn test_summarize_respects_max_clusters() {
675 let g = make_graph_with_types(&[("a", "T1"), ("b", "T2"), ("c", "T3"), ("d", "T4")], &[]);
676 let s = SubgraphSummarizer::new();
677 let clusters = s.summarize(&g, 2);
678 assert!(
679 clusters.len() <= 2,
680 "should have at most 2 clusters, got {}",
681 clusters.len()
682 );
683 }
684
685 #[test]
686 fn test_summarize_overflow_goes_to_other() {
687 let g = make_graph_with_types(
688 &[
689 ("a", "T1"),
690 ("b", "T2"),
691 ("c", "T3"),
692 ("d", "T4"),
693 ("e", "T5"),
694 ],
695 &[],
696 );
697 let s = SubgraphSummarizer::new();
698 let clusters = s.summarize(&g, 3);
699 assert_eq!(clusters.len(), 3);
701 let has_other = clusters.iter().any(|c| c.summary_label.contains("Other"));
702 assert!(has_other, "overflow should be merged into Other cluster");
703 }
704
705 #[test]
708 fn test_representative_is_most_connected() {
709 let g = make_graph_with_types(
711 &[("a", "Person"), ("b", "Person"), ("c", "Person")],
712 &[("a", "b", "knows"), ("b", "c", "knows")],
713 );
714 let s = SubgraphSummarizer::new();
715 let clusters = s.summarize(&g, 5);
716 assert_eq!(clusters.len(), 1);
717 assert_eq!(clusters[0].representative_node, "b");
718 }
719
720 #[test]
721 fn test_representative_exists_for_single_node_cluster() {
722 let g = make_graph_with_types(&[("solo", "Category")], &[]);
723 let s = SubgraphSummarizer::new();
724 let clusters = s.summarize(&g, 5);
725 assert_eq!(clusters[0].representative_node, "solo");
726 }
727
728 #[test]
731 fn test_internal_edges_all_within_cluster() {
732 let g = make_graph_with_types(
733 &[("a", "T"), ("b", "T"), ("c", "T")],
734 &[("a", "b", "r"), ("b", "c", "r")],
735 );
736 let s = SubgraphSummarizer::new();
737 let clusters = s.summarize(&g, 5);
738 assert_eq!(clusters[0].internal_edges, 2);
739 }
740
741 #[test]
742 fn test_internal_edges_none_cross_cluster() {
743 let g = make_graph_with_types(
745 &[("a", "T1"), ("b", "T1"), ("c", "T2")],
746 &[("a", "c", "cross"), ("b", "c", "cross")],
747 );
748 let s = SubgraphSummarizer::new();
749 let clusters = s.summarize(&g, 5);
750 for cluster in &clusters {
751 if cluster.summary_label.contains("T1") {
752 assert_eq!(cluster.internal_edges, 0);
753 }
754 }
755 }
756
757 #[test]
760 fn test_cluster_ids_are_sequential() {
761 let g = make_graph_with_types(&[("a", "T1"), ("b", "T2"), ("c", "T3")], &[]);
762 let s = SubgraphSummarizer::new();
763 let clusters = s.summarize(&g, 10);
764 for (i, c) in clusters.iter().enumerate() {
765 assert_eq!(c.id, i);
766 }
767 }
768
769 #[test]
772 fn test_key_relations_empty_graph() {
773 let g = KgSubgraph::new();
774 let s = SubgraphSummarizer::new();
775 assert!(s.extract_key_relations(&g, 5).is_empty());
776 }
777
778 #[test]
779 fn test_key_relations_top_n_zero() {
780 let mut g = KgSubgraph::new();
781 g.add_edge(edge("a", "b", "knows"));
782 let s = SubgraphSummarizer::new();
783 assert!(s.extract_key_relations(&g, 0).is_empty());
784 }
785
786 #[test]
787 fn test_key_relations_sorted_by_frequency() {
788 let g = make_graph_with_types(
789 &[("a", "T"), ("b", "T"), ("c", "T")],
790 &[
791 ("a", "b", "knows"),
792 ("b", "c", "knows"),
793 ("a", "c", "likes"),
794 ],
795 );
796 let s = SubgraphSummarizer::new();
797 let relations = s.extract_key_relations(&g, 5);
798 assert!(!relations.is_empty());
799 assert_eq!(relations[0].0, "knows");
800 assert_eq!(relations[0].1, 2);
801 }
802
803 #[test]
804 fn test_key_relations_truncated_to_top_n() {
805 let g = make_graph_with_types(
806 &[("a", "T"), ("b", "T"), ("c", "T")],
807 &[
808 ("a", "b", "r1"),
809 ("a", "b", "r2"),
810 ("a", "b", "r3"),
811 ("a", "b", "r4"),
812 ("a", "b", "r5"),
813 ],
814 );
815 let s = SubgraphSummarizer::new();
816 let relations = s.extract_key_relations(&g, 3);
817 assert!(relations.len() <= 3);
818 }
819
820 #[test]
821 fn test_key_relations_single_relation() {
822 let mut g = KgSubgraph::new();
823 for i in 0..5 {
824 g.add_edge(edge(&format!("n{i}"), &format!("n{}", i + 1), "knows"));
825 }
826 let s = SubgraphSummarizer::new();
827 let rels = s.extract_key_relations(&g, 1);
828 assert_eq!(rels.len(), 1);
829 assert_eq!(rels[0].0, "knows");
830 assert_eq!(rels[0].1, 5);
831 }
832
833 #[test]
836 fn test_node_degree_no_edges() {
837 let g = make_graph_with_types(&[("a", "T")], &[]);
838 let s = SubgraphSummarizer::new();
839 assert_eq!(s.node_degree(&g, "a"), 0);
840 }
841
842 #[test]
843 fn test_node_degree_outgoing_only() {
844 let g = make_graph_with_types(&[("a", "T"), ("b", "T")], &[("a", "b", "r")]);
845 let s = SubgraphSummarizer::new();
846 assert_eq!(s.node_degree(&g, "a"), 1);
847 assert_eq!(s.node_degree(&g, "b"), 1);
848 }
849
850 #[test]
851 fn test_node_degree_multiple_edges() {
852 let g = make_graph_with_types(
853 &[("a", "T"), ("b", "T"), ("c", "T")],
854 &[("a", "b", "r"), ("a", "c", "r"), ("b", "a", "r")],
855 );
856 let s = SubgraphSummarizer::new();
857 assert_eq!(s.node_degree(&g, "a"), 3);
859 }
860
861 #[test]
862 fn test_node_degree_missing_node() {
863 let g = KgSubgraph::new();
864 let s = SubgraphSummarizer::new();
865 assert_eq!(s.node_degree(&g, "ghost"), 0);
866 }
867
868 #[test]
871 fn test_text_summary_empty_clusters() {
872 let s = SubgraphSummarizer::new();
873 let text = s.generate_text_summary(&[]);
874 assert!(text.contains("no identifiable clusters"), "text: {text}");
875 }
876
877 #[test]
878 fn test_text_summary_single_cluster() {
879 let clusters = vec![SummaryCluster {
880 id: 0,
881 representative_node: "alice".to_string(),
882 member_nodes: vec!["alice".to_string(), "bob".to_string()],
883 internal_edges: 1,
884 summary_label: "Person cluster".to_string(),
885 }];
886 let s = SubgraphSummarizer::new();
887 let text = s.generate_text_summary(&clusters);
888 assert!(text.contains("alice"), "text: {text}");
889 assert!(text.contains("Person cluster"), "text: {text}");
890 assert!(text.contains("1 cluster"), "text: {text}");
891 }
892
893 #[test]
894 fn test_text_summary_multiple_clusters() {
895 let clusters = vec![
896 SummaryCluster {
897 id: 0,
898 representative_node: "a".to_string(),
899 member_nodes: vec!["a".to_string()],
900 internal_edges: 0,
901 summary_label: "Person cluster".to_string(),
902 },
903 SummaryCluster {
904 id: 1,
905 representative_node: "c".to_string(),
906 member_nodes: vec!["c".to_string(), "d".to_string()],
907 internal_edges: 1,
908 summary_label: "Company cluster".to_string(),
909 },
910 ];
911 let s = SubgraphSummarizer::new();
912 let text = s.generate_text_summary(&clusters);
913 assert!(text.contains("2 clusters"), "text: {text}");
914 assert!(text.contains("Person cluster"), "text: {text}");
915 assert!(text.contains("Company cluster"), "text: {text}");
916 }
917
918 #[test]
919 fn test_text_summary_contains_member_count() {
920 let clusters = vec![SummaryCluster {
921 id: 0,
922 representative_node: "x".to_string(),
923 member_nodes: vec!["x".to_string(), "y".to_string(), "z".to_string()],
924 internal_edges: 2,
925 summary_label: "T cluster".to_string(),
926 }];
927 let s = SubgraphSummarizer::new();
928 let text = s.generate_text_summary(&clusters);
929 assert!(text.contains("3 members"), "text: {text}");
930 }
931
932 #[test]
933 fn test_text_summary_contains_internal_edge_count() {
934 let clusters = vec![SummaryCluster {
935 id: 0,
936 representative_node: "x".to_string(),
937 member_nodes: vec!["x".to_string()],
938 internal_edges: 7,
939 summary_label: "T cluster".to_string(),
940 }];
941 let s = SubgraphSummarizer::new();
942 let text = s.generate_text_summary(&clusters);
943 assert!(text.contains("7 internal edges"), "text: {text}");
944 }
945
946 #[test]
949 fn test_cluster_size() {
950 let c = SummaryCluster {
951 id: 0,
952 representative_node: "r".to_string(),
953 member_nodes: vec!["a".to_string(), "b".to_string(), "c".to_string()],
954 internal_edges: 0,
955 summary_label: "X cluster".to_string(),
956 };
957 assert_eq!(c.size(), 3);
958 }
959
960 #[test]
963 fn test_default_summarizer() {
964 let s = SubgraphSummarizer;
965 let g = KgSubgraph::new();
966 assert!(s.summarize(&g, 5).is_empty());
967 }
968
969 #[test]
970 fn test_default_subgraph() {
971 let g = KgSubgraph::default();
972 assert_eq!(g.node_count(), 0);
973 }
974}
975
976#[cfg(test)]
979mod graph_summarizer_tests {
980 use super::GraphSummarizer;
981
982 fn sample_triples() -> Vec<(String, String, String)> {
984 vec![
985 ("Alice".into(), "knows".into(), "Bob".into()),
986 ("Bob".into(), "knows".into(), "Carol".into()),
987 ("Carol".into(), "knows".into(), "Alice".into()),
988 ("Alice".into(), "worksAt".into(), "ACME".into()),
989 ("Bob".into(), "worksAt".into(), "ACME".into()),
990 ("ACME".into(), "locatedIn".into(), "Berlin".into()),
991 ("Dave".into(), "knows".into(), "Eve".into()),
992 ("Eve".into(), "knows".into(), "Frank".into()),
993 ("Dave".into(), "worksAt".into(), "WidgetCo".into()),
994 ("WidgetCo".into(), "locatedIn".into(), "Paris".into()),
995 ]
996 }
997
998 #[test]
999 fn test_summary_respects_max_nodes() {
1000 let summarizer = GraphSummarizer::new(3, 20);
1001 let triples = sample_triples();
1002 let summary = summarizer.summarize(&triples);
1003 assert!(
1004 summary.entities.len() <= 3,
1005 "expected ≤3 entities, got {}",
1006 summary.entities.len()
1007 );
1008 }
1009
1010 #[test]
1011 fn test_summary_respects_max_triples() {
1012 let summarizer = GraphSummarizer::new(10, 4);
1013 let triples = sample_triples();
1014 let summary = summarizer.summarize(&triples);
1015 assert!(
1016 summary.relations.len() <= 4,
1017 "expected ≤4 relations, got {}",
1018 summary.relations.len()
1019 );
1020 }
1021
1022 #[test]
1023 fn test_to_text_non_empty_on_non_empty_graph() {
1024 let summarizer = GraphSummarizer::new(10, 20);
1025 let triples = sample_triples();
1026 let summary = summarizer.summarize(&triples);
1027 let text = summary.to_text();
1028 assert!(!text.is_empty(), "to_text() should not be empty");
1029 }
1030
1031 #[test]
1032 fn test_empty_graph_returns_empty_summary() {
1033 let summarizer = GraphSummarizer::new(10, 20);
1034 let summary = summarizer.summarize(&[]);
1035 assert!(
1036 summary.entities.is_empty(),
1037 "empty graph: entities should be empty"
1038 );
1039 assert!(
1040 summary.relations.is_empty(),
1041 "empty graph: relations should be empty"
1042 );
1043 }
1044
1045 #[test]
1046 fn test_community_labels_present_when_graph_has_nodes() {
1047 let summarizer = GraphSummarizer::new(10, 20);
1048 let triples = sample_triples();
1049 let summary = summarizer.summarize(&triples);
1050 assert!(
1051 !summary.community_labels.is_empty(),
1052 "community_labels should be non-empty when graph has nodes"
1053 );
1054 }
1055
1056 #[test]
1057 fn test_predicate_frequency_ordering() {
1058 let summarizer = GraphSummarizer::new(10, 20);
1061 let triples = sample_triples();
1062 let summary = summarizer.summarize(&triples);
1063 if let Some((_, p, _)) = summary.relations.first() {
1065 assert_eq!(
1066 p.as_str(),
1067 "knows",
1068 "first predicate in relations should be 'knows' (most frequent), got '{p}'"
1069 );
1070 }
1071 }
1073}