1use std::collections::HashMap;
44
45#[derive(Debug, Clone)]
49pub struct KgNode {
50 pub id: String,
52 pub label: String,
54 pub node_type: String,
56 pub properties: HashMap<String, String>,
58}
59
60impl KgNode {
61 pub fn simple(
63 id: impl Into<String>,
64 label: impl Into<String>,
65 node_type: impl Into<String>,
66 ) -> Self {
67 Self {
68 id: id.into(),
69 label: label.into(),
70 node_type: node_type.into(),
71 properties: HashMap::new(),
72 }
73 }
74}
75
76#[derive(Debug, Clone)]
78pub struct KgEdge {
79 pub source: String,
81 pub target: String,
83 pub relation: String,
85 pub weight: f64,
87}
88
89impl KgEdge {
90 pub fn unweighted(
92 source: impl Into<String>,
93 target: impl Into<String>,
94 relation: impl Into<String>,
95 ) -> Self {
96 Self {
97 source: source.into(),
98 target: target.into(),
99 relation: relation.into(),
100 weight: 1.0,
101 }
102 }
103}
104
105#[derive(Debug, Clone, Default)]
109pub struct KgSubgraph {
110 pub nodes: Vec<KgNode>,
112 pub edges: Vec<KgEdge>,
114}
115
116impl KgSubgraph {
117 pub fn new() -> Self {
119 Self::default()
120 }
121
122 pub fn add_node(&mut self, node: KgNode) {
124 self.nodes.push(node);
125 }
126
127 pub fn add_edge(&mut self, edge: KgEdge) {
129 self.edges.push(edge);
130 }
131
132 pub fn node_count(&self) -> usize {
134 self.nodes.len()
135 }
136
137 pub fn edge_count(&self) -> usize {
139 self.edges.len()
140 }
141
142 pub fn node(&self, id: &str) -> Option<&KgNode> {
144 self.nodes.iter().find(|n| n.id == id)
145 }
146}
147
148#[derive(Debug, Clone)]
152pub struct SummaryCluster {
153 pub id: usize,
155 pub representative_node: String,
157 pub member_nodes: Vec<String>,
159 pub internal_edges: usize,
161 pub summary_label: String,
163}
164
165impl SummaryCluster {
166 pub fn size(&self) -> usize {
168 self.member_nodes.len()
169 }
170}
171
172pub struct SubgraphSummarizer;
176
177impl SubgraphSummarizer {
178 pub fn new() -> Self {
180 Self
181 }
182
183 pub fn summarize(&self, graph: &KgSubgraph, max_clusters: usize) -> Vec<SummaryCluster> {
194 if graph.nodes.is_empty() || max_clusters == 0 {
195 return Vec::new();
196 }
197
198 let mut type_groups: HashMap<String, Vec<String>> = HashMap::new();
200 for node in &graph.nodes {
201 type_groups
202 .entry(node.node_type.clone())
203 .or_default()
204 .push(node.id.clone());
205 }
206
207 let mut groups: Vec<(String, Vec<String>)> = type_groups.into_iter().collect();
209 groups.sort_by(|a, b| a.0.cmp(&b.0));
210
211 let groups = if groups.len() > max_clusters {
213 let (keep, overflow) = groups.split_at(max_clusters - 1);
214 let mut merged = keep.to_vec();
215 let other_members: Vec<String> = overflow
216 .iter()
217 .flat_map(|(_, ids)| ids.iter().cloned())
218 .collect();
219 if !other_members.is_empty() {
220 merged.push(("Other".to_string(), other_members));
221 }
222 merged
223 } else {
224 groups
225 };
226
227 let degree_map = build_degree_map(graph);
229
230 groups
232 .into_iter()
233 .enumerate()
234 .map(|(cluster_id, (node_type, members))| {
235 let representative = members
237 .iter()
238 .max_by_key(|id| degree_map.get(*id).copied().unwrap_or(0))
239 .cloned()
240 .unwrap_or_default();
241
242 let member_set: std::collections::HashSet<&str> =
244 members.iter().map(|s| s.as_str()).collect();
245 let internal_edges = graph
246 .edges
247 .iter()
248 .filter(|e| {
249 member_set.contains(e.source.as_str())
250 && member_set.contains(e.target.as_str())
251 })
252 .count();
253
254 SummaryCluster {
255 id: cluster_id,
256 representative_node: representative,
257 member_nodes: members,
258 internal_edges,
259 summary_label: format!("{node_type} cluster"),
260 }
261 })
262 .collect()
263 }
264
265 pub fn extract_key_relations(&self, graph: &KgSubgraph, top_n: usize) -> Vec<(String, usize)> {
269 if top_n == 0 {
270 return Vec::new();
271 }
272 let mut counts: HashMap<String, usize> = HashMap::new();
273 for edge in &graph.edges {
274 *counts.entry(edge.relation.clone()).or_insert(0) += 1;
275 }
276 let mut sorted: Vec<(String, usize)> = counts.into_iter().collect();
277 sorted.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
278 sorted.truncate(top_n);
279 sorted
280 }
281
282 pub fn node_degree(&self, graph: &KgSubgraph, node_id: &str) -> usize {
286 graph
287 .edges
288 .iter()
289 .filter(|e| e.source == node_id || e.target == node_id)
290 .count()
291 }
292
293 pub fn generate_text_summary(&self, clusters: &[SummaryCluster]) -> String {
298 if clusters.is_empty() {
299 return "The subgraph contains no identifiable clusters.".to_string();
300 }
301
302 let mut parts: Vec<String> = Vec::new();
303 for cluster in clusters {
304 parts.push(format!(
305 "The {} (representative: {}, {} members, {} internal edges)",
306 cluster.summary_label,
307 cluster.representative_node,
308 cluster.member_nodes.len(),
309 cluster.internal_edges,
310 ));
311 }
312
313 format!(
314 "{}. The subgraph contains {} cluster{}.",
315 parts.join(". "),
316 clusters.len(),
317 if clusters.len() == 1 { "" } else { "s" }
318 )
319 }
320}
321
322impl Default for SubgraphSummarizer {
323 fn default() -> Self {
324 Self::new()
325 }
326}
327
328fn build_degree_map(graph: &KgSubgraph) -> HashMap<String, usize> {
332 let mut map: HashMap<String, usize> = HashMap::new();
333 for edge in &graph.edges {
334 *map.entry(edge.source.clone()).or_insert(0) += 1;
335 if edge.source != edge.target {
336 *map.entry(edge.target.clone()).or_insert(0) += 1;
337 }
338 }
339 map
340}
341
342#[cfg(test)]
345mod tests {
346 use super::*;
347
348 fn node(id: &str, node_type: &str) -> KgNode {
351 KgNode::simple(id, id, node_type)
352 }
353
354 fn edge(src: &str, tgt: &str, rel: &str) -> KgEdge {
355 KgEdge::unweighted(src, tgt, rel)
356 }
357
358 fn make_graph_with_types(specs: &[(&str, &str)], edges: &[(&str, &str, &str)]) -> KgSubgraph {
359 let mut g = KgSubgraph::new();
360 for (id, typ) in specs {
361 g.add_node(node(id, typ));
362 }
363 for (s, t, r) in edges {
364 g.add_edge(edge(s, t, r));
365 }
366 g
367 }
368
369 #[test]
372 fn test_new_subgraph_empty() {
373 let g = KgSubgraph::new();
374 assert_eq!(g.node_count(), 0);
375 assert_eq!(g.edge_count(), 0);
376 }
377
378 #[test]
379 fn test_add_node_increments_count() {
380 let mut g = KgSubgraph::new();
381 g.add_node(node("n1", "Person"));
382 assert_eq!(g.node_count(), 1);
383 }
384
385 #[test]
386 fn test_add_edge_increments_count() {
387 let mut g = KgSubgraph::new();
388 g.add_edge(edge("a", "b", "knows"));
389 assert_eq!(g.edge_count(), 1);
390 }
391
392 #[test]
393 fn test_node_lookup_found() {
394 let mut g = KgSubgraph::new();
395 g.add_node(node("alice", "Person"));
396 assert!(g.node("alice").is_some());
397 assert_eq!(g.node("alice").expect("node").node_type, "Person");
398 }
399
400 #[test]
401 fn test_node_lookup_missing() {
402 let g = KgSubgraph::new();
403 assert!(g.node("nope").is_none());
404 }
405
406 #[test]
409 fn test_kg_node_simple() {
410 let n = KgNode::simple("id1", "Label", "Type");
411 assert_eq!(n.id, "id1");
412 assert_eq!(n.label, "Label");
413 assert_eq!(n.node_type, "Type");
414 assert!(n.properties.is_empty());
415 }
416
417 #[test]
420 fn test_kg_edge_unweighted_weight_one() {
421 let e = KgEdge::unweighted("a", "b", "rel");
422 assert_eq!(e.weight, 1.0);
423 }
424
425 #[test]
428 fn test_summarize_empty_graph() {
429 let g = KgSubgraph::new();
430 let s = SubgraphSummarizer::new();
431 assert!(s.summarize(&g, 10).is_empty());
432 }
433
434 #[test]
435 fn test_summarize_max_clusters_zero() {
436 let g = make_graph_with_types(&[("n1", "Person")], &[]);
437 let s = SubgraphSummarizer::new();
438 assert!(s.summarize(&g, 0).is_empty());
439 }
440
441 #[test]
444 fn test_summarize_single_type_one_cluster() {
445 let g = make_graph_with_types(&[("a", "Person"), ("b", "Person"), ("c", "Person")], &[]);
446 let s = SubgraphSummarizer::new();
447 let clusters = s.summarize(&g, 5);
448 assert_eq!(clusters.len(), 1);
449 assert_eq!(clusters[0].member_nodes.len(), 3);
450 }
451
452 #[test]
453 fn test_summarize_cluster_label_contains_type() {
454 let g = make_graph_with_types(&[("a", "Organization")], &[]);
455 let s = SubgraphSummarizer::new();
456 let clusters = s.summarize(&g, 5);
457 assert!(
458 clusters[0].summary_label.contains("Organization"),
459 "label should mention the type: {}",
460 clusters[0].summary_label
461 );
462 }
463
464 #[test]
467 fn test_summarize_two_types_two_clusters() {
468 let g = make_graph_with_types(&[("a", "Person"), ("b", "Person"), ("c", "Company")], &[]);
469 let s = SubgraphSummarizer::new();
470 let clusters = s.summarize(&g, 10);
471 assert_eq!(clusters.len(), 2);
472 }
473
474 #[test]
475 fn test_summarize_respects_max_clusters() {
476 let g = make_graph_with_types(&[("a", "T1"), ("b", "T2"), ("c", "T3"), ("d", "T4")], &[]);
477 let s = SubgraphSummarizer::new();
478 let clusters = s.summarize(&g, 2);
479 assert!(
480 clusters.len() <= 2,
481 "should have at most 2 clusters, got {}",
482 clusters.len()
483 );
484 }
485
486 #[test]
487 fn test_summarize_overflow_goes_to_other() {
488 let g = make_graph_with_types(
489 &[
490 ("a", "T1"),
491 ("b", "T2"),
492 ("c", "T3"),
493 ("d", "T4"),
494 ("e", "T5"),
495 ],
496 &[],
497 );
498 let s = SubgraphSummarizer::new();
499 let clusters = s.summarize(&g, 3);
500 assert_eq!(clusters.len(), 3);
502 let has_other = clusters.iter().any(|c| c.summary_label.contains("Other"));
503 assert!(has_other, "overflow should be merged into Other cluster");
504 }
505
506 #[test]
509 fn test_representative_is_most_connected() {
510 let g = make_graph_with_types(
512 &[("a", "Person"), ("b", "Person"), ("c", "Person")],
513 &[("a", "b", "knows"), ("b", "c", "knows")],
514 );
515 let s = SubgraphSummarizer::new();
516 let clusters = s.summarize(&g, 5);
517 assert_eq!(clusters.len(), 1);
518 assert_eq!(clusters[0].representative_node, "b");
519 }
520
521 #[test]
522 fn test_representative_exists_for_single_node_cluster() {
523 let g = make_graph_with_types(&[("solo", "Category")], &[]);
524 let s = SubgraphSummarizer::new();
525 let clusters = s.summarize(&g, 5);
526 assert_eq!(clusters[0].representative_node, "solo");
527 }
528
529 #[test]
532 fn test_internal_edges_all_within_cluster() {
533 let g = make_graph_with_types(
534 &[("a", "T"), ("b", "T"), ("c", "T")],
535 &[("a", "b", "r"), ("b", "c", "r")],
536 );
537 let s = SubgraphSummarizer::new();
538 let clusters = s.summarize(&g, 5);
539 assert_eq!(clusters[0].internal_edges, 2);
540 }
541
542 #[test]
543 fn test_internal_edges_none_cross_cluster() {
544 let g = make_graph_with_types(
546 &[("a", "T1"), ("b", "T1"), ("c", "T2")],
547 &[("a", "c", "cross"), ("b", "c", "cross")],
548 );
549 let s = SubgraphSummarizer::new();
550 let clusters = s.summarize(&g, 5);
551 for cluster in &clusters {
552 if cluster.summary_label.contains("T1") {
553 assert_eq!(cluster.internal_edges, 0);
554 }
555 }
556 }
557
558 #[test]
561 fn test_cluster_ids_are_sequential() {
562 let g = make_graph_with_types(&[("a", "T1"), ("b", "T2"), ("c", "T3")], &[]);
563 let s = SubgraphSummarizer::new();
564 let clusters = s.summarize(&g, 10);
565 for (i, c) in clusters.iter().enumerate() {
566 assert_eq!(c.id, i);
567 }
568 }
569
570 #[test]
573 fn test_key_relations_empty_graph() {
574 let g = KgSubgraph::new();
575 let s = SubgraphSummarizer::new();
576 assert!(s.extract_key_relations(&g, 5).is_empty());
577 }
578
579 #[test]
580 fn test_key_relations_top_n_zero() {
581 let mut g = KgSubgraph::new();
582 g.add_edge(edge("a", "b", "knows"));
583 let s = SubgraphSummarizer::new();
584 assert!(s.extract_key_relations(&g, 0).is_empty());
585 }
586
587 #[test]
588 fn test_key_relations_sorted_by_frequency() {
589 let g = make_graph_with_types(
590 &[("a", "T"), ("b", "T"), ("c", "T")],
591 &[
592 ("a", "b", "knows"),
593 ("b", "c", "knows"),
594 ("a", "c", "likes"),
595 ],
596 );
597 let s = SubgraphSummarizer::new();
598 let relations = s.extract_key_relations(&g, 5);
599 assert!(!relations.is_empty());
600 assert_eq!(relations[0].0, "knows");
601 assert_eq!(relations[0].1, 2);
602 }
603
604 #[test]
605 fn test_key_relations_truncated_to_top_n() {
606 let g = make_graph_with_types(
607 &[("a", "T"), ("b", "T"), ("c", "T")],
608 &[
609 ("a", "b", "r1"),
610 ("a", "b", "r2"),
611 ("a", "b", "r3"),
612 ("a", "b", "r4"),
613 ("a", "b", "r5"),
614 ],
615 );
616 let s = SubgraphSummarizer::new();
617 let relations = s.extract_key_relations(&g, 3);
618 assert!(relations.len() <= 3);
619 }
620
621 #[test]
622 fn test_key_relations_single_relation() {
623 let mut g = KgSubgraph::new();
624 for i in 0..5 {
625 g.add_edge(edge(&format!("n{i}"), &format!("n{}", i + 1), "knows"));
626 }
627 let s = SubgraphSummarizer::new();
628 let rels = s.extract_key_relations(&g, 1);
629 assert_eq!(rels.len(), 1);
630 assert_eq!(rels[0].0, "knows");
631 assert_eq!(rels[0].1, 5);
632 }
633
634 #[test]
637 fn test_node_degree_no_edges() {
638 let g = make_graph_with_types(&[("a", "T")], &[]);
639 let s = SubgraphSummarizer::new();
640 assert_eq!(s.node_degree(&g, "a"), 0);
641 }
642
643 #[test]
644 fn test_node_degree_outgoing_only() {
645 let g = make_graph_with_types(&[("a", "T"), ("b", "T")], &[("a", "b", "r")]);
646 let s = SubgraphSummarizer::new();
647 assert_eq!(s.node_degree(&g, "a"), 1);
648 assert_eq!(s.node_degree(&g, "b"), 1);
649 }
650
651 #[test]
652 fn test_node_degree_multiple_edges() {
653 let g = make_graph_with_types(
654 &[("a", "T"), ("b", "T"), ("c", "T")],
655 &[("a", "b", "r"), ("a", "c", "r"), ("b", "a", "r")],
656 );
657 let s = SubgraphSummarizer::new();
658 assert_eq!(s.node_degree(&g, "a"), 3);
660 }
661
662 #[test]
663 fn test_node_degree_missing_node() {
664 let g = KgSubgraph::new();
665 let s = SubgraphSummarizer::new();
666 assert_eq!(s.node_degree(&g, "ghost"), 0);
667 }
668
669 #[test]
672 fn test_text_summary_empty_clusters() {
673 let s = SubgraphSummarizer::new();
674 let text = s.generate_text_summary(&[]);
675 assert!(text.contains("no identifiable clusters"), "text: {text}");
676 }
677
678 #[test]
679 fn test_text_summary_single_cluster() {
680 let clusters = vec![SummaryCluster {
681 id: 0,
682 representative_node: "alice".to_string(),
683 member_nodes: vec!["alice".to_string(), "bob".to_string()],
684 internal_edges: 1,
685 summary_label: "Person cluster".to_string(),
686 }];
687 let s = SubgraphSummarizer::new();
688 let text = s.generate_text_summary(&clusters);
689 assert!(text.contains("alice"), "text: {text}");
690 assert!(text.contains("Person cluster"), "text: {text}");
691 assert!(text.contains("1 cluster"), "text: {text}");
692 }
693
694 #[test]
695 fn test_text_summary_multiple_clusters() {
696 let clusters = vec![
697 SummaryCluster {
698 id: 0,
699 representative_node: "a".to_string(),
700 member_nodes: vec!["a".to_string()],
701 internal_edges: 0,
702 summary_label: "Person cluster".to_string(),
703 },
704 SummaryCluster {
705 id: 1,
706 representative_node: "c".to_string(),
707 member_nodes: vec!["c".to_string(), "d".to_string()],
708 internal_edges: 1,
709 summary_label: "Company cluster".to_string(),
710 },
711 ];
712 let s = SubgraphSummarizer::new();
713 let text = s.generate_text_summary(&clusters);
714 assert!(text.contains("2 clusters"), "text: {text}");
715 assert!(text.contains("Person cluster"), "text: {text}");
716 assert!(text.contains("Company cluster"), "text: {text}");
717 }
718
719 #[test]
720 fn test_text_summary_contains_member_count() {
721 let clusters = vec![SummaryCluster {
722 id: 0,
723 representative_node: "x".to_string(),
724 member_nodes: vec!["x".to_string(), "y".to_string(), "z".to_string()],
725 internal_edges: 2,
726 summary_label: "T cluster".to_string(),
727 }];
728 let s = SubgraphSummarizer::new();
729 let text = s.generate_text_summary(&clusters);
730 assert!(text.contains("3 members"), "text: {text}");
731 }
732
733 #[test]
734 fn test_text_summary_contains_internal_edge_count() {
735 let clusters = vec![SummaryCluster {
736 id: 0,
737 representative_node: "x".to_string(),
738 member_nodes: vec!["x".to_string()],
739 internal_edges: 7,
740 summary_label: "T cluster".to_string(),
741 }];
742 let s = SubgraphSummarizer::new();
743 let text = s.generate_text_summary(&clusters);
744 assert!(text.contains("7 internal edges"), "text: {text}");
745 }
746
747 #[test]
750 fn test_cluster_size() {
751 let c = SummaryCluster {
752 id: 0,
753 representative_node: "r".to_string(),
754 member_nodes: vec!["a".to_string(), "b".to_string(), "c".to_string()],
755 internal_edges: 0,
756 summary_label: "X cluster".to_string(),
757 };
758 assert_eq!(c.size(), 3);
759 }
760
761 #[test]
764 fn test_default_summarizer() {
765 let s = SubgraphSummarizer;
766 let g = KgSubgraph::new();
767 assert!(s.summarize(&g, 5).is_empty());
768 }
769
770 #[test]
771 fn test_default_subgraph() {
772 let g = KgSubgraph::default();
773 assert_eq!(g.node_count(), 0);
774 }
775}