Skip to main content

oxirs_graphrag/
summarizer.rs

1//! # Knowledge Graph Subgraph Summarizer
2//!
3//! Cluster-based abstraction for summarising KG subgraphs.
4//!
5//! The [`SubgraphSummarizer`] groups nodes by their `node_type` into clusters,
6//! selects the most-connected node of each cluster as its representative, and
7//! produces both structured [`SummaryCluster`] data and a human-readable text
8//! summary paragraph.
9//!
10//! ## Example
11//!
12//! ```rust
13//! use oxirs_graphrag::summarizer::{
14//!     KgEdge, KgNode, KgSubgraph, SubgraphSummarizer,
15//! };
16//! use std::collections::HashMap;
17//!
18//! let mut graph = KgSubgraph::new();
19//! graph.add_node(KgNode {
20//!     id: "e1".to_string(),
21//!     label: "Alice".to_string(),
22//!     node_type: "Person".to_string(),
23//!     properties: HashMap::new(),
24//! });
25//! graph.add_node(KgNode {
26//!     id: "e2".to_string(),
27//!     label: "Bob".to_string(),
28//!     node_type: "Person".to_string(),
29//!     properties: HashMap::new(),
30//! });
31//! graph.add_edge(KgEdge {
32//!     source: "e1".to_string(),
33//!     target: "e2".to_string(),
34//!     relation: "knows".to_string(),
35//!     weight: 1.0,
36//! });
37//!
38//! let summarizer = SubgraphSummarizer::new();
39//! let clusters = summarizer.summarize(&graph, 10);
40//! assert_eq!(clusters.len(), 1); // one "Person" cluster
41//! ```
42
43use std::collections::HashMap;
44
45// ─── Graph primitives ─────────────────────────────────────────────────────────
46
47/// A node in a knowledge-graph subgraph.
48#[derive(Debug, Clone)]
49pub struct KgNode {
50    /// Unique node identifier (URI or local name).
51    pub id: String,
52    /// Human-readable label.
53    pub label: String,
54    /// Semantic type / RDF class.
55    pub node_type: String,
56    /// Arbitrary key-value properties.
57    pub properties: HashMap<String, String>,
58}
59
60impl KgNode {
61    /// Create a node with no properties.
62    pub fn simple(
63        id: impl Into<String>,
64        label: impl Into<String>,
65        node_type: impl Into<String>,
66    ) -> Self {
67        Self {
68            id: id.into(),
69            label: label.into(),
70            node_type: node_type.into(),
71            properties: HashMap::new(),
72        }
73    }
74}
75
76/// A directed, weighted edge between two KG nodes.
77#[derive(Debug, Clone)]
78pub struct KgEdge {
79    /// Source node identifier.
80    pub source: String,
81    /// Target node identifier.
82    pub target: String,
83    /// Relation type / predicate label.
84    pub relation: String,
85    /// Edge weight (e.g. confidence score).
86    pub weight: f64,
87}
88
89impl KgEdge {
90    /// Create an unweighted (weight = 1.0) edge.
91    pub fn unweighted(
92        source: impl Into<String>,
93        target: impl Into<String>,
94        relation: impl Into<String>,
95    ) -> Self {
96        Self {
97            source: source.into(),
98            target: target.into(),
99            relation: relation.into(),
100            weight: 1.0,
101        }
102    }
103}
104
105// ─── KgSubgraph ──────────────────────────────────────────────────────────────
106
107/// A subgraph of a knowledge graph, consisting of nodes and directed edges.
108#[derive(Debug, Clone, Default)]
109pub struct KgSubgraph {
110    /// All nodes in the subgraph.
111    pub nodes: Vec<KgNode>,
112    /// All edges in the subgraph.
113    pub edges: Vec<KgEdge>,
114}
115
116impl KgSubgraph {
117    /// Create an empty subgraph.
118    pub fn new() -> Self {
119        Self::default()
120    }
121
122    /// Add a node.
123    pub fn add_node(&mut self, node: KgNode) {
124        self.nodes.push(node);
125    }
126
127    /// Add an edge.
128    pub fn add_edge(&mut self, edge: KgEdge) {
129        self.edges.push(edge);
130    }
131
132    /// Number of nodes.
133    pub fn node_count(&self) -> usize {
134        self.nodes.len()
135    }
136
137    /// Number of edges.
138    pub fn edge_count(&self) -> usize {
139        self.edges.len()
140    }
141
142    /// Return the node with the given `id`, or `None`.
143    pub fn node(&self, id: &str) -> Option<&KgNode> {
144        self.nodes.iter().find(|n| n.id == id)
145    }
146}
147
148// ─── Summary output ──────────────────────────────────────────────────────────
149
150/// A cluster of KG nodes of the same type, with a representative and summary.
151#[derive(Debug, Clone)]
152pub struct SummaryCluster {
153    /// Sequential cluster identifier.
154    pub id: usize,
155    /// Node ID chosen as the representative (most-connected node in cluster).
156    pub representative_node: String,
157    /// All node IDs belonging to this cluster.
158    pub member_nodes: Vec<String>,
159    /// Number of edges whose both endpoints are within this cluster.
160    pub internal_edges: usize,
161    /// Brief human-readable label derived from the cluster's node type.
162    pub summary_label: String,
163}
164
165impl SummaryCluster {
166    /// Size of the cluster (number of member nodes).
167    pub fn size(&self) -> usize {
168        self.member_nodes.len()
169    }
170}
171
172// ─── SubgraphSummarizer ───────────────────────────────────────────────────────
173
174/// Produces cluster-based summaries of KG subgraphs.
175pub struct SubgraphSummarizer;
176
177impl SubgraphSummarizer {
178    /// Create a new summarizer (stateless).
179    pub fn new() -> Self {
180        Self
181    }
182
183    /// Summarise `graph` into at most `max_clusters` [`SummaryCluster`]s.
184    ///
185    /// ## Algorithm
186    ///
187    /// 1. Group nodes by `node_type`.
188    /// 2. If there are more types than `max_clusters`, merge the smallest groups
189    ///    into an `"Other"` cluster so that only `max_clusters` remain.
190    /// 3. For each cluster, count internal edges and pick the most-connected
191    ///    node (highest undirected degree within the whole graph) as
192    ///    the representative.
193    pub fn summarize(&self, graph: &KgSubgraph, max_clusters: usize) -> Vec<SummaryCluster> {
194        if graph.nodes.is_empty() || max_clusters == 0 {
195            return Vec::new();
196        }
197
198        // Group node IDs by node_type.
199        let mut type_groups: HashMap<String, Vec<String>> = HashMap::new();
200        for node in &graph.nodes {
201            type_groups
202                .entry(node.node_type.clone())
203                .or_default()
204                .push(node.id.clone());
205        }
206
207        // Sort groups deterministically by type name.
208        let mut groups: Vec<(String, Vec<String>)> = type_groups.into_iter().collect();
209        groups.sort_by(|a, b| a.0.cmp(&b.0));
210
211        // Merge overflow groups into "Other" if too many types.
212        let groups = if groups.len() > max_clusters {
213            let (keep, overflow) = groups.split_at(max_clusters - 1);
214            let mut merged = keep.to_vec();
215            let other_members: Vec<String> = overflow
216                .iter()
217                .flat_map(|(_, ids)| ids.iter().cloned())
218                .collect();
219            if !other_members.is_empty() {
220                merged.push(("Other".to_string(), other_members));
221            }
222            merged
223        } else {
224            groups
225        };
226
227        // Build undirected degree map over the entire graph.
228        let degree_map = build_degree_map(graph);
229
230        // Build the clusters.
231        groups
232            .into_iter()
233            .enumerate()
234            .map(|(cluster_id, (node_type, members))| {
235                // Pick representative: member with highest degree.
236                let representative = members
237                    .iter()
238                    .max_by_key(|id| degree_map.get(*id).copied().unwrap_or(0))
239                    .cloned()
240                    .unwrap_or_default();
241
242                // Count internal edges (both endpoints in this cluster).
243                let member_set: std::collections::HashSet<&str> =
244                    members.iter().map(|s| s.as_str()).collect();
245                let internal_edges = graph
246                    .edges
247                    .iter()
248                    .filter(|e| {
249                        member_set.contains(e.source.as_str())
250                            && member_set.contains(e.target.as_str())
251                    })
252                    .count();
253
254                SummaryCluster {
255                    id: cluster_id,
256                    representative_node: representative,
257                    member_nodes: members,
258                    internal_edges,
259                    summary_label: format!("{node_type} cluster"),
260                }
261            })
262            .collect()
263    }
264
265    /// Return the top-`top_n` relation types by frequency of occurrence.
266    ///
267    /// Ties are broken by relation name (alphabetical ascending).
268    pub fn extract_key_relations(&self, graph: &KgSubgraph, top_n: usize) -> Vec<(String, usize)> {
269        if top_n == 0 {
270            return Vec::new();
271        }
272        let mut counts: HashMap<String, usize> = HashMap::new();
273        for edge in &graph.edges {
274            *counts.entry(edge.relation.clone()).or_insert(0) += 1;
275        }
276        let mut sorted: Vec<(String, usize)> = counts.into_iter().collect();
277        sorted.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
278        sorted.truncate(top_n);
279        sorted
280    }
281
282    /// Compute the undirected degree of `node_id` in `graph`.
283    ///
284    /// Both outgoing and incoming edges are counted (a self-loop counts twice).
285    pub fn node_degree(&self, graph: &KgSubgraph, node_id: &str) -> usize {
286        graph
287            .edges
288            .iter()
289            .filter(|e| e.source == node_id || e.target == node_id)
290            .count()
291    }
292
293    /// Generate a human-readable summary paragraph from a set of clusters.
294    ///
295    /// The paragraph lists each cluster's representative node, member count, and
296    /// internal edge count, ending with the total number of clusters.
297    pub fn generate_text_summary(&self, clusters: &[SummaryCluster]) -> String {
298        if clusters.is_empty() {
299            return "The subgraph contains no identifiable clusters.".to_string();
300        }
301
302        let mut parts: Vec<String> = Vec::new();
303        for cluster in clusters {
304            parts.push(format!(
305                "The {} (representative: {}, {} members, {} internal edges)",
306                cluster.summary_label,
307                cluster.representative_node,
308                cluster.member_nodes.len(),
309                cluster.internal_edges,
310            ));
311        }
312
313        format!(
314            "{}. The subgraph contains {} cluster{}.",
315            parts.join(". "),
316            clusters.len(),
317            if clusters.len() == 1 { "" } else { "s" }
318        )
319    }
320}
321
322impl Default for SubgraphSummarizer {
323    fn default() -> Self {
324        Self::new()
325    }
326}
327
328// ─── Internal helpers ─────────────────────────────────────────────────────────
329
330/// Build an undirected-degree map over all edges in `graph`.
331fn build_degree_map(graph: &KgSubgraph) -> HashMap<String, usize> {
332    let mut map: HashMap<String, usize> = HashMap::new();
333    for edge in &graph.edges {
334        *map.entry(edge.source.clone()).or_insert(0) += 1;
335        if edge.source != edge.target {
336            *map.entry(edge.target.clone()).or_insert(0) += 1;
337        }
338    }
339    map
340}
341
342// ─── Tests ────────────────────────────────────────────────────────────────────
343
344#[cfg(test)]
345mod tests {
346    use super::*;
347
348    // ── Helpers ──────────────────────────────────────────────────────────────
349
350    fn node(id: &str, node_type: &str) -> KgNode {
351        KgNode::simple(id, id, node_type)
352    }
353
354    fn edge(src: &str, tgt: &str, rel: &str) -> KgEdge {
355        KgEdge::unweighted(src, tgt, rel)
356    }
357
358    fn make_graph_with_types(specs: &[(&str, &str)], edges: &[(&str, &str, &str)]) -> KgSubgraph {
359        let mut g = KgSubgraph::new();
360        for (id, typ) in specs {
361            g.add_node(node(id, typ));
362        }
363        for (s, t, r) in edges {
364            g.add_edge(edge(s, t, r));
365        }
366        g
367    }
368
369    // ── KgSubgraph basics ────────────────────────────────────────────────────
370
371    #[test]
372    fn test_new_subgraph_empty() {
373        let g = KgSubgraph::new();
374        assert_eq!(g.node_count(), 0);
375        assert_eq!(g.edge_count(), 0);
376    }
377
378    #[test]
379    fn test_add_node_increments_count() {
380        let mut g = KgSubgraph::new();
381        g.add_node(node("n1", "Person"));
382        assert_eq!(g.node_count(), 1);
383    }
384
385    #[test]
386    fn test_add_edge_increments_count() {
387        let mut g = KgSubgraph::new();
388        g.add_edge(edge("a", "b", "knows"));
389        assert_eq!(g.edge_count(), 1);
390    }
391
392    #[test]
393    fn test_node_lookup_found() {
394        let mut g = KgSubgraph::new();
395        g.add_node(node("alice", "Person"));
396        assert!(g.node("alice").is_some());
397        assert_eq!(g.node("alice").expect("node").node_type, "Person");
398    }
399
400    #[test]
401    fn test_node_lookup_missing() {
402        let g = KgSubgraph::new();
403        assert!(g.node("nope").is_none());
404    }
405
406    // ── KgNode simple constructor ─────────────────────────────────────────────
407
408    #[test]
409    fn test_kg_node_simple() {
410        let n = KgNode::simple("id1", "Label", "Type");
411        assert_eq!(n.id, "id1");
412        assert_eq!(n.label, "Label");
413        assert_eq!(n.node_type, "Type");
414        assert!(n.properties.is_empty());
415    }
416
417    // ── KgEdge unweighted ─────────────────────────────────────────────────────
418
419    #[test]
420    fn test_kg_edge_unweighted_weight_one() {
421        let e = KgEdge::unweighted("a", "b", "rel");
422        assert_eq!(e.weight, 1.0);
423    }
424
425    // ── summarize: empty graph ────────────────────────────────────────────────
426
427    #[test]
428    fn test_summarize_empty_graph() {
429        let g = KgSubgraph::new();
430        let s = SubgraphSummarizer::new();
431        assert!(s.summarize(&g, 10).is_empty());
432    }
433
434    #[test]
435    fn test_summarize_max_clusters_zero() {
436        let g = make_graph_with_types(&[("n1", "Person")], &[]);
437        let s = SubgraphSummarizer::new();
438        assert!(s.summarize(&g, 0).is_empty());
439    }
440
441    // ── summarize: single type ────────────────────────────────────────────────
442
443    #[test]
444    fn test_summarize_single_type_one_cluster() {
445        let g = make_graph_with_types(&[("a", "Person"), ("b", "Person"), ("c", "Person")], &[]);
446        let s = SubgraphSummarizer::new();
447        let clusters = s.summarize(&g, 5);
448        assert_eq!(clusters.len(), 1);
449        assert_eq!(clusters[0].member_nodes.len(), 3);
450    }
451
452    #[test]
453    fn test_summarize_cluster_label_contains_type() {
454        let g = make_graph_with_types(&[("a", "Organization")], &[]);
455        let s = SubgraphSummarizer::new();
456        let clusters = s.summarize(&g, 5);
457        assert!(
458            clusters[0].summary_label.contains("Organization"),
459            "label should mention the type: {}",
460            clusters[0].summary_label
461        );
462    }
463
464    // ── summarize: multiple types ─────────────────────────────────────────────
465
466    #[test]
467    fn test_summarize_two_types_two_clusters() {
468        let g = make_graph_with_types(&[("a", "Person"), ("b", "Person"), ("c", "Company")], &[]);
469        let s = SubgraphSummarizer::new();
470        let clusters = s.summarize(&g, 10);
471        assert_eq!(clusters.len(), 2);
472    }
473
474    #[test]
475    fn test_summarize_respects_max_clusters() {
476        let g = make_graph_with_types(&[("a", "T1"), ("b", "T2"), ("c", "T3"), ("d", "T4")], &[]);
477        let s = SubgraphSummarizer::new();
478        let clusters = s.summarize(&g, 2);
479        assert!(
480            clusters.len() <= 2,
481            "should have at most 2 clusters, got {}",
482            clusters.len()
483        );
484    }
485
486    #[test]
487    fn test_summarize_overflow_goes_to_other() {
488        let g = make_graph_with_types(
489            &[
490                ("a", "T1"),
491                ("b", "T2"),
492                ("c", "T3"),
493                ("d", "T4"),
494                ("e", "T5"),
495            ],
496            &[],
497        );
498        let s = SubgraphSummarizer::new();
499        let clusters = s.summarize(&g, 3);
500        // Should have 3 clusters: T1, T2, Other (T3+T4+T5)
501        assert_eq!(clusters.len(), 3);
502        let has_other = clusters.iter().any(|c| c.summary_label.contains("Other"));
503        assert!(has_other, "overflow should be merged into Other cluster");
504    }
505
506    // ── summarize: representative selection ──────────────────────────────────
507
508    #[test]
509    fn test_representative_is_most_connected() {
510        // "b" has degree 2, "a" and "c" have degree 1 — b should be representative
511        let g = make_graph_with_types(
512            &[("a", "Person"), ("b", "Person"), ("c", "Person")],
513            &[("a", "b", "knows"), ("b", "c", "knows")],
514        );
515        let s = SubgraphSummarizer::new();
516        let clusters = s.summarize(&g, 5);
517        assert_eq!(clusters.len(), 1);
518        assert_eq!(clusters[0].representative_node, "b");
519    }
520
521    #[test]
522    fn test_representative_exists_for_single_node_cluster() {
523        let g = make_graph_with_types(&[("solo", "Category")], &[]);
524        let s = SubgraphSummarizer::new();
525        let clusters = s.summarize(&g, 5);
526        assert_eq!(clusters[0].representative_node, "solo");
527    }
528
529    // ── summarize: internal edges ─────────────────────────────────────────────
530
531    #[test]
532    fn test_internal_edges_all_within_cluster() {
533        let g = make_graph_with_types(
534            &[("a", "T"), ("b", "T"), ("c", "T")],
535            &[("a", "b", "r"), ("b", "c", "r")],
536        );
537        let s = SubgraphSummarizer::new();
538        let clusters = s.summarize(&g, 5);
539        assert_eq!(clusters[0].internal_edges, 2);
540    }
541
542    #[test]
543    fn test_internal_edges_none_cross_cluster() {
544        // Edges between different types — should not appear as internal.
545        let g = make_graph_with_types(
546            &[("a", "T1"), ("b", "T1"), ("c", "T2")],
547            &[("a", "c", "cross"), ("b", "c", "cross")],
548        );
549        let s = SubgraphSummarizer::new();
550        let clusters = s.summarize(&g, 5);
551        for cluster in &clusters {
552            if cluster.summary_label.contains("T1") {
553                assert_eq!(cluster.internal_edges, 0);
554            }
555        }
556    }
557
558    // ── summarize: cluster IDs ────────────────────────────────────────────────
559
560    #[test]
561    fn test_cluster_ids_are_sequential() {
562        let g = make_graph_with_types(&[("a", "T1"), ("b", "T2"), ("c", "T3")], &[]);
563        let s = SubgraphSummarizer::new();
564        let clusters = s.summarize(&g, 10);
565        for (i, c) in clusters.iter().enumerate() {
566            assert_eq!(c.id, i);
567        }
568    }
569
570    // ── extract_key_relations ─────────────────────────────────────────────────
571
572    #[test]
573    fn test_key_relations_empty_graph() {
574        let g = KgSubgraph::new();
575        let s = SubgraphSummarizer::new();
576        assert!(s.extract_key_relations(&g, 5).is_empty());
577    }
578
579    #[test]
580    fn test_key_relations_top_n_zero() {
581        let mut g = KgSubgraph::new();
582        g.add_edge(edge("a", "b", "knows"));
583        let s = SubgraphSummarizer::new();
584        assert!(s.extract_key_relations(&g, 0).is_empty());
585    }
586
587    #[test]
588    fn test_key_relations_sorted_by_frequency() {
589        let g = make_graph_with_types(
590            &[("a", "T"), ("b", "T"), ("c", "T")],
591            &[
592                ("a", "b", "knows"),
593                ("b", "c", "knows"),
594                ("a", "c", "likes"),
595            ],
596        );
597        let s = SubgraphSummarizer::new();
598        let relations = s.extract_key_relations(&g, 5);
599        assert!(!relations.is_empty());
600        assert_eq!(relations[0].0, "knows");
601        assert_eq!(relations[0].1, 2);
602    }
603
604    #[test]
605    fn test_key_relations_truncated_to_top_n() {
606        let g = make_graph_with_types(
607            &[("a", "T"), ("b", "T"), ("c", "T")],
608            &[
609                ("a", "b", "r1"),
610                ("a", "b", "r2"),
611                ("a", "b", "r3"),
612                ("a", "b", "r4"),
613                ("a", "b", "r5"),
614            ],
615        );
616        let s = SubgraphSummarizer::new();
617        let relations = s.extract_key_relations(&g, 3);
618        assert!(relations.len() <= 3);
619    }
620
621    #[test]
622    fn test_key_relations_single_relation() {
623        let mut g = KgSubgraph::new();
624        for i in 0..5 {
625            g.add_edge(edge(&format!("n{i}"), &format!("n{}", i + 1), "knows"));
626        }
627        let s = SubgraphSummarizer::new();
628        let rels = s.extract_key_relations(&g, 1);
629        assert_eq!(rels.len(), 1);
630        assert_eq!(rels[0].0, "knows");
631        assert_eq!(rels[0].1, 5);
632    }
633
634    // ── node_degree ───────────────────────────────────────────────────────────
635
636    #[test]
637    fn test_node_degree_no_edges() {
638        let g = make_graph_with_types(&[("a", "T")], &[]);
639        let s = SubgraphSummarizer::new();
640        assert_eq!(s.node_degree(&g, "a"), 0);
641    }
642
643    #[test]
644    fn test_node_degree_outgoing_only() {
645        let g = make_graph_with_types(&[("a", "T"), ("b", "T")], &[("a", "b", "r")]);
646        let s = SubgraphSummarizer::new();
647        assert_eq!(s.node_degree(&g, "a"), 1);
648        assert_eq!(s.node_degree(&g, "b"), 1);
649    }
650
651    #[test]
652    fn test_node_degree_multiple_edges() {
653        let g = make_graph_with_types(
654            &[("a", "T"), ("b", "T"), ("c", "T")],
655            &[("a", "b", "r"), ("a", "c", "r"), ("b", "a", "r")],
656        );
657        let s = SubgraphSummarizer::new();
658        // a appears in edges: (a→b), (a→c), (b→a) = 3
659        assert_eq!(s.node_degree(&g, "a"), 3);
660    }
661
662    #[test]
663    fn test_node_degree_missing_node() {
664        let g = KgSubgraph::new();
665        let s = SubgraphSummarizer::new();
666        assert_eq!(s.node_degree(&g, "ghost"), 0);
667    }
668
669    // ── generate_text_summary ─────────────────────────────────────────────────
670
671    #[test]
672    fn test_text_summary_empty_clusters() {
673        let s = SubgraphSummarizer::new();
674        let text = s.generate_text_summary(&[]);
675        assert!(text.contains("no identifiable clusters"), "text: {text}");
676    }
677
678    #[test]
679    fn test_text_summary_single_cluster() {
680        let clusters = vec![SummaryCluster {
681            id: 0,
682            representative_node: "alice".to_string(),
683            member_nodes: vec!["alice".to_string(), "bob".to_string()],
684            internal_edges: 1,
685            summary_label: "Person cluster".to_string(),
686        }];
687        let s = SubgraphSummarizer::new();
688        let text = s.generate_text_summary(&clusters);
689        assert!(text.contains("alice"), "text: {text}");
690        assert!(text.contains("Person cluster"), "text: {text}");
691        assert!(text.contains("1 cluster"), "text: {text}");
692    }
693
694    #[test]
695    fn test_text_summary_multiple_clusters() {
696        let clusters = vec![
697            SummaryCluster {
698                id: 0,
699                representative_node: "a".to_string(),
700                member_nodes: vec!["a".to_string()],
701                internal_edges: 0,
702                summary_label: "Person cluster".to_string(),
703            },
704            SummaryCluster {
705                id: 1,
706                representative_node: "c".to_string(),
707                member_nodes: vec!["c".to_string(), "d".to_string()],
708                internal_edges: 1,
709                summary_label: "Company cluster".to_string(),
710            },
711        ];
712        let s = SubgraphSummarizer::new();
713        let text = s.generate_text_summary(&clusters);
714        assert!(text.contains("2 clusters"), "text: {text}");
715        assert!(text.contains("Person cluster"), "text: {text}");
716        assert!(text.contains("Company cluster"), "text: {text}");
717    }
718
719    #[test]
720    fn test_text_summary_contains_member_count() {
721        let clusters = vec![SummaryCluster {
722            id: 0,
723            representative_node: "x".to_string(),
724            member_nodes: vec!["x".to_string(), "y".to_string(), "z".to_string()],
725            internal_edges: 2,
726            summary_label: "T cluster".to_string(),
727        }];
728        let s = SubgraphSummarizer::new();
729        let text = s.generate_text_summary(&clusters);
730        assert!(text.contains("3 members"), "text: {text}");
731    }
732
733    #[test]
734    fn test_text_summary_contains_internal_edge_count() {
735        let clusters = vec![SummaryCluster {
736            id: 0,
737            representative_node: "x".to_string(),
738            member_nodes: vec!["x".to_string()],
739            internal_edges: 7,
740            summary_label: "T cluster".to_string(),
741        }];
742        let s = SubgraphSummarizer::new();
743        let text = s.generate_text_summary(&clusters);
744        assert!(text.contains("7 internal edges"), "text: {text}");
745    }
746
747    // ── SummaryCluster helpers ────────────────────────────────────────────────
748
749    #[test]
750    fn test_cluster_size() {
751        let c = SummaryCluster {
752            id: 0,
753            representative_node: "r".to_string(),
754            member_nodes: vec!["a".to_string(), "b".to_string(), "c".to_string()],
755            internal_edges: 0,
756            summary_label: "X cluster".to_string(),
757        };
758        assert_eq!(c.size(), 3);
759    }
760
761    // ── default impl ──────────────────────────────────────────────────────────
762
763    #[test]
764    fn test_default_summarizer() {
765        let s = SubgraphSummarizer;
766        let g = KgSubgraph::new();
767        assert!(s.summarize(&g, 5).is_empty());
768    }
769
770    #[test]
771    fn test_default_subgraph() {
772        let g = KgSubgraph::default();
773        assert_eq!(g.node_count(), 0);
774    }
775}