use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
18
19#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct GraphSummaryConfig {
26 pub max_entities: usize,
28 pub max_relationships: usize,
30 pub min_entity_degree: usize,
32 pub detail_level: DetailLevel,
34 pub include_stats: bool,
36 pub include_communities: bool,
38}
39
40impl Default for GraphSummaryConfig {
41 fn default() -> Self {
42 Self {
43 max_entities: 20,
44 max_relationships: 50,
45 min_entity_degree: 1,
46 detail_level: DetailLevel::Standard,
47 include_stats: true,
48 include_communities: true,
49 }
50 }
51}
52
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
55pub enum DetailLevel {
56 Brief,
58 Standard,
60 Detailed,
62}
63
64#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
70pub struct Triple {
71 pub subject: String,
72 pub predicate: String,
73 pub object: String,
74}
75
76impl Triple {
77 pub fn new(subject: &str, predicate: &str, object: &str) -> Self {
78 Self {
79 subject: subject.to_string(),
80 predicate: predicate.to_string(),
81 object: object.to_string(),
82 }
83 }
84}
85
86#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct Subgraph {
89 pub triples: Vec<Triple>,
91 pub label: Option<String>,
93}
94
95impl Subgraph {
96 pub fn new(triples: Vec<Triple>) -> Self {
97 Self {
98 triples,
99 label: None,
100 }
101 }
102
103 pub fn with_label(mut self, label: &str) -> Self {
104 self.label = Some(label.to_string());
105 self
106 }
107
108 pub fn entities(&self) -> HashSet<String> {
110 let mut entities = HashSet::new();
111 for t in &self.triples {
112 entities.insert(t.subject.clone());
113 entities.insert(t.object.clone());
114 }
115 entities
116 }
117
118 pub fn predicates(&self) -> HashSet<String> {
120 self.triples.iter().map(|t| t.predicate.clone()).collect()
121 }
122
123 pub fn entity_degrees(&self) -> HashMap<String, usize> {
125 let mut degrees: HashMap<String, usize> = HashMap::new();
126 for t in &self.triples {
127 *degrees.entry(t.subject.clone()).or_insert(0) += 1;
128 *degrees.entry(t.object.clone()).or_insert(0) += 1;
129 }
130 degrees
131 }
132}
133
134#[derive(Debug, Clone, Serialize, Deserialize)]
140pub struct GraphSummary {
141 pub text: String,
143 pub entity_count: usize,
145 pub triple_count: usize,
147 pub relationship_types: usize,
149 pub hub_entities: Vec<EntitySummary>,
151 pub relationship_distribution: HashMap<String, usize>,
153 pub structural_stats: Option<StructuralStats>,
155 pub communities: Vec<CommunitySummary>,
157 pub detail_level: DetailLevel,
159}
160
161#[derive(Debug, Clone, Serialize, Deserialize)]
163pub struct EntitySummary {
164 pub uri: String,
166 pub label: String,
168 pub degree: usize,
170 pub incoming_types: Vec<String>,
172 pub outgoing_types: Vec<String>,
174}
175
176#[derive(Debug, Clone, Serialize, Deserialize)]
178pub struct StructuralStats {
179 pub avg_degree: f64,
181 pub max_degree: usize,
183 pub density: f64,
185 pub connected_components: usize,
187}
188
189#[derive(Debug, Clone, Serialize, Deserialize)]
191pub struct CommunitySummary {
192 pub id: usize,
194 pub size: usize,
196 pub hub: String,
198 pub key_relationships: Vec<String>,
200}
201
202#[derive(Debug, Clone, Default, Serialize, Deserialize)]
208pub struct SummarizerStats {
209 pub graphs_summarized: u64,
210 pub total_triples_processed: u64,
211 pub total_entities_processed: u64,
212 pub avg_compression_ratio: f64,
213}
214
215pub struct GraphSummarizer {
221 config: GraphSummaryConfig,
222 stats: SummarizerStats,
223}
224
225impl GraphSummarizer {
226 pub fn new(config: GraphSummaryConfig) -> Self {
228 Self {
229 config,
230 stats: SummarizerStats::default(),
231 }
232 }
233
234 pub fn with_defaults() -> Self {
236 Self::new(GraphSummaryConfig::default())
237 }
238
239 pub fn summarize(&mut self, subgraph: &Subgraph) -> GraphSummary {
241 let entities = subgraph.entities();
242 let predicates = subgraph.predicates();
243 let degrees = subgraph.entity_degrees();
244
245 let mut sorted_entities: Vec<_> = degrees.iter().collect();
247 sorted_entities.sort_by(|a, b| b.1.cmp(a.1));
248
249 let hub_entities: Vec<EntitySummary> = sorted_entities
250 .iter()
251 .filter(|(_, °)| deg >= self.config.min_entity_degree)
252 .take(self.config.max_entities)
253 .map(|(uri, °ree)| {
254 let outgoing: Vec<String> = subgraph
255 .triples
256 .iter()
257 .filter(|t| &t.subject == *uri)
258 .map(|t| shorten_uri(&t.predicate))
259 .collect::<HashSet<_>>()
260 .into_iter()
261 .collect();
262
263 let incoming: Vec<String> = subgraph
264 .triples
265 .iter()
266 .filter(|t| &t.object == *uri)
267 .map(|t| shorten_uri(&t.predicate))
268 .collect::<HashSet<_>>()
269 .into_iter()
270 .collect();
271
272 EntitySummary {
273 uri: uri.to_string(),
274 label: shorten_uri(uri),
275 degree,
276 incoming_types: incoming,
277 outgoing_types: outgoing,
278 }
279 })
280 .collect();
281
282 let mut rel_dist: HashMap<String, usize> = HashMap::new();
284 for t in &subgraph.triples {
285 *rel_dist.entry(shorten_uri(&t.predicate)).or_insert(0) += 1;
286 }
287
288 let structural_stats = if self.config.include_stats {
290 let total_degree: usize = degrees.values().sum();
291 let n = entities.len().max(1);
292 let avg_degree = total_degree as f64 / n as f64;
293 let max_degree = degrees.values().copied().max().unwrap_or(0);
294 let possible_edges = n * (n.saturating_sub(1));
295 let density = if possible_edges > 0 {
296 subgraph.triples.len() as f64 / possible_edges as f64
297 } else {
298 0.0
299 };
300
301 Some(StructuralStats {
302 avg_degree,
303 max_degree,
304 density,
305 connected_components: self.estimate_components(subgraph),
306 })
307 } else {
308 None
309 };
310
311 let communities = if self.config.include_communities {
313 self.detect_communities(subgraph, °rees)
314 } else {
315 Vec::new()
316 };
317
318 let text = self.generate_text(subgraph, &hub_entities, &rel_dist, &structural_stats);
320
321 self.stats.graphs_summarized += 1;
322 self.stats.total_triples_processed += subgraph.triples.len() as u64;
323 self.stats.total_entities_processed += entities.len() as u64;
324
325 GraphSummary {
326 text,
327 entity_count: entities.len(),
328 triple_count: subgraph.triples.len(),
329 relationship_types: predicates.len(),
330 hub_entities,
331 relationship_distribution: rel_dist,
332 structural_stats,
333 communities,
334 detail_level: self.config.detail_level,
335 }
336 }
337
338 pub fn stats(&self) -> &SummarizerStats {
340 &self.stats
341 }
342
343 pub fn config(&self) -> &GraphSummaryConfig {
345 &self.config
346 }
347
348 fn generate_text(
351 &self,
352 subgraph: &Subgraph,
353 hubs: &[EntitySummary],
354 rel_dist: &HashMap<String, usize>,
355 stats: &Option<StructuralStats>,
356 ) -> String {
357 let mut parts = Vec::new();
358
359 if let Some(ref label) = subgraph.label {
360 parts.push(format!("Subgraph: {label}"));
361 }
362
363 parts.push(format!(
364 "Contains {} entities and {} triples with {} relationship types.",
365 subgraph.entities().len(),
366 subgraph.triples.len(),
367 subgraph.predicates().len()
368 ));
369
370 if self.config.detail_level != DetailLevel::Brief {
371 if !hubs.is_empty() {
372 let hub_names: Vec<String> = hubs.iter().take(5).map(|h| h.label.clone()).collect();
373 parts.push(format!("Key entities: {}.", hub_names.join(", ")));
374 }
375
376 let mut rels: Vec<_> = rel_dist.iter().collect();
377 rels.sort_by(|a, b| b.1.cmp(a.1));
378 let top_rels: Vec<String> = rels
379 .iter()
380 .take(5)
381 .map(|(r, c)| format!("{r} ({c})"))
382 .collect();
383 if !top_rels.is_empty() {
384 parts.push(format!("Top relationships: {}.", top_rels.join(", ")));
385 }
386 }
387
388 if self.config.detail_level == DetailLevel::Detailed {
389 if let Some(ref s) = stats {
390 parts.push(format!(
391 "Structure: avg degree {:.1}, max degree {}, density {:.4}, {} component(s).",
392 s.avg_degree, s.max_degree, s.density, s.connected_components
393 ));
394 }
395 }
396
397 parts.join(" ")
398 }
399
400 fn estimate_components(&self, subgraph: &Subgraph) -> usize {
401 let entities = subgraph.entities();
402 if entities.is_empty() {
403 return 0;
404 }
405
406 let mut parent: HashMap<String, String> = HashMap::new();
407 for e in &entities {
408 parent.insert(e.clone(), e.clone());
409 }
410
411 for t in &subgraph.triples {
412 let root_s = find_root(&parent, &t.subject);
413 let root_o = find_root(&parent, &t.object);
414 if root_s != root_o {
415 parent.insert(root_s, root_o);
416 }
417 }
418
419 let roots: HashSet<String> = entities.iter().map(|e| find_root(&parent, e)).collect();
420 roots.len()
421 }
422
423 fn detect_communities(
424 &self,
425 subgraph: &Subgraph,
426 degrees: &HashMap<String, usize>,
427 ) -> Vec<CommunitySummary> {
428 let mut pred_groups: HashMap<String, HashSet<String>> = HashMap::new();
430 for t in &subgraph.triples {
431 let group = pred_groups.entry(shorten_uri(&t.predicate)).or_default();
432 group.insert(t.subject.clone());
433 group.insert(t.object.clone());
434 }
435
436 pred_groups
437 .iter()
438 .enumerate()
439 .take(5)
440 .map(|(id, (pred, members))| {
441 let hub = members
442 .iter()
443 .max_by_key(|m| degrees.get(*m).unwrap_or(&0))
444 .cloned()
445 .unwrap_or_default();
446 CommunitySummary {
447 id,
448 size: members.len(),
449 hub: shorten_uri(&hub),
450 key_relationships: vec![pred.clone()],
451 }
452 })
453 .collect()
454 }
455}
456
/// Follows `parent` links starting at `node` and returns the root it reaches.
///
/// A root is a node that maps to itself or has no entry in `parent`; a `node`
/// that is not in the map is therefore returned unchanged. The walk is capped
/// at `parent.len()` hops, which is enough for any acyclic map and guarantees
/// termination should the map ever contain a cycle.
fn find_root(parent: &HashMap<String, String>, node: &str) -> String {
    let mut current = node;
    for _ in 0..parent.len() {
        match parent.get(current) {
            Some(next) if next != current => current = next.as_str(),
            _ => break,
        }
    }
    current.to_owned()
}
467
/// Returns a short, human-readable name for `uri`.
///
/// The name is the text after the last `#`, or — when there is no `#` — after
/// the last `/`. When neither separator is present, or when nothing follows
/// the separator (for example a URI ending in `/`), the whole input is
/// returned so the result is never empty for a non-empty input.
fn shorten_uri(uri: &str) -> String {
    let local_name = uri
        .rfind('#')
        .or_else(|| uri.rfind('/'))
        // Both separators are single-byte ASCII, so `idx + 1` is a valid boundary.
        .map_or(uri, |idx| &uri[idx + 1..]);

    if local_name.is_empty() {
        uri.to_string()
    } else {
        local_name.to_string()
    }
}
480
/// Unit tests for the subgraph summarizer and its helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Four entities (Alice, Bob, Charlie, ACME) joined by five triples that
    /// use three distinct predicates.
    fn sample_subgraph() -> Subgraph {
        Subgraph::new(vec![
            Triple::new(
                "http://ex.org/Alice",
                "http://ex.org/knows",
                "http://ex.org/Bob",
            ),
            Triple::new(
                "http://ex.org/Alice",
                "http://ex.org/likes",
                "http://ex.org/Charlie",
            ),
            Triple::new(
                "http://ex.org/Bob",
                "http://ex.org/knows",
                "http://ex.org/Charlie",
            ),
            Triple::new(
                "http://ex.org/Charlie",
                "http://ex.org/worksAt",
                "http://ex.org/ACME",
            ),
            Triple::new(
                "http://ex.org/Alice",
                "http://ex.org/worksAt",
                "http://ex.org/ACME",
            ),
        ])
    }

    #[test]
    fn test_default_config() {
        let config = GraphSummaryConfig::default();
        assert_eq!(config.max_entities, 20);
        assert_eq!(config.detail_level, DetailLevel::Standard);
    }

    #[test]
    fn test_basic_summarize() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let summary = summarizer.summarize(&sample_subgraph());
        assert_eq!(summary.triple_count, 5);
        assert!(summary.entity_count > 0);
        assert!(!summary.text.is_empty());
    }

    #[test]
    fn test_hub_entities() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let summary = summarizer.summarize(&sample_subgraph());
        assert!(!summary.hub_entities.is_empty());
        assert!(summary.hub_entities.iter().any(|e| e.label == "Alice"));
    }

    #[test]
    fn test_relationship_distribution() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let summary = summarizer.summarize(&sample_subgraph());
        assert!(summary.relationship_distribution.contains_key("knows"));
        assert_eq!(summary.relationship_distribution["knows"], 2);
    }

    #[test]
    fn test_structural_stats() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let summary = summarizer.summarize(&sample_subgraph());
        let stats = summary.structural_stats.expect("should have stats");
        assert!(stats.avg_degree > 0.0);
        assert!(stats.max_degree > 0);
        assert!(stats.density > 0.0);
    }

    #[test]
    fn test_communities() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let summary = summarizer.summarize(&sample_subgraph());
        assert!(!summary.communities.is_empty());
    }

    #[test]
    fn test_brief_summary() {
        let mut summarizer = GraphSummarizer::new(GraphSummaryConfig {
            detail_level: DetailLevel::Brief,
            ..Default::default()
        });
        let summary = summarizer.summarize(&sample_subgraph());
        assert!(!summary.text.is_empty());
        // Brief output stops after the counts sentence.
        assert!(!summary.text.contains("Key entities"));
    }

    #[test]
    fn test_detailed_summary() {
        let mut summarizer = GraphSummarizer::new(GraphSummaryConfig {
            detail_level: DetailLevel::Detailed,
            ..Default::default()
        });
        let summary = summarizer.summarize(&sample_subgraph());
        assert!(summary.text.contains("Structure"));
    }

    #[test]
    fn test_empty_subgraph() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let summary = summarizer.summarize(&Subgraph::new(vec![]));
        assert_eq!(summary.triple_count, 0);
        assert_eq!(summary.entity_count, 0);
    }

    #[test]
    fn test_single_triple() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let sg = Subgraph::new(vec![Triple::new("A", "knows", "B")]);
        let summary = summarizer.summarize(&sg);
        assert_eq!(summary.triple_count, 1);
        assert_eq!(summary.entity_count, 2);
    }

    #[test]
    fn test_subgraph_with_label() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let sg = sample_subgraph().with_label("Social Network");
        let summary = summarizer.summarize(&sg);
        assert!(summary.text.contains("Social Network"));
    }

    #[test]
    fn test_entities_extraction() {
        let sg = sample_subgraph();
        let entities = sg.entities();
        assert_eq!(entities.len(), 4);
    }

    #[test]
    fn test_predicates_extraction() {
        let sg = sample_subgraph();
        let preds = sg.predicates();
        assert_eq!(preds.len(), 3);
    }

    #[test]
    fn test_entity_degrees() {
        let sg = sample_subgraph();
        let degrees = sg.entity_degrees();
        assert!(degrees["http://ex.org/Alice"] >= 3);
    }

    #[test]
    fn test_connected_components() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let sg = sample_subgraph();
        let summary = summarizer.summarize(&sg);
        let stats = summary.structural_stats.expect("should have stats");
        assert_eq!(stats.connected_components, 1);
    }

    #[test]
    fn test_disconnected_components() {
        let mut summarizer = GraphSummarizer::with_defaults();
        // Two pairs with no triple linking them.
        let sg = Subgraph::new(vec![
            Triple::new("A", "knows", "B"),
            Triple::new("C", "knows", "D"),
        ]);
        let summary = summarizer.summarize(&sg);
        let stats = summary.structural_stats.expect("should have stats");
        assert_eq!(stats.connected_components, 2);
    }

    #[test]
    fn test_min_entity_degree_filter() {
        let mut summarizer = GraphSummarizer::new(GraphSummaryConfig {
            min_entity_degree: 3,
            ..Default::default()
        });
        let summary = summarizer.summarize(&sample_subgraph());
        for hub in &summary.hub_entities {
            assert!(hub.degree >= 3);
        }
    }

    #[test]
    fn test_no_stats() {
        let mut summarizer = GraphSummarizer::new(GraphSummaryConfig {
            include_stats: false,
            ..Default::default()
        });
        let summary = summarizer.summarize(&sample_subgraph());
        assert!(summary.structural_stats.is_none());
    }

    #[test]
    fn test_no_communities() {
        let mut summarizer = GraphSummarizer::new(GraphSummaryConfig {
            include_communities: false,
            ..Default::default()
        });
        let summary = summarizer.summarize(&sample_subgraph());
        assert!(summary.communities.is_empty());
    }

    #[test]
    fn test_shorten_uri_slash() {
        assert_eq!(shorten_uri("http://ex.org/Alice"), "Alice");
    }

    #[test]
    fn test_shorten_uri_hash() {
        assert_eq!(shorten_uri("http://ex.org#name"), "name");
    }

    #[test]
    fn test_stats_tracking() {
        let mut summarizer = GraphSummarizer::with_defaults();
        summarizer.summarize(&sample_subgraph());
        assert_eq!(summarizer.stats().graphs_summarized, 1);
        assert_eq!(summarizer.stats().total_triples_processed, 5);
    }

    #[test]
    fn test_config_serialization() {
        let config = GraphSummaryConfig::default();
        let json = serde_json::to_string(&config).expect("serialize failed");
        assert!(json.contains("max_entities"));
    }

    #[test]
    fn test_summary_serialization() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let summary = summarizer.summarize(&sample_subgraph());
        let json = serde_json::to_string(&summary).expect("serialize failed");
        assert!(json.contains("text"));
    }

    #[test]
    fn test_triple_equality() {
        let t1 = Triple::new("A", "knows", "B");
        let t2 = Triple::new("A", "knows", "B");
        assert_eq!(t1, t2);
    }

    #[test]
    fn test_entity_summary_outgoing() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let summary = summarizer.summarize(&sample_subgraph());
        let alice = summary
            .hub_entities
            .iter()
            .find(|e| e.label == "Alice")
            .expect("Alice should be a hub");
        assert!(!alice.outgoing_types.is_empty());
    }

    #[test]
    fn test_large_subgraph() {
        // A 100-node ring: every entity has degree 2.
        let mut triples = Vec::new();
        for i in 0..100 {
            triples.push(Triple::new(
                &format!("http://ex.org/e{i}"),
                "http://ex.org/rel",
                &format!("http://ex.org/e{}", (i + 1) % 100),
            ));
        }
        let mut summarizer = GraphSummarizer::with_defaults();
        let summary = summarizer.summarize(&Subgraph::new(triples));
        assert_eq!(summary.triple_count, 100);
        assert!(summary.hub_entities.len() <= 20);
    }

    #[test]
    fn test_relationship_types_count() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let summary = summarizer.summarize(&sample_subgraph());
        assert_eq!(summary.relationship_types, 3);
    }

    #[test]
    fn test_density_calculation() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let sg = Subgraph::new(vec![Triple::new("A", "r", "B"), Triple::new("B", "r", "A")]);
        let summary = summarizer.summarize(&sg);
        let stats = summary.structural_stats.expect("should have stats");
        assert!(stats.density > 0.0 && stats.density <= 1.0);
    }

    #[test]
    fn test_community_hub() {
        let mut summarizer = GraphSummarizer::with_defaults();
        let summary = summarizer.summarize(&sample_subgraph());
        for community in &summary.communities {
            assert!(!community.hub.is_empty());
        }
    }
}