1pub mod error;
7pub mod metadata;
8
9#[cfg(feature = "async")]
11pub mod registry;
12
13#[cfg(feature = "async")]
15pub mod traits;
16
17#[cfg(all(feature = "async", feature = "ollama"))]
19pub mod ollama_adapters;
20
21#[cfg(feature = "async")]
23pub mod entity_adapters;
24
25#[cfg(feature = "async")]
27pub mod retrieval_adapters;
28
29#[cfg(feature = "async")]
31pub mod test_utils;
32
33#[cfg(test)]
34pub mod test_traits;
35
36pub use error::{ErrorContext, ErrorSeverity, ErrorSuggestion, GraphRAGError, Result};
38pub use metadata::ChunkMetadata;
39
40#[cfg(feature = "async")]
41pub use registry::{RegistryBuilder, ServiceConfig, ServiceContext, ServiceRegistry};
42
43#[cfg(feature = "async")]
45pub use traits::*;
46
/// Strategy interface for splitting raw text into [`TextChunk`]s.
///
/// Implementors must be `Send + Sync`, as required by the trait bounds.
pub trait ChunkingStrategy: Send + Sync {
    /// Splits `text` and returns the chunks produced by this strategy.
    fn chunk(&self, text: &str) -> Vec<TextChunk>;
}
77
78use indexmap::IndexMap;
79use petgraph::{graph::NodeIndex, Graph};
80use std::collections::HashMap;
81
82#[cfg(feature = "pagerank")]
84use sprs::CsMat;
85
/// Tuple returned by `KnowledgeGraph::build_adjacency_matrix`: the sparse
/// adjacency matrix, the entity-id -> matrix-index map, and the inverse
/// matrix-index -> entity-id map.
#[cfg(feature = "pagerank")]
type AdjacencyMatrixResult = (
    CsMat<f64>,
    HashMap<EntityId, usize>,
    HashMap<usize, EntityId>,
);
94
/// Newtype wrapper around the string identifier of a [`Document`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct DocumentId(pub String);
98
99impl DocumentId {
100 pub fn new(id: String) -> Self {
102 Self(id)
103 }
104}
105
106impl std::fmt::Display for DocumentId {
107 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108 write!(f, "{}", self.0)
109 }
110}
111
112impl From<String> for DocumentId {
113 fn from(s: String) -> Self {
114 Self(s)
115 }
116}
117
118impl From<DocumentId> for String {
119 fn from(id: DocumentId) -> Self {
120 id.0
121 }
122}
123
/// Newtype wrapper around the string identifier of an [`Entity`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct EntityId(pub String);
127
128impl EntityId {
129 pub fn new(id: String) -> Self {
131 Self(id)
132 }
133}
134
135impl std::fmt::Display for EntityId {
136 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
137 write!(f, "{}", self.0)
138 }
139}
140
141impl From<String> for EntityId {
142 fn from(s: String) -> Self {
143 Self(s)
144 }
145}
146
147impl From<EntityId> for String {
148 fn from(id: EntityId) -> Self {
149 id.0
150 }
151}
152
/// Newtype wrapper around the string identifier of a [`TextChunk`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct ChunkId(pub String);
156
157impl ChunkId {
158 pub fn new(id: String) -> Self {
160 Self(id)
161 }
162}
163
164impl std::fmt::Display for ChunkId {
165 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
166 write!(f, "{}", self.0)
167 }
168}
169
170impl From<String> for ChunkId {
171 fn from(s: String) -> Self {
172 Self(s)
173 }
174}
175
176impl From<ChunkId> for String {
177 fn from(id: ChunkId) -> Self {
178 id.0
179 }
180}
181
/// A source document together with its free-form metadata and text chunks.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Document {
    /// Identifier of the document.
    pub id: DocumentId,
    /// Document title.
    pub title: String,
    /// Document text.
    pub content: String,
    /// String key/value metadata, kept in insertion order.
    pub metadata: IndexMap<String, String>,
    /// Chunks belonging to this document.
    pub chunks: Vec<TextChunk>,
}
196
/// A piece of a document's text, with optional embedding and entity links.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct TextChunk {
    /// Identifier of the chunk.
    pub id: ChunkId,
    /// Identifier of the document this chunk belongs to.
    pub document_id: DocumentId,
    /// Text of the chunk.
    pub content: String,
    /// Start offset of the chunk.
    /// NOTE(review): presumably an offset into the parent document's content;
    /// the unit (bytes vs. characters) is not visible here — confirm.
    pub start_offset: usize,
    /// End offset of the chunk (same caveats as `start_offset`).
    pub end_offset: usize,
    /// Optional embedding vector for the chunk.
    pub embedding: Option<Vec<f32>>,
    /// Identifiers of the entities associated with this chunk.
    pub entities: Vec<EntityId>,
    /// Additional chunk metadata.
    pub metadata: ChunkMetadata,
}
217
/// A named entity stored as a node of the knowledge graph.
///
/// The three temporal fields are optional; when `None` they are omitted from
/// the serialized form and default to `None` when absent on deserialization.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Entity {
    /// Identifier of the entity.
    pub id: EntityId,
    /// Entity name.
    pub name: String,
    /// Entity type label (free-form string).
    pub entity_type: String,
    /// Confidence score attached to the entity.
    pub confidence: f32,
    /// Places where the entity is mentioned.
    pub mentions: Vec<EntityMention>,
    /// Optional embedding vector for the entity.
    pub embedding: Option<Vec<f32>>,

    /// First mention time.
    /// NOTE(review): presumably a Unix timestamp — confirm unit with callers.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub first_mentioned: Option<i64>,
    /// Last mention time (same caveat as `first_mentioned`).
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub last_mentioned: Option<i64>,
    /// Optional time range associated with the entity.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub temporal_validity: Option<crate::graph::temporal::TemporalRange>,
}
245
/// A single mention of an entity inside a text chunk.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct EntityMention {
    /// Chunk in which the mention occurs.
    pub chunk_id: ChunkId,
    /// Start offset of the mention.
    /// NOTE(review): presumably relative to the chunk content — confirm.
    pub start_offset: usize,
    /// End offset of the mention (same caveat as `start_offset`).
    pub end_offset: usize,
    /// Confidence score attached to this mention.
    pub confidence: f32,
}
258
/// A typed relationship from a `source` entity to a `target` entity, stored as
/// an edge of the knowledge graph.
///
/// The embedding, temporal and causal fields are optional; when `None` they
/// are omitted from the serialized form and default to `None` when absent on
/// deserialization.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Relationship {
    /// Entity the relationship starts from.
    pub source: EntityId,
    /// Entity the relationship points to.
    pub target: EntityId,
    /// Relationship type label (free-form string).
    pub relation_type: String,
    /// Confidence score attached to the relationship.
    pub confidence: f32,
    /// Chunks that provide context for the relationship.
    pub context: Vec<ChunkId>,

    /// Optional embedding vector for the relationship.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub embedding: Option<Vec<f32>>,

    /// Optional temporal classification of the relationship.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub temporal_type: Option<crate::graph::temporal::TemporalRelationType>,
    /// Optional time range associated with the relationship.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub temporal_range: Option<crate::graph::temporal::TemporalRange>,
    /// Optional causal strength; `Relationship::with_causal_strength` clamps
    /// the value it stores to `[0.0, 1.0]`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub causal_strength: Option<f32>,
}
288
289impl Relationship {
290 pub fn new(source: EntityId, target: EntityId, relation_type: String, confidence: f32) -> Self {
292 Self {
293 source,
294 target,
295 relation_type,
296 confidence,
297 context: Vec::new(),
298 embedding: None,
299 temporal_type: None,
301 temporal_range: None,
302 causal_strength: None,
303 }
304 }
305
306 pub fn with_context(mut self, context: Vec<ChunkId>) -> Self {
308 self.context = context;
309 self
310 }
311
312 pub fn with_temporal_type(
314 mut self,
315 temporal_type: crate::graph::temporal::TemporalRelationType,
316 ) -> Self {
317 self.temporal_type = Some(temporal_type);
318 if self.causal_strength.is_none() && temporal_type.is_causal() {
320 self.causal_strength = Some(temporal_type.default_strength());
321 }
322 self
323 }
324
325 pub fn with_temporal_range(mut self, start: i64, end: i64) -> Self {
327 self.temporal_range = Some(crate::graph::temporal::TemporalRange::new(start, end));
328 self
329 }
330
331 pub fn with_causal_strength(mut self, strength: f32) -> Self {
333 self.causal_strength = Some(strength.clamp(0.0, 1.0));
334 self
335 }
336
337 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
339 self.embedding = Some(embedding);
340 self
341 }
342}
343
/// In-memory knowledge graph: entities and relationships live in a directed
/// petgraph [`Graph`], next to the documents and text chunks held by id.
#[derive(Debug, Clone)]
pub struct KnowledgeGraph {
    /// Entities as node weights, relationships as edge weights.
    graph: Graph<Entity, Relationship>,
    /// Lookup from entity id to its node in `graph`.
    entity_index: HashMap<EntityId, NodeIndex>,
    /// Documents keyed by id, in insertion order.
    documents: IndexMap<DocumentId, Document>,
    /// Chunks keyed by id, in insertion order.
    chunks: IndexMap<ChunkId, TextChunk>,

    /// Relationship hierarchy; starts as `None` and is filled in by
    /// `build_relationship_hierarchy`.
    #[cfg(feature = "async")]
    pub relationship_hierarchy:
        Option<crate::graph::hierarchical_relationships::RelationshipHierarchy>,
}
357
358impl KnowledgeGraph {
359 pub fn new() -> Self {
361 Self {
362 graph: Graph::new(),
363 entity_index: HashMap::new(),
364 documents: IndexMap::new(),
365 chunks: IndexMap::new(),
366 #[cfg(feature = "async")]
367 relationship_hierarchy: None,
368 }
369 }
370
371 pub fn add_document(&mut self, document: Document) -> Result<()> {
373 let document_id = document.id.clone();
374
375 for chunk in &document.chunks {
377 self.chunks.insert(chunk.id.clone(), chunk.clone());
378 }
379
380 self.documents.insert(document_id, document);
382
383 Ok(())
384 }
385
386 pub fn add_entity(&mut self, entity: Entity) -> Result<NodeIndex> {
388 let entity_id = entity.id.clone();
389 let node_index = self.graph.add_node(entity);
390 self.entity_index.insert(entity_id, node_index);
391 Ok(node_index)
392 }
393
394 pub fn add_relationship(&mut self, relationship: Relationship) -> Result<()> {
396 let source_idx = self.entity_index.get(&relationship.source).ok_or_else(|| {
397 crate::GraphRAGError::GraphConstruction {
398 message: format!("Source entity {} not found", relationship.source),
399 }
400 })?;
401
402 let target_idx = self.entity_index.get(&relationship.target).ok_or_else(|| {
403 crate::GraphRAGError::GraphConstruction {
404 message: format!("Target entity {} not found", relationship.target),
405 }
406 })?;
407
408 self.graph.add_edge(*source_idx, *target_idx, relationship);
409 Ok(())
410 }
411
412 pub fn add_chunk(&mut self, chunk: TextChunk) -> Result<()> {
414 self.chunks.insert(chunk.id.clone(), chunk);
415 Ok(())
416 }
417
418 pub fn get_entity(&self, id: &EntityId) -> Option<&Entity> {
420 let node_idx = self.entity_index.get(id)?;
421 self.graph.node_weight(*node_idx)
422 }
423
    /// Looks up a document by id; `None` when no such document is stored.
    pub fn get_document(&self, id: &DocumentId) -> Option<&Document> {
        self.documents.get(id)
    }
428
    /// Looks up a chunk by id; `None` when no such chunk is stored.
    pub fn get_chunk(&self, id: &ChunkId) -> Option<&TextChunk> {
        self.chunks.get(id)
    }
433
434 pub fn get_entity_mut(&mut self, id: &EntityId) -> Option<&mut Entity> {
436 let node_idx = self.entity_index.get(id)?;
437 self.graph.node_weight_mut(*node_idx)
438 }
439
    /// Looks up a chunk by id for mutation; `None` when no such chunk is stored.
    pub fn get_chunk_mut(&mut self, id: &ChunkId) -> Option<&mut TextChunk> {
        self.chunks.get_mut(id)
    }
444
    /// Iterates over every entity (node weight) in the graph.
    pub fn entities(&self) -> impl Iterator<Item = &Entity> {
        self.graph.node_weights()
    }
449
    /// Iterates mutably over every entity (node weight) in the graph.
    ///
    /// NOTE(review): changing an entity's `id` through this iterator does not
    /// update the id index — confirm callers leave ids untouched.
    pub fn entities_mut(&mut self) -> impl Iterator<Item = &mut Entity> {
        self.graph.node_weights_mut()
    }
454
    /// Iterates over the stored documents.
    pub fn documents(&self) -> impl Iterator<Item = &Document> {
        self.documents.values()
    }
459
    /// Iterates mutably over the stored documents.
    pub fn documents_mut(&mut self) -> impl Iterator<Item = &mut Document> {
        self.documents.values_mut()
    }
464
    /// Iterates over the stored chunks.
    pub fn chunks(&self) -> impl Iterator<Item = &TextChunk> {
        self.chunks.values()
    }
469
    /// Iterates mutably over the stored chunks.
    pub fn chunks_mut(&mut self) -> impl Iterator<Item = &mut TextChunk> {
        self.chunks.values_mut()
    }
474
475 pub fn get_neighbors(&self, entity_id: &EntityId) -> Vec<(&Entity, &Relationship)> {
477 use petgraph::visit::EdgeRef;
478
479 if let Some(&node_idx) = self.entity_index.get(entity_id) {
480 self.graph
481 .edges(node_idx)
482 .filter_map(|edge| {
483 let target_entity = self.graph.node_weight(edge.target())?;
484 Some((target_entity, edge.weight()))
485 })
486 .collect()
487 } else {
488 Vec::new()
489 }
490 }
491
    /// Collects references to every relationship (edge weight) in the graph.
    pub fn get_all_relationships(&self) -> Vec<&Relationship> {
        self.graph.edge_weights().collect()
    }
496
497 pub fn load_from_json(file_path: &str) -> Result<Self> {
499 use std::fs;
500
501 let json_str = fs::read_to_string(file_path)?;
503 let json_data = json::parse(&json_str).map_err(|e| GraphRAGError::Config {
504 message: format!("Failed to parse JSON: {}", e),
505 })?;
506
507 let mut kg = KnowledgeGraph::new();
508
509 if json_data["entities"].is_array() {
511 for entity_obj in json_data["entities"].members() {
512 let id = EntityId::new(entity_obj["id"].as_str().unwrap_or("").to_string());
513 let name = entity_obj["name"].as_str().unwrap_or("").to_string();
514 let entity_type = entity_obj["type"].as_str().unwrap_or("").to_string();
515 let confidence = entity_obj["confidence"].as_f32().unwrap_or(0.0);
516
517 let mut mentions = Vec::new();
519 if entity_obj["mentions"].is_array() {
520 for mention_obj in entity_obj["mentions"].members() {
521 let mention = EntityMention {
522 chunk_id: ChunkId::new(
523 mention_obj["chunk_id"].as_str().unwrap_or("").to_string(),
524 ),
525 start_offset: mention_obj["start_offset"].as_usize().unwrap_or(0),
526 end_offset: mention_obj["end_offset"].as_usize().unwrap_or(0),
527 confidence: mention_obj["confidence"].as_f32().unwrap_or(0.0),
528 };
529 mentions.push(mention);
530 }
531 }
532
533 let entity = Entity {
534 id,
535 name,
536 entity_type,
537 confidence,
538 mentions,
539 embedding: None, first_mentioned: None,
541 last_mentioned: None,
542 temporal_validity: None,
543 };
544
545 kg.add_entity(entity)?;
546 }
547 }
548
549 if json_data["relationships"].is_array() {
551 for rel_obj in json_data["relationships"].members() {
552 let source = EntityId::new(rel_obj["source_id"].as_str().unwrap_or("").to_string());
553 let target = EntityId::new(rel_obj["target_id"].as_str().unwrap_or("").to_string());
554 let relation_type = rel_obj["relation_type"].as_str().unwrap_or("").to_string();
555 let confidence = rel_obj["confidence"].as_f32().unwrap_or(0.0);
556
557 let mut context = Vec::new();
558 if rel_obj["context_chunks"].is_array() {
559 for chunk_id in rel_obj["context_chunks"].members() {
560 if let Some(chunk_id_str) = chunk_id.as_str() {
561 context.push(ChunkId::new(chunk_id_str.to_string()));
562 }
563 }
564 }
565
566 let relationship = Relationship {
567 source,
568 target,
569 relation_type,
570 confidence,
571 context,
572 embedding: None,
573 temporal_type: None,
574 temporal_range: None,
575 causal_strength: None,
576 };
577
578 let _ = kg.add_relationship(relationship);
580 }
581 }
582
583 if json_data["chunks"].is_array() {
585 for chunk_obj in json_data["chunks"].members() {
586 let id = ChunkId::new(chunk_obj["id"].as_str().unwrap_or("").to_string());
587 let document_id =
588 DocumentId::new(chunk_obj["document_id"].as_str().unwrap_or("").to_string());
589 let start_offset = chunk_obj["start_offset"].as_usize().unwrap_or(0);
590 let end_offset = chunk_obj["end_offset"].as_usize().unwrap_or(0);
591
592 let content = chunk_obj["content"].as_str().unwrap_or("").to_string();
594
595 let mut entities = Vec::new();
597 if chunk_obj["entities"].is_array() {
598 for entity_id in chunk_obj["entities"].members() {
599 if let Some(entity_id_str) = entity_id.as_str() {
600 entities.push(EntityId::new(entity_id_str.to_string()));
601 }
602 }
603 }
604
605 let chunk = TextChunk {
606 id,
607 document_id,
608 content,
609 start_offset,
610 end_offset,
611 embedding: None, entities,
613 metadata: ChunkMetadata::default(),
614 };
615 kg.add_chunk(chunk)?;
616 }
617 }
618
619 if json_data["documents"].is_array() {
621 for doc_obj in json_data["documents"].members() {
622 let id = DocumentId::new(doc_obj["id"].as_str().unwrap_or("").to_string());
623 let title = doc_obj["title"].as_str().unwrap_or("").to_string();
624 let content = doc_obj["content"].as_str().unwrap_or("").to_string();
625
626 let mut metadata = IndexMap::new();
628 if doc_obj["metadata"].is_object() {
629 for (key, value) in doc_obj["metadata"].entries() {
630 metadata.insert(key.to_string(), value.as_str().unwrap_or("").to_string());
631 }
632 }
633
634 let document = Document {
635 id,
636 title,
637 content,
638 metadata,
639 chunks: vec![], };
641 kg.add_document(document)?;
642 }
643 }
644
645 Ok(kg)
646 }
647
648 pub fn save_to_json(&self, file_path: &str) -> Result<()> {
650 use std::fs;
651
652 let mut json_data = json::JsonValue::new_object();
654
655 json_data["metadata"] = json::object! {
657 "format_version" => "2.0",
658 "created_at" => chrono::Utc::now().to_rfc3339(),
659 "total_entities" => self.entities().count(),
660 "total_relationships" => self.get_all_relationships().len(),
661 "total_chunks" => self.chunks().count(),
662 "total_documents" => self.documents().count()
663 };
664
665 let mut entities_array = json::JsonValue::new_array();
667 for entity in self.entities() {
668 let mut entity_obj = json::object! {
669 "id" => entity.id.to_string(),
670 "name" => entity.name.clone(),
671 "type" => entity.entity_type.clone(),
672 "confidence" => entity.confidence,
673 "mentions_count" => entity.mentions.len()
674 };
675
676 let mut mentions_array = json::JsonValue::new_array();
678 for mention in &entity.mentions {
679 mentions_array
680 .push(json::object! {
681 "chunk_id" => mention.chunk_id.to_string(),
682 "start_offset" => mention.start_offset,
683 "end_offset" => mention.end_offset,
684 "confidence" => mention.confidence
685 })
686 .unwrap();
687 }
688 entity_obj["mentions"] = mentions_array;
689
690 if let Some(embedding) = &entity.embedding {
692 entity_obj["has_embedding"] = true.into();
693 entity_obj["embedding_dimension"] = embedding.len().into();
694 let sample_embedding: Vec<f32> = embedding.iter().take(5).cloned().collect();
696 entity_obj["embedding_sample"] = sample_embedding.into();
697 } else {
698 entity_obj["has_embedding"] = false.into();
699 }
700
701 entities_array.push(entity_obj).unwrap();
702 }
703 json_data["entities"] = entities_array;
704
705 let mut relationships_array = json::JsonValue::new_array();
707 for relationship in self.get_all_relationships() {
708 let rel_obj = json::object! {
709 "source_id" => relationship.source.to_string(),
710 "target_id" => relationship.target.to_string(),
711 "relation_type" => relationship.relation_type.clone(),
712 "confidence" => relationship.confidence,
713 "context_chunks" => relationship.context.iter()
714 .map(|c| c.to_string())
715 .collect::<Vec<String>>()
716 };
717 relationships_array.push(rel_obj).unwrap();
718 }
719 json_data["relationships"] = relationships_array;
720
721 let mut chunks_array = json::JsonValue::new_array();
723 for chunk in self.chunks() {
724 let mut chunk_obj = json::object! {
725 "id" => chunk.id.to_string(),
726 "document_id" => chunk.document_id.to_string(),
727 "content" => chunk.content.clone(), "start_offset" => chunk.start_offset,
729 "end_offset" => chunk.end_offset
730 };
731
732 let entities_list: Vec<String> = chunk.entities.iter().map(|e| e.to_string()).collect();
734 chunk_obj["entities"] = entities_list.into();
735
736 chunk_obj["has_embedding"] = chunk.embedding.is_some().into();
738 if let Some(embedding) = &chunk.embedding {
739 chunk_obj["embedding_dimension"] = embedding.len().into();
740 }
741
742 chunks_array.push(chunk_obj).unwrap();
743 }
744 json_data["chunks"] = chunks_array;
745
746 let mut documents_array = json::JsonValue::new_array();
748 for document in self.documents() {
749 let mut meta_obj = json::JsonValue::new_object();
750 for (key, value) in &document.metadata {
751 meta_obj[key] = value.clone().into();
752 }
753
754 let doc_obj = json::object! {
755 "id" => document.id.to_string(),
756 "title" => document.title.clone(),
757 "content" => document.content.clone(), "metadata" => meta_obj
759 };
760 documents_array.push(doc_obj).unwrap();
761 }
762 json_data["documents"] = documents_array;
763
764 fs::write(file_path, json_data.dump())?;
766 #[cfg(feature = "tracing")]
767 tracing::info!("Knowledge graph saved to {file_path}");
768
769 Ok(())
770 }
771
772 pub fn find_entities_by_name(&self, name: &str) -> impl Iterator<Item = &Entity> {
774 let name_lower = name.to_lowercase();
775 self.entities()
776 .filter(move |entity| entity.name.to_lowercase().contains(&name_lower))
777 }
778
779 pub fn get_entity_by_id(&self, id: &str) -> Option<&Entity> {
781 let entity_id = EntityId::new(id.to_string());
782 self.get_entity(&entity_id)
783 }
784
785 pub fn get_entity_relationships(&self, entity_id: &str) -> impl Iterator<Item = &Relationship> {
787 let entity_id = EntityId::new(entity_id.to_string());
788 if let Some(&node_idx) = self.entity_index.get(&entity_id) {
789 self.graph
790 .edges(node_idx)
791 .map(|edge| edge.weight())
792 .collect::<Vec<_>>()
793 .into_iter()
794 } else {
795 Vec::new().into_iter()
796 }
797 }
798
799 pub fn find_relationship_path(
801 &self,
802 entity1: &str,
803 entity2: &str,
804 _max_depth: usize,
805 ) -> Vec<String> {
806 let entity1_id = EntityId::new(entity1.to_string());
807 let entity2_id = EntityId::new(entity2.to_string());
808
809 let node1 = self.entity_index.get(&entity1_id);
810 let node2 = self.entity_index.get(&entity2_id);
811
812 if let (Some(&start), Some(&end)) = (node1, node2) {
813 use petgraph::visit::EdgeRef;
815 for edge in self.graph.edges(start) {
816 if edge.target() == end {
817 return vec![edge.weight().relation_type.clone()];
818 }
819 }
820 }
821
822 Vec::new() }
824
825 #[cfg(feature = "pagerank")]
828 pub fn build_pagerank_calculator(
829 &self,
830 ) -> Result<crate::graph::pagerank::PersonalizedPageRank> {
831 let config = crate::graph::pagerank::PageRankConfig::default();
832 let (adjacency_matrix, node_mapping, reverse_mapping) = self.build_adjacency_matrix()?;
833
834 Ok(crate::graph::pagerank::PersonalizedPageRank::new(
835 config,
836 adjacency_matrix,
837 node_mapping,
838 reverse_mapping,
839 ))
840 }
841
842 #[cfg(feature = "pagerank")]
845 fn build_adjacency_matrix(&self) -> Result<AdjacencyMatrixResult> {
846 let nodes: Vec<EntityId> = self.entities().map(|e| e.id.clone()).collect();
847 let node_mapping: HashMap<EntityId, usize> = nodes
848 .iter()
849 .enumerate()
850 .map(|(i, id)| (id.clone(), i))
851 .collect();
852 let reverse_mapping: HashMap<usize, EntityId> = nodes
853 .iter()
854 .enumerate()
855 .map(|(i, id)| (i, id.clone()))
856 .collect();
857
858 let mut row_indices = Vec::new();
860 let mut col_indices = Vec::new();
861 let mut values = Vec::new();
862
863 for relationship in self.get_all_relationships() {
864 if let (Some(&from_idx), Some(&to_idx)) = (
865 node_mapping.get(&relationship.source),
866 node_mapping.get(&relationship.target),
867 ) {
868 row_indices.push(from_idx);
869 col_indices.push(to_idx);
870 values.push(relationship.confidence as f64);
871 }
872 }
873
874 let matrix = if row_indices.is_empty() {
875 sprs::CsMat::zero((nodes.len(), nodes.len()))
877 } else {
878 let mut triplet_mat = sprs::TriMat::new((nodes.len(), nodes.len()));
880 for ((row, col), val) in row_indices.into_iter().zip(col_indices).zip(values) {
881 triplet_mat.add_triplet(row, col, val);
882 }
883 triplet_mat.to_csr()
884 };
885
886 Ok((matrix, node_mapping, reverse_mapping))
887 }
888
889 pub fn entity_count(&self) -> usize {
891 self.entities().count()
892 }
893
894 pub fn relationship_count(&self) -> usize {
896 self.get_all_relationships().len()
897 }
898
899 pub fn document_count(&self) -> usize {
901 self.documents().count()
902 }
903
    /// Iterates over every relationship (edge weight) in the graph.
    pub fn relationships(&self) -> impl Iterator<Item = &Relationship> {
        self.graph.edge_weights()
    }
908
    /// Removes every entity and relationship and empties the entity index.
    ///
    /// Documents and chunks are left untouched.
    pub fn clear_entities_and_relationships(&mut self) {
        self.graph.clear();
        self.entity_index.clear();
    }
917
918 fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
929 if a.len() != b.len() || a.is_empty() {
930 return 0.0;
931 }
932
933 let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
934 let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
935 let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
936
937 if norm_a == 0.0 || norm_b == 0.0 {
938 return 0.0;
939 }
940
941 dot_product / (norm_a * norm_b)
942 }
943
944 fn calculate_temporal_relevance(range: &crate::graph::temporal::TemporalRange) -> f32 {
958 use std::time::{SystemTime, UNIX_EPOCH};
959
960 let now = SystemTime::now()
962 .duration_since(UNIX_EPOCH)
963 .unwrap_or_default()
964 .as_secs() as i64;
965
966 let mid_point = (range.start + range.end) / 2;
968 let years_ago = ((now - mid_point) / (365 * 24 * 3600)).abs();
969
970 let recency_boost = if years_ago <= 10 {
974 0.3
975 } else if years_ago <= 50 {
976 0.3 * (1.0 - (years_ago - 10) as f32 / 40.0)
977 } else if years_ago <= 200 {
978 0.1 * (1.0 - (years_ago - 50) as f32 / 150.0)
979 } else {
980 0.05 };
982
983 recency_boost.max(0.0)
984 }
985
986 pub fn dynamic_weight(
1004 &self,
1005 relationship: &Relationship,
1006 query_embedding: Option<&[f32]>,
1007 query_concepts: &[String],
1008 ) -> f32 {
1009 let base_weight = relationship.confidence;
1010
1011 let semantic_boost = if let (Some(rel_emb), Some(query_emb)) =
1013 (relationship.embedding.as_deref(), query_embedding)
1014 {
1015 Self::cosine_similarity(rel_emb, query_emb).max(0.0)
1016 } else {
1017 0.0
1018 };
1019
1020 let temporal_boost = if let Some(tr) = &relationship.temporal_range {
1022 Self::calculate_temporal_relevance(tr)
1023 } else {
1024 0.0
1025 };
1026
1027 let concept_boost = query_concepts
1029 .iter()
1030 .filter(|c| {
1031 relationship
1032 .relation_type
1033 .to_lowercase()
1034 .contains(&c.to_lowercase())
1035 })
1036 .count() as f32
1037 * 0.15; let causal_boost = if let Some(strength) = relationship.causal_strength {
1041 strength * 0.2 } else {
1043 0.0
1044 };
1045
1046 base_weight * (1.0 + semantic_boost + temporal_boost + concept_boost + causal_boost)
1048 }
1049
1050 #[cfg(feature = "leiden")]
1054 pub fn to_leiden_graph(&self) -> petgraph::Graph<String, f32, petgraph::Undirected> {
1055 let mut graph = Graph::new_undirected();
1056 let mut node_map = HashMap::new();
1057
1058 for entity in self.entities() {
1060 let idx = graph.add_node(entity.name.clone());
1061 node_map.insert(entity.id.clone(), idx);
1062 }
1063
1064 for rel in self.get_all_relationships() {
1066 if let (Some(&src), Some(&tgt)) = (node_map.get(&rel.source), node_map.get(&rel.target))
1067 {
1068 graph.add_edge(src, tgt, rel.confidence);
1069 }
1070 }
1071
1072 graph
1073 }
1074
1075 #[cfg(feature = "leiden")]
1100 pub fn detect_hierarchical_communities(
1101 &self,
1102 config: crate::graph::leiden::LeidenConfig,
1103 ) -> Result<crate::graph::leiden::HierarchicalCommunities> {
1104 use crate::graph::leiden::LeidenCommunityDetector;
1105
1106 let leiden_graph = self.to_leiden_graph();
1108
1109 let detector = LeidenCommunityDetector::new(config);
1111 let mut communities = detector.detect_communities(&leiden_graph)?;
1112
1113 communities.entity_mapping = Some(self.build_entity_mapping());
1115
1116 Ok(communities)
1117 }
1118
1119 #[cfg(feature = "leiden")]
1122 fn build_entity_mapping(&self) -> HashMap<String, crate::graph::leiden::EntityMetadata> {
1123 use crate::graph::leiden::EntityMetadata;
1124
1125 self.entities()
1126 .map(|entity| {
1127 let metadata = EntityMetadata {
1128 id: entity.id.to_string(),
1129 name: entity.name.clone(),
1130 entity_type: entity.entity_type.clone(),
1131 confidence: entity.confidence,
1132 mention_count: entity.mentions.len(),
1133 };
1134 (entity.name.clone(), metadata)
1135 })
1136 .collect()
1137 }
1138
1139 #[cfg(feature = "async")]
1162 pub async fn build_relationship_hierarchy(
1163 &mut self,
1164 num_levels: usize,
1165 ollama_client: Option<crate::ollama::OllamaClient>,
1166 ) -> Result<()> {
1167 use crate::graph::hierarchical_relationships::HierarchyBuilder;
1168
1169 let builder = HierarchyBuilder::from_graph(self).with_num_levels(num_levels);
1170
1171 let builder = if let Some(client) = ollama_client {
1172 builder.with_ollama_client(client)
1173 } else {
1174 builder
1175 };
1176
1177 let hierarchy = builder.build().await?;
1178 self.relationship_hierarchy = Some(hierarchy);
1179
1180 Ok(())
1181 }
1182}
1183
1184impl Default for KnowledgeGraph {
1185 fn default() -> Self {
1186 Self::new()
1187 }
1188}
1189
1190impl Document {
1191 pub fn new(id: DocumentId, title: String, content: String) -> Self {
1193 Self {
1194 id,
1195 title,
1196 content,
1197 metadata: IndexMap::new(),
1198 chunks: Vec::new(),
1199 }
1200 }
1201
1202 pub fn with_metadata(mut self, key: String, value: String) -> Self {
1204 self.metadata.insert(key, value);
1205 self
1206 }
1207
1208 pub fn with_chunks(mut self, chunks: Vec<TextChunk>) -> Self {
1210 self.chunks = chunks;
1211 self
1212 }
1213}
1214
1215impl TextChunk {
1216 pub fn new(
1218 id: ChunkId,
1219 document_id: DocumentId,
1220 content: String,
1221 start_offset: usize,
1222 end_offset: usize,
1223 ) -> Self {
1224 Self {
1225 id,
1226 document_id,
1227 content,
1228 start_offset,
1229 end_offset,
1230 embedding: None,
1231 entities: Vec::new(),
1232 metadata: ChunkMetadata::default(),
1233 }
1234 }
1235
1236 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
1238 self.embedding = Some(embedding);
1239 self
1240 }
1241
1242 pub fn with_entities(mut self, entities: Vec<EntityId>) -> Self {
1244 self.entities = entities;
1245 self
1246 }
1247
1248 pub fn with_metadata(mut self, metadata: ChunkMetadata) -> Self {
1250 self.metadata = metadata;
1251 self
1252 }
1253}
1254
1255impl Entity {
1256 pub fn new(id: EntityId, name: String, entity_type: String, confidence: f32) -> Self {
1258 Self {
1259 id,
1260 name,
1261 entity_type,
1262 confidence,
1263 mentions: Vec::new(),
1264 embedding: None,
1265 first_mentioned: None,
1267 last_mentioned: None,
1268 temporal_validity: None,
1269 }
1270 }
1271
1272 pub fn with_mentions(mut self, mentions: Vec<EntityMention>) -> Self {
1274 self.mentions = mentions;
1275 self
1276 }
1277
1278 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
1280 self.embedding = Some(embedding);
1281 self
1282 }
1283
1284 pub fn with_temporal_validity(mut self, start: i64, end: i64) -> Self {
1286 self.temporal_validity = Some(crate::graph::temporal::TemporalRange::new(start, end));
1287 self
1288 }
1289
1290 pub fn with_mention_times(mut self, first: i64, last: i64) -> Self {
1292 self.first_mentioned = Some(first);
1293 self.last_mentioned = Some(last);
1294 self
1295 }
1296}
1297
#[cfg(test)]
mod temporal_tests {
    use super::*;
    use crate::graph::temporal::TemporalRelationType;

    /// Seconds in a 365-day year, used to build far-past timestamps.
    const YEAR_SECS: i64 = 365 * 24 * 3600;

    /// Shorthand for an entity id built from a string literal.
    fn eid(raw: &str) -> EntityId {
        EntityId::new(raw.to_string())
    }

    /// The temporal builders populate all three temporal fields of an entity.
    #[test]
    fn test_entity_with_temporal_fields() {
        let born = -470 * YEAR_SECS;
        let died = -399 * YEAR_SECS;

        let entity = Entity::new(
            eid("socrates"),
            "Socrates".to_string(),
            "PERSON".to_string(),
            0.9,
        )
        .with_temporal_validity(born, died)
        .with_mention_times(1000, 2000);

        assert_eq!(entity.name, "Socrates");
        assert!(entity.temporal_validity.is_some());
        assert!(entity.first_mentioned.is_some());
        assert!(entity.last_mentioned.is_some());

        let validity = entity.temporal_validity.unwrap();
        assert_eq!(validity.start, born);
        assert_eq!(validity.end, died);
    }

    /// Temporal validity survives a serde JSON round trip.
    #[test]
    fn test_entity_temporal_serialization() {
        let original = Entity::new(
            eid("test"),
            "Test Entity".to_string(),
            "TEST".to_string(),
            0.8,
        )
        .with_temporal_validity(100, 200);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Entity = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.name, "Test Entity");
        assert!(decoded.temporal_validity.is_some());

        let validity = decoded.temporal_validity.unwrap();
        assert_eq!(validity.start, 100);
        assert_eq!(validity.end, 200);
    }

    /// Setting a causal temporal type also fills in its default strength.
    #[test]
    fn test_relationship_with_temporal_type() {
        let rel = Relationship::new(eid("socrates"), eid("plato"), "TAUGHT".to_string(), 0.9)
            .with_temporal_type(TemporalRelationType::Caused)
            .with_temporal_range(100, 200);

        assert!(rel.temporal_type.is_some());
        assert!(rel.temporal_range.is_some());
        assert!(rel.causal_strength.is_some());

        let temporal_type = rel.temporal_type.unwrap();
        assert_eq!(temporal_type, TemporalRelationType::Caused);

        let strength = rel.causal_strength.unwrap();
        assert_eq!(strength, 0.9);
    }

    /// An explicit causal strength replaces the one defaulted by the type.
    #[test]
    fn test_relationship_with_causal_strength() {
        let rel = Relationship::new(eid("a"), eid("b"), "INFLUENCED".to_string(), 0.8)
            .with_temporal_type(TemporalRelationType::Enabled)
            .with_causal_strength(0.75);

        assert!(rel.causal_strength.is_some());
        assert_eq!(rel.causal_strength.unwrap(), 0.75);
    }

    /// Temporal and causal fields survive a serde JSON round trip.
    #[test]
    fn test_relationship_temporal_serialization() {
        let original = Relationship::new(eid("source"), eid("target"), "CAUSED".to_string(), 0.9)
            .with_temporal_type(TemporalRelationType::Caused)
            .with_temporal_range(100, 200)
            .with_causal_strength(0.95);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Relationship = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.relation_type, "CAUSED");
        assert!(decoded.temporal_type.is_some());
        assert!(decoded.temporal_range.is_some());
        assert!(decoded.causal_strength.is_some());

        let temporal_type = decoded.temporal_type.unwrap();
        assert_eq!(temporal_type, TemporalRelationType::Caused);

        let range = decoded.temporal_range.unwrap();
        assert_eq!(range.start, 100);
        assert_eq!(range.end, 200);

        assert_eq!(decoded.causal_strength.unwrap(), 0.95);
    }

    /// An entity without temporal data serializes without the temporal keys.
    #[test]
    fn test_entity_backward_compatibility() {
        let entity = Entity::new(eid("test"), "Test".to_string(), "TEST".to_string(), 0.9);

        assert!(entity.first_mentioned.is_none());
        assert!(entity.last_mentioned.is_none());
        assert!(entity.temporal_validity.is_none());

        let encoded = serde_json::to_string(&entity).unwrap();
        for absent_key in ["first_mentioned", "last_mentioned", "temporal_validity"] {
            assert!(!encoded.contains(absent_key));
        }
    }

    /// A relationship without temporal data serializes without those keys.
    #[test]
    fn test_relationship_backward_compatibility() {
        let rel = Relationship::new(eid("a"), eid("b"), "RELATED_TO".to_string(), 0.8);

        assert!(rel.temporal_type.is_none());
        assert!(rel.temporal_range.is_none());
        assert!(rel.causal_strength.is_none());

        let encoded = serde_json::to_string(&rel).unwrap();
        for absent_key in ["temporal_type", "temporal_range", "causal_strength"] {
            assert!(!encoded.contains(absent_key));
        }
    }
}