1pub mod error;
7pub mod metadata;
8
9#[cfg(feature = "async")]
11pub mod registry;
12
13#[cfg(feature = "async")]
15pub mod traits;
16
17#[cfg(all(feature = "async", feature = "ollama"))]
19pub mod ollama_adapters;
20
21#[cfg(feature = "async")]
23pub mod entity_adapters;
24
25#[cfg(feature = "async")]
27pub mod retrieval_adapters;
28
29#[cfg(feature = "async")]
31pub mod test_utils;
32
33#[cfg(test)]
34pub mod test_traits;
35
36pub use error::{ErrorContext, ErrorSeverity, ErrorSuggestion, GraphRAGError, Result};
38pub use metadata::ChunkMetadata;
39
40#[cfg(feature = "async")]
41pub use registry::{RegistryBuilder, ServiceConfig, ServiceContext, ServiceRegistry};
42
43#[cfg(feature = "async")]
45pub use traits::*;
46
/// Strategy for splitting a piece of text into [`TextChunk`]s.
///
/// Implementors must be `Send + Sync` so that a strategy can be shared across
/// threads.
pub trait ChunkingStrategy: Send + Sync {
    /// Splits `text` into chunks and returns them.
    ///
    /// NOTE(review): how the chunk ids, document ids and offsets are assigned
    /// is up to the implementor and is not visible from this trait.
    fn chunk(&self, text: &str) -> Vec<TextChunk>;
}
77
78use indexmap::IndexMap;
79use petgraph::{graph::NodeIndex, Graph};
80use std::collections::HashMap;
81
82#[cfg(feature = "pagerank")]
84use sprs::CsMat;
85
/// Result of building the adjacency matrix used for PageRank.
///
/// The tuple holds, in order: the sparse adjacency matrix, the mapping from
/// entity id to matrix index, and the reverse mapping from matrix index to
/// entity id.
#[cfg(feature = "pagerank")]
type AdjacencyMatrixResult = (
    CsMat<f64>,
    HashMap<EntityId, usize>,
    HashMap<usize, EntityId>,
);
94
95#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
97pub struct DocumentId(pub String);
98
99impl DocumentId {
100 pub fn new(id: String) -> Self {
102 Self(id)
103 }
104}
105
106impl std::fmt::Display for DocumentId {
107 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108 write!(f, "{}", self.0)
109 }
110}
111
112impl From<String> for DocumentId {
113 fn from(s: String) -> Self {
114 Self(s)
115 }
116}
117
118impl From<DocumentId> for String {
119 fn from(id: DocumentId) -> Self {
120 id.0
121 }
122}
123
124#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
126pub struct EntityId(pub String);
127
128impl EntityId {
129 pub fn new(id: String) -> Self {
131 Self(id)
132 }
133}
134
135impl std::fmt::Display for EntityId {
136 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
137 write!(f, "{}", self.0)
138 }
139}
140
141impl From<String> for EntityId {
142 fn from(s: String) -> Self {
143 Self(s)
144 }
145}
146
147impl From<EntityId> for String {
148 fn from(id: EntityId) -> Self {
149 id.0
150 }
151}
152
153#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
155pub struct ChunkId(pub String);
156
157impl ChunkId {
158 pub fn new(id: String) -> Self {
160 Self(id)
161 }
162}
163
164impl std::fmt::Display for ChunkId {
165 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
166 write!(f, "{}", self.0)
167 }
168}
169
170impl From<String> for ChunkId {
171 fn from(s: String) -> Self {
172 Self(s)
173 }
174}
175
176impl From<ChunkId> for String {
177 fn from(id: ChunkId) -> Self {
178 id.0
179 }
180}
181
/// A source document together with its metadata and text chunks.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Document {
    /// Identifier of this document.
    pub id: DocumentId,
    /// Title of the document.
    pub title: String,
    /// Text content of the document.
    pub content: String,
    /// String key/value metadata, kept in insertion order.
    pub metadata: IndexMap<String, String>,
    /// Chunks attached to this document.
    pub chunks: Vec<TextChunk>,
}
196
/// A piece of text cut from a document, with optional embedding and the
/// entities associated with it.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct TextChunk {
    /// Identifier of this chunk.
    pub id: ChunkId,
    /// Identifier of the document this chunk refers to.
    pub document_id: DocumentId,
    /// Text content of the chunk.
    pub content: String,
    /// Start offset of the chunk.
    /// NOTE(review): presumably an offset into the parent document's content;
    /// the unit (bytes vs. characters) is not visible here.
    pub start_offset: usize,
    /// End offset of the chunk (same caveat as `start_offset`).
    pub end_offset: usize,
    /// Embedding vector for the chunk, if one has been attached.
    pub embedding: Option<Vec<f32>>,
    /// Identifiers of the entities associated with this chunk.
    pub entities: Vec<EntityId>,
    /// Additional chunk metadata.
    pub metadata: ChunkMetadata,
}
217
/// A node of the knowledge graph.
///
/// The three temporal fields are optional: they are omitted from the
/// serialized form when `None` and default to `None` when absent on
/// deserialization.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Entity {
    /// Identifier of this entity.
    pub id: EntityId,
    /// Name of the entity.
    pub name: String,
    /// Free-form type label of the entity.
    pub entity_type: String,
    /// Confidence score; no range is enforced by this type.
    pub confidence: f32,
    /// Places where this entity is mentioned.
    pub mentions: Vec<EntityMention>,
    /// Embedding vector for the entity, if one has been attached.
    pub embedding: Option<Vec<f32>>,

    /// Time of the first mention.
    /// NOTE(review): presumably a Unix timestamp in seconds — confirm.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub first_mentioned: Option<i64>,
    /// Time of the last mention (same caveat as `first_mentioned`).
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub last_mentioned: Option<i64>,
    /// Time range attached to the entity, if any.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub temporal_validity: Option<crate::graph::temporal::TemporalRange>,
}
245
/// A single mention of an entity inside a chunk.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct EntityMention {
    /// Identifier of the chunk containing the mention.
    pub chunk_id: ChunkId,
    /// Start offset of the mention.
    /// NOTE(review): presumably relative to the chunk content; unit not
    /// visible here.
    pub start_offset: usize,
    /// End offset of the mention (same caveat as `start_offset`).
    pub end_offset: usize,
    /// Confidence score of this mention; no range is enforced by this type.
    pub confidence: f32,
}
258
/// A directed edge of the knowledge graph connecting two entities.
///
/// The embedding and the temporal/causal fields are optional: they are
/// omitted from the serialized form when `None` and default to `None` when
/// absent on deserialization.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Relationship {
    /// Identifier of the source entity.
    pub source: EntityId,
    /// Identifier of the target entity.
    pub target: EntityId,
    /// Free-form label describing the relation.
    pub relation_type: String,
    /// Confidence score; also used as the base weight in
    /// `KnowledgeGraph::dynamic_weight`.
    pub confidence: f32,
    /// Identifiers of the chunks that give context for this relationship.
    pub context: Vec<ChunkId>,

    /// Embedding vector for the relationship, if one has been attached.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub embedding: Option<Vec<f32>>,

    /// Temporal/causal classification of the relationship, if any.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub temporal_type: Option<crate::graph::temporal::TemporalRelationType>,
    /// Time range attached to the relationship, if any.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub temporal_range: Option<crate::graph::temporal::TemporalRange>,
    /// Causal strength; `with_causal_strength` clamps it to `[0.0, 1.0]`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub causal_strength: Option<f32>,
}
288
289impl Relationship {
290 pub fn new(source: EntityId, target: EntityId, relation_type: String, confidence: f32) -> Self {
292 Self {
293 source,
294 target,
295 relation_type,
296 confidence,
297 context: Vec::new(),
298 embedding: None,
299 temporal_type: None,
301 temporal_range: None,
302 causal_strength: None,
303 }
304 }
305
306 pub fn with_context(mut self, context: Vec<ChunkId>) -> Self {
308 self.context = context;
309 self
310 }
311
312 pub fn with_temporal_type(
314 mut self,
315 temporal_type: crate::graph::temporal::TemporalRelationType,
316 ) -> Self {
317 self.temporal_type = Some(temporal_type);
318 if self.causal_strength.is_none() && temporal_type.is_causal() {
320 self.causal_strength = Some(temporal_type.default_strength());
321 }
322 self
323 }
324
325 pub fn with_temporal_range(mut self, start: i64, end: i64) -> Self {
327 self.temporal_range = Some(crate::graph::temporal::TemporalRange::new(start, end));
328 self
329 }
330
331 pub fn with_causal_strength(mut self, strength: f32) -> Self {
333 self.causal_strength = Some(strength.clamp(0.0, 1.0));
334 self
335 }
336
337 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
339 self.embedding = Some(embedding);
340 self
341 }
342}
343
/// In-memory knowledge graph: entities and relationships stored in a
/// `petgraph` graph, plus the documents and chunks they were derived from.
#[derive(Debug, Clone)]
pub struct KnowledgeGraph {
    /// Entities as nodes, relationships as edges.
    graph: Graph<Entity, Relationship>,
    /// Lookup from entity id to the entity's node in `graph`.
    entity_index: HashMap<EntityId, NodeIndex>,
    /// Documents keyed by id, kept in insertion order.
    documents: IndexMap<DocumentId, Document>,
    /// Chunks keyed by id, kept in insertion order.
    chunks: IndexMap<ChunkId, TextChunk>,

    /// Relationship hierarchy, populated by `build_relationship_hierarchy`;
    /// `None` until then.
    #[cfg(feature = "async")]
    pub relationship_hierarchy:
        Option<crate::graph::hierarchical_relationships::RelationshipHierarchy>,
}
357
358impl KnowledgeGraph {
359 pub fn new() -> Self {
361 Self {
362 graph: Graph::new(),
363 entity_index: HashMap::new(),
364 documents: IndexMap::new(),
365 chunks: IndexMap::new(),
366 #[cfg(feature = "async")]
367 relationship_hierarchy: None,
368 }
369 }
370
371 pub fn add_document(&mut self, document: Document) -> Result<()> {
373 let document_id = document.id.clone();
374
375 for chunk in &document.chunks {
377 self.chunks.insert(chunk.id.clone(), chunk.clone());
378 }
379
380 self.documents.insert(document_id, document);
382
383 Ok(())
384 }
385
386 pub fn add_entity(&mut self, entity: Entity) -> Result<NodeIndex> {
388 let entity_id = entity.id.clone();
389 let node_index = self.graph.add_node(entity);
390 self.entity_index.insert(entity_id, node_index);
391 Ok(node_index)
392 }
393
394 pub fn add_relationship(&mut self, relationship: Relationship) -> Result<()> {
396 let source_idx = self.entity_index.get(&relationship.source).ok_or_else(|| {
397 crate::GraphRAGError::GraphConstruction {
398 message: format!("Source entity {} not found", relationship.source),
399 }
400 })?;
401
402 let target_idx = self.entity_index.get(&relationship.target).ok_or_else(|| {
403 crate::GraphRAGError::GraphConstruction {
404 message: format!("Target entity {} not found", relationship.target),
405 }
406 })?;
407
408 self.graph.add_edge(*source_idx, *target_idx, relationship);
409 Ok(())
410 }
411
412 pub fn add_chunk(&mut self, chunk: TextChunk) -> Result<()> {
414 self.chunks.insert(chunk.id.clone(), chunk);
415 Ok(())
416 }
417
418 pub fn get_entity(&self, id: &EntityId) -> Option<&Entity> {
420 let node_idx = self.entity_index.get(id)?;
421 self.graph.node_weight(*node_idx)
422 }
423
424 pub fn get_document(&self, id: &DocumentId) -> Option<&Document> {
426 self.documents.get(id)
427 }
428
429 pub fn get_chunk(&self, id: &ChunkId) -> Option<&TextChunk> {
431 self.chunks.get(id)
432 }
433
434 pub fn get_entity_mut(&mut self, id: &EntityId) -> Option<&mut Entity> {
436 let node_idx = self.entity_index.get(id)?;
437 self.graph.node_weight_mut(*node_idx)
438 }
439
440 pub fn get_chunk_mut(&mut self, id: &ChunkId) -> Option<&mut TextChunk> {
442 self.chunks.get_mut(id)
443 }
444
445 pub fn entities(&self) -> impl Iterator<Item = &Entity> {
447 self.graph.node_weights()
448 }
449
450 pub fn entities_mut(&mut self) -> impl Iterator<Item = &mut Entity> {
452 self.graph.node_weights_mut()
453 }
454
455 pub fn documents(&self) -> impl Iterator<Item = &Document> {
457 self.documents.values()
458 }
459
460 pub fn documents_mut(&mut self) -> impl Iterator<Item = &mut Document> {
462 self.documents.values_mut()
463 }
464
465 pub fn chunks(&self) -> impl Iterator<Item = &TextChunk> {
467 self.chunks.values()
468 }
469
470 pub fn chunks_mut(&mut self) -> impl Iterator<Item = &mut TextChunk> {
472 self.chunks.values_mut()
473 }
474
475 pub fn get_neighbors(&self, entity_id: &EntityId) -> Vec<(&Entity, &Relationship)> {
477 use petgraph::visit::EdgeRef;
478
479 if let Some(&node_idx) = self.entity_index.get(entity_id) {
480 self.graph
481 .edges(node_idx)
482 .filter_map(|edge| {
483 let target_entity = self.graph.node_weight(edge.target())?;
484 Some((target_entity, edge.weight()))
485 })
486 .collect()
487 } else {
488 Vec::new()
489 }
490 }
491
492 pub fn get_all_relationships(&self) -> Vec<&Relationship> {
494 self.graph.edge_weights().collect()
495 }
496
497 pub fn load_from_json(file_path: &str) -> Result<Self> {
499 use std::fs;
500
501 let json_str = fs::read_to_string(file_path)?;
503 let json_data = json::parse(&json_str).map_err(|e| GraphRAGError::Config {
504 message: format!("Failed to parse JSON: {}", e),
505 })?;
506
507 let mut kg = KnowledgeGraph::new();
508
509 if json_data["entities"].is_array() {
511 for entity_obj in json_data["entities"].members() {
512 let id = EntityId::new(entity_obj["id"].as_str().unwrap_or("").to_string());
513 let name = entity_obj["name"].as_str().unwrap_or("").to_string();
514 let entity_type = entity_obj["type"].as_str().unwrap_or("").to_string();
515 let confidence = entity_obj["confidence"].as_f32().unwrap_or(0.0);
516
517 let mut mentions = Vec::new();
519 if entity_obj["mentions"].is_array() {
520 for mention_obj in entity_obj["mentions"].members() {
521 let mention = EntityMention {
522 chunk_id: ChunkId::new(
523 mention_obj["chunk_id"].as_str().unwrap_or("").to_string(),
524 ),
525 start_offset: mention_obj["start_offset"].as_usize().unwrap_or(0),
526 end_offset: mention_obj["end_offset"].as_usize().unwrap_or(0),
527 confidence: mention_obj["confidence"].as_f32().unwrap_or(0.0),
528 };
529 mentions.push(mention);
530 }
531 }
532
533 let entity = Entity {
534 id,
535 name,
536 entity_type,
537 confidence,
538 mentions,
539 embedding: None, first_mentioned: None,
541 last_mentioned: None,
542 temporal_validity: None,
543 };
544
545 kg.add_entity(entity)?;
546 }
547 }
548
549 if json_data["relationships"].is_array() {
551 for rel_obj in json_data["relationships"].members() {
552 let source = EntityId::new(rel_obj["source_id"].as_str().unwrap_or("").to_string());
553 let target = EntityId::new(rel_obj["target_id"].as_str().unwrap_or("").to_string());
554 let relation_type = rel_obj["relation_type"].as_str().unwrap_or("").to_string();
555 let confidence = rel_obj["confidence"].as_f32().unwrap_or(0.0);
556
557 let mut context = Vec::new();
558 if rel_obj["context_chunks"].is_array() {
559 for chunk_id in rel_obj["context_chunks"].members() {
560 if let Some(chunk_id_str) = chunk_id.as_str() {
561 context.push(ChunkId::new(chunk_id_str.to_string()));
562 }
563 }
564 }
565
566 let relationship = Relationship {
567 source,
568 target,
569 relation_type,
570 confidence,
571 context,
572 embedding: None,
573 temporal_type: None,
574 temporal_range: None,
575 causal_strength: None,
576 };
577
578 let _ = kg.add_relationship(relationship);
580 }
581 }
582
583 if json_data["chunks"].is_array() {
585 for chunk_obj in json_data["chunks"].members() {
586 let id = ChunkId::new(chunk_obj["id"].as_str().unwrap_or("").to_string());
587 let document_id =
588 DocumentId::new(chunk_obj["document_id"].as_str().unwrap_or("").to_string());
589 let start_offset = chunk_obj["start_offset"].as_usize().unwrap_or(0);
590 let end_offset = chunk_obj["end_offset"].as_usize().unwrap_or(0);
591
592 let content = chunk_obj["content"].as_str().unwrap_or("").to_string();
594
595 let mut entities = Vec::new();
597 if chunk_obj["entities"].is_array() {
598 for entity_id in chunk_obj["entities"].members() {
599 if let Some(entity_id_str) = entity_id.as_str() {
600 entities.push(EntityId::new(entity_id_str.to_string()));
601 }
602 }
603 }
604
605 let chunk = TextChunk {
606 id,
607 document_id,
608 content,
609 start_offset,
610 end_offset,
611 embedding: None, entities,
613 metadata: ChunkMetadata::default(),
614 };
615 kg.add_chunk(chunk)?;
616 }
617 }
618
619 if json_data["documents"].is_array() {
621 for doc_obj in json_data["documents"].members() {
622 let id = DocumentId::new(doc_obj["id"].as_str().unwrap_or("").to_string());
623 let title = doc_obj["title"].as_str().unwrap_or("").to_string();
624 let content = doc_obj["content"].as_str().unwrap_or("").to_string();
625
626 let mut metadata = IndexMap::new();
628 if doc_obj["metadata"].is_object() {
629 for (key, value) in doc_obj["metadata"].entries() {
630 metadata.insert(key.to_string(), value.as_str().unwrap_or("").to_string());
631 }
632 }
633
634 let document = Document {
635 id,
636 title,
637 content,
638 metadata,
639 chunks: vec![], };
641 kg.add_document(document)?;
642 }
643 }
644
645 Ok(kg)
646 }
647
648 pub fn save_to_json(&self, file_path: &str) -> Result<()> {
650 use std::fs;
651
652 let mut json_data = json::JsonValue::new_object();
654
655 json_data["metadata"] = json::object! {
657 "format_version" => "2.0",
658 "created_at" => chrono::Utc::now().to_rfc3339(),
659 "total_entities" => self.entities().count(),
660 "total_relationships" => self.get_all_relationships().len(),
661 "total_chunks" => self.chunks().count(),
662 "total_documents" => self.documents().count()
663 };
664
665 let mut entities_array = json::JsonValue::new_array();
667 for entity in self.entities() {
668 let mut entity_obj = json::object! {
669 "id" => entity.id.to_string(),
670 "name" => entity.name.clone(),
671 "type" => entity.entity_type.clone(),
672 "confidence" => entity.confidence,
673 "mentions_count" => entity.mentions.len()
674 };
675
676 let mut mentions_array = json::JsonValue::new_array();
678 for mention in &entity.mentions {
679 mentions_array
680 .push(json::object! {
681 "chunk_id" => mention.chunk_id.to_string(),
682 "start_offset" => mention.start_offset,
683 "end_offset" => mention.end_offset,
684 "confidence" => mention.confidence
685 })
686 .unwrap();
687 }
688 entity_obj["mentions"] = mentions_array;
689
690 if let Some(embedding) = &entity.embedding {
692 entity_obj["has_embedding"] = true.into();
693 entity_obj["embedding_dimension"] = embedding.len().into();
694 let sample_embedding: Vec<f32> = embedding.iter().take(5).cloned().collect();
696 entity_obj["embedding_sample"] = sample_embedding.into();
697 } else {
698 entity_obj["has_embedding"] = false.into();
699 }
700
701 entities_array.push(entity_obj).unwrap();
702 }
703 json_data["entities"] = entities_array;
704
705 let mut relationships_array = json::JsonValue::new_array();
707 for relationship in self.get_all_relationships() {
708 let rel_obj = json::object! {
709 "source_id" => relationship.source.to_string(),
710 "target_id" => relationship.target.to_string(),
711 "relation_type" => relationship.relation_type.clone(),
712 "confidence" => relationship.confidence,
713 "context_chunks" => relationship.context.iter()
714 .map(|c| c.to_string())
715 .collect::<Vec<String>>()
716 };
717 relationships_array.push(rel_obj).unwrap();
718 }
719 json_data["relationships"] = relationships_array;
720
721 let mut chunks_array = json::JsonValue::new_array();
723 for chunk in self.chunks() {
724 let mut chunk_obj = json::object! {
725 "id" => chunk.id.to_string(),
726 "document_id" => chunk.document_id.to_string(),
727 "content" => chunk.content.clone(), "start_offset" => chunk.start_offset,
729 "end_offset" => chunk.end_offset
730 };
731
732 let entities_list: Vec<String> = chunk.entities.iter().map(|e| e.to_string()).collect();
734 chunk_obj["entities"] = entities_list.into();
735
736 chunk_obj["has_embedding"] = chunk.embedding.is_some().into();
738 if let Some(embedding) = &chunk.embedding {
739 chunk_obj["embedding_dimension"] = embedding.len().into();
740 }
741
742 chunks_array.push(chunk_obj).unwrap();
743 }
744 json_data["chunks"] = chunks_array;
745
746 let mut documents_array = json::JsonValue::new_array();
748 for document in self.documents() {
749 let mut meta_obj = json::JsonValue::new_object();
750 for (key, value) in &document.metadata {
751 meta_obj[key] = value.clone().into();
752 }
753
754 let doc_obj = json::object! {
755 "id" => document.id.to_string(),
756 "title" => document.title.clone(),
757 "content" => document.content.clone(), "metadata" => meta_obj
759 };
760 documents_array.push(doc_obj).unwrap();
761 }
762 json_data["documents"] = documents_array;
763
764 fs::write(file_path, json_data.dump())?;
766 tracing::info!("Knowledge graph saved to {file_path}");
767
768 Ok(())
769 }
770
771 pub fn find_entities_by_name(&self, name: &str) -> impl Iterator<Item = &Entity> {
773 let name_lower = name.to_lowercase();
774 self.entities()
775 .filter(move |entity| entity.name.to_lowercase().contains(&name_lower))
776 }
777
778 pub fn get_entity_by_id(&self, id: &str) -> Option<&Entity> {
780 let entity_id = EntityId::new(id.to_string());
781 self.get_entity(&entity_id)
782 }
783
784 pub fn get_entity_relationships(&self, entity_id: &str) -> impl Iterator<Item = &Relationship> {
786 let entity_id = EntityId::new(entity_id.to_string());
787 if let Some(&node_idx) = self.entity_index.get(&entity_id) {
788 self.graph
789 .edges(node_idx)
790 .map(|edge| edge.weight())
791 .collect::<Vec<_>>()
792 .into_iter()
793 } else {
794 Vec::new().into_iter()
795 }
796 }
797
798 pub fn find_relationship_path(
800 &self,
801 entity1: &str,
802 entity2: &str,
803 _max_depth: usize,
804 ) -> Vec<String> {
805 let entity1_id = EntityId::new(entity1.to_string());
806 let entity2_id = EntityId::new(entity2.to_string());
807
808 let node1 = self.entity_index.get(&entity1_id);
809 let node2 = self.entity_index.get(&entity2_id);
810
811 if let (Some(&start), Some(&end)) = (node1, node2) {
812 use petgraph::visit::EdgeRef;
814 for edge in self.graph.edges(start) {
815 if edge.target() == end {
816 return vec![edge.weight().relation_type.clone()];
817 }
818 }
819 }
820
821 Vec::new() }
823
824 #[cfg(feature = "pagerank")]
827 pub fn build_pagerank_calculator(
828 &self,
829 ) -> Result<crate::graph::pagerank::PersonalizedPageRank> {
830 let config = crate::graph::pagerank::PageRankConfig::default();
831 let (adjacency_matrix, node_mapping, reverse_mapping) = self.build_adjacency_matrix()?;
832
833 Ok(crate::graph::pagerank::PersonalizedPageRank::new(
834 config,
835 adjacency_matrix,
836 node_mapping,
837 reverse_mapping,
838 ))
839 }
840
841 #[cfg(feature = "pagerank")]
844 fn build_adjacency_matrix(&self) -> Result<AdjacencyMatrixResult> {
845 let nodes: Vec<EntityId> = self.entities().map(|e| e.id.clone()).collect();
846 let node_mapping: HashMap<EntityId, usize> = nodes
847 .iter()
848 .enumerate()
849 .map(|(i, id)| (id.clone(), i))
850 .collect();
851 let reverse_mapping: HashMap<usize, EntityId> = nodes
852 .iter()
853 .enumerate()
854 .map(|(i, id)| (i, id.clone()))
855 .collect();
856
857 let mut row_indices = Vec::new();
859 let mut col_indices = Vec::new();
860 let mut values = Vec::new();
861
862 for relationship in self.get_all_relationships() {
863 if let (Some(&from_idx), Some(&to_idx)) = (
864 node_mapping.get(&relationship.source),
865 node_mapping.get(&relationship.target),
866 ) {
867 row_indices.push(from_idx);
868 col_indices.push(to_idx);
869 values.push(relationship.confidence as f64);
870 }
871 }
872
873 let matrix = if row_indices.is_empty() {
874 sprs::CsMat::zero((nodes.len(), nodes.len()))
876 } else {
877 let mut triplet_mat = sprs::TriMat::new((nodes.len(), nodes.len()));
879 for ((row, col), val) in row_indices
880 .into_iter()
881 .zip(col_indices.into_iter())
882 .zip(values.into_iter())
883 {
884 triplet_mat.add_triplet(row, col, val);
885 }
886 triplet_mat.to_csr()
887 };
888
889 Ok((matrix, node_mapping, reverse_mapping))
890 }
891
892 pub fn entity_count(&self) -> usize {
894 self.entities().count()
895 }
896
897 pub fn relationship_count(&self) -> usize {
899 self.get_all_relationships().len()
900 }
901
902 pub fn document_count(&self) -> usize {
904 self.documents().count()
905 }
906
907 pub fn relationships(&self) -> impl Iterator<Item = &Relationship> {
909 self.graph.edge_weights()
910 }
911
912 pub fn clear_entities_and_relationships(&mut self) {
916 self.graph.clear();
917 self.entity_index.clear();
918 }
920
921 fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
932 if a.len() != b.len() || a.is_empty() {
933 return 0.0;
934 }
935
936 let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
937 let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
938 let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
939
940 if norm_a == 0.0 || norm_b == 0.0 {
941 return 0.0;
942 }
943
944 dot_product / (norm_a * norm_b)
945 }
946
947 fn calculate_temporal_relevance(range: &crate::graph::temporal::TemporalRange) -> f32 {
961 use std::time::{SystemTime, UNIX_EPOCH};
962
963 let now = SystemTime::now()
965 .duration_since(UNIX_EPOCH)
966 .unwrap_or_default()
967 .as_secs() as i64;
968
969 let mid_point = (range.start + range.end) / 2;
971 let years_ago = ((now - mid_point) / (365 * 24 * 3600)).abs();
972
973 let recency_boost = if years_ago <= 10 {
977 0.3
978 } else if years_ago <= 50 {
979 0.3 * (1.0 - (years_ago - 10) as f32 / 40.0)
980 } else if years_ago <= 200 {
981 0.1 * (1.0 - (years_ago - 50) as f32 / 150.0)
982 } else {
983 0.05 };
985
986 recency_boost.max(0.0)
987 }
988
989 pub fn dynamic_weight(
1007 &self,
1008 relationship: &Relationship,
1009 query_embedding: Option<&[f32]>,
1010 query_concepts: &[String],
1011 ) -> f32 {
1012 let base_weight = relationship.confidence;
1013
1014 let semantic_boost = if let (Some(rel_emb), Some(query_emb)) =
1016 (relationship.embedding.as_deref(), query_embedding)
1017 {
1018 Self::cosine_similarity(rel_emb, query_emb).max(0.0)
1019 } else {
1020 0.0
1021 };
1022
1023 let temporal_boost = if let Some(tr) = &relationship.temporal_range {
1025 Self::calculate_temporal_relevance(tr)
1026 } else {
1027 0.0
1028 };
1029
1030 let concept_boost = query_concepts
1032 .iter()
1033 .filter(|c| {
1034 relationship
1035 .relation_type
1036 .to_lowercase()
1037 .contains(&c.to_lowercase())
1038 })
1039 .count() as f32
1040 * 0.15; let causal_boost = if let Some(strength) = relationship.causal_strength {
1044 strength * 0.2 } else {
1046 0.0
1047 };
1048
1049 base_weight * (1.0 + semantic_boost + temporal_boost + concept_boost + causal_boost)
1051 }
1052
1053 #[cfg(feature = "leiden")]
1057 pub fn to_leiden_graph(&self) -> petgraph::Graph<String, f32, petgraph::Undirected> {
1058 let mut graph = Graph::new_undirected();
1059 let mut node_map = HashMap::new();
1060
1061 for entity in self.entities() {
1063 let idx = graph.add_node(entity.name.clone());
1064 node_map.insert(entity.id.clone(), idx);
1065 }
1066
1067 for rel in self.get_all_relationships() {
1069 if let (Some(&src), Some(&tgt)) = (node_map.get(&rel.source), node_map.get(&rel.target))
1070 {
1071 graph.add_edge(src, tgt, rel.confidence);
1072 }
1073 }
1074
1075 graph
1076 }
1077
1078 #[cfg(feature = "leiden")]
1103 pub fn detect_hierarchical_communities(
1104 &self,
1105 config: crate::graph::leiden::LeidenConfig,
1106 ) -> Result<crate::graph::leiden::HierarchicalCommunities> {
1107 use crate::graph::leiden::LeidenCommunityDetector;
1108
1109 let leiden_graph = self.to_leiden_graph();
1111
1112 let detector = LeidenCommunityDetector::new(config);
1114 let mut communities = detector.detect_communities(&leiden_graph)?;
1115
1116 communities.entity_mapping = Some(self.build_entity_mapping());
1118
1119 Ok(communities)
1120 }
1121
1122 #[cfg(feature = "leiden")]
1125 fn build_entity_mapping(&self) -> HashMap<String, crate::graph::leiden::EntityMetadata> {
1126 use crate::graph::leiden::EntityMetadata;
1127
1128 self.entities()
1129 .map(|entity| {
1130 let metadata = EntityMetadata {
1131 id: entity.id.to_string(),
1132 name: entity.name.clone(),
1133 entity_type: entity.entity_type.clone(),
1134 confidence: entity.confidence,
1135 mention_count: entity.mentions.len(),
1136 };
1137 (entity.name.clone(), metadata)
1138 })
1139 .collect()
1140 }
1141
1142 #[cfg(feature = "async")]
1165 pub async fn build_relationship_hierarchy(
1166 &mut self,
1167 num_levels: usize,
1168 ollama_client: Option<crate::ollama::OllamaClient>,
1169 ) -> Result<()> {
1170 use crate::graph::hierarchical_relationships::HierarchyBuilder;
1171
1172 let builder = HierarchyBuilder::from_graph(self).with_num_levels(num_levels);
1173
1174 let builder = if let Some(client) = ollama_client {
1175 builder.with_ollama_client(client)
1176 } else {
1177 builder
1178 };
1179
1180 let hierarchy = builder.build().await?;
1181 self.relationship_hierarchy = Some(hierarchy);
1182
1183 Ok(())
1184 }
1185}
1186
1187impl Default for KnowledgeGraph {
1188 fn default() -> Self {
1189 Self::new()
1190 }
1191}
1192
1193impl Document {
1194 pub fn new(id: DocumentId, title: String, content: String) -> Self {
1196 Self {
1197 id,
1198 title,
1199 content,
1200 metadata: IndexMap::new(),
1201 chunks: Vec::new(),
1202 }
1203 }
1204
1205 pub fn with_metadata(mut self, key: String, value: String) -> Self {
1207 self.metadata.insert(key, value);
1208 self
1209 }
1210
1211 pub fn with_chunks(mut self, chunks: Vec<TextChunk>) -> Self {
1213 self.chunks = chunks;
1214 self
1215 }
1216}
1217
1218impl TextChunk {
1219 pub fn new(
1221 id: ChunkId,
1222 document_id: DocumentId,
1223 content: String,
1224 start_offset: usize,
1225 end_offset: usize,
1226 ) -> Self {
1227 Self {
1228 id,
1229 document_id,
1230 content,
1231 start_offset,
1232 end_offset,
1233 embedding: None,
1234 entities: Vec::new(),
1235 metadata: ChunkMetadata::default(),
1236 }
1237 }
1238
1239 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
1241 self.embedding = Some(embedding);
1242 self
1243 }
1244
1245 pub fn with_entities(mut self, entities: Vec<EntityId>) -> Self {
1247 self.entities = entities;
1248 self
1249 }
1250
1251 pub fn with_metadata(mut self, metadata: ChunkMetadata) -> Self {
1253 self.metadata = metadata;
1254 self
1255 }
1256}
1257
1258impl Entity {
1259 pub fn new(id: EntityId, name: String, entity_type: String, confidence: f32) -> Self {
1261 Self {
1262 id,
1263 name,
1264 entity_type,
1265 confidence,
1266 mentions: Vec::new(),
1267 embedding: None,
1268 first_mentioned: None,
1270 last_mentioned: None,
1271 temporal_validity: None,
1272 }
1273 }
1274
1275 pub fn with_mentions(mut self, mentions: Vec<EntityMention>) -> Self {
1277 self.mentions = mentions;
1278 self
1279 }
1280
1281 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
1283 self.embedding = Some(embedding);
1284 self
1285 }
1286
1287 pub fn with_temporal_validity(mut self, start: i64, end: i64) -> Self {
1289 self.temporal_validity = Some(crate::graph::temporal::TemporalRange::new(start, end));
1290 self
1291 }
1292
1293 pub fn with_mention_times(mut self, first: i64, last: i64) -> Self {
1295 self.first_mentioned = Some(first);
1296 self.last_mentioned = Some(last);
1297 self
1298 }
1299}
1300
#[cfg(test)]
mod temporal_tests {
    use super::*;
    use crate::graph::temporal::TemporalRelationType;

    /// Seconds in a 365-day year, used to build far-past timestamps.
    const YEAR_SECS: i64 = 365 * 24 * 3600;

    /// Builds an `EntityId` from a string slice.
    fn eid(raw: &str) -> EntityId {
        EntityId::new(raw.to_string())
    }

    /// Builds an entity from string slices.
    fn entity(id: &str, name: &str, kind: &str, confidence: f32) -> Entity {
        Entity::new(eid(id), name.to_string(), kind.to_string(), confidence)
    }

    /// Builds a relationship from string slices.
    fn relationship(source: &str, target: &str, kind: &str, confidence: f32) -> Relationship {
        Relationship::new(eid(source), eid(target), kind.to_string(), confidence)
    }

    #[test]
    fn test_entity_with_temporal_fields() {
        let born = -470 * YEAR_SECS;
        let died = -399 * YEAR_SECS;

        let socrates = entity("socrates", "Socrates", "PERSON", 0.9)
            .with_temporal_validity(born, died)
            .with_mention_times(1000, 2000);

        assert_eq!(socrates.name, "Socrates");
        assert!(socrates.temporal_validity.is_some());
        assert!(socrates.first_mentioned.is_some());
        assert!(socrates.last_mentioned.is_some());

        let validity = socrates.temporal_validity.unwrap();
        assert_eq!(validity.start, born);
        assert_eq!(validity.end, died);
    }

    #[test]
    fn test_entity_temporal_serialization() {
        let original = entity("test", "Test Entity", "TEST", 0.8).with_temporal_validity(100, 200);

        // Round-trip through JSON.
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Entity = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.name, "Test Entity");
        assert!(decoded.temporal_validity.is_some());

        let validity = decoded.temporal_validity.unwrap();
        assert_eq!(validity.start, 100);
        assert_eq!(validity.end, 200);
    }

    #[test]
    fn test_relationship_with_temporal_type() {
        let rel = relationship("socrates", "plato", "TAUGHT", 0.9)
            .with_temporal_type(TemporalRelationType::Caused)
            .with_temporal_range(100, 200);

        assert!(rel.temporal_type.is_some());
        assert!(rel.temporal_range.is_some());
        assert!(rel.causal_strength.is_some());

        let temporal_type = rel.temporal_type.unwrap();
        assert_eq!(temporal_type, TemporalRelationType::Caused);

        // The strength was filled in from the temporal type's default.
        let strength = rel.causal_strength.unwrap();
        assert_eq!(strength, 0.9);
    }

    #[test]
    fn test_relationship_with_causal_strength() {
        let rel = relationship("a", "b", "INFLUENCED", 0.8)
            .with_temporal_type(TemporalRelationType::Enabled)
            .with_causal_strength(0.75);

        assert!(rel.causal_strength.is_some());
        assert_eq!(rel.causal_strength.unwrap(), 0.75);
    }

    #[test]
    fn test_relationship_temporal_serialization() {
        let original = relationship("source", "target", "CAUSED", 0.9)
            .with_temporal_type(TemporalRelationType::Caused)
            .with_temporal_range(100, 200)
            .with_causal_strength(0.95);

        // Round-trip through JSON.
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Relationship = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.relation_type, "CAUSED");
        assert!(decoded.temporal_type.is_some());
        assert!(decoded.temporal_range.is_some());
        assert!(decoded.causal_strength.is_some());

        let temporal_type = decoded.temporal_type.unwrap();
        assert_eq!(temporal_type, TemporalRelationType::Caused);

        let range = decoded.temporal_range.unwrap();
        assert_eq!(range.start, 100);
        assert_eq!(range.end, 200);

        assert_eq!(decoded.causal_strength.unwrap(), 0.95);
    }

    #[test]
    fn test_entity_backward_compatibility() {
        let plain = entity("test", "Test", "TEST", 0.9);

        assert!(plain.first_mentioned.is_none());
        assert!(plain.last_mentioned.is_none());
        assert!(plain.temporal_validity.is_none());

        // Unset temporal fields must not appear in the serialized form.
        let encoded = serde_json::to_string(&plain).unwrap();
        assert!(!encoded.contains("first_mentioned"));
        assert!(!encoded.contains("last_mentioned"));
        assert!(!encoded.contains("temporal_validity"));
    }

    #[test]
    fn test_relationship_backward_compatibility() {
        let plain = relationship("a", "b", "RELATED_TO", 0.8);

        assert!(plain.temporal_type.is_none());
        assert!(plain.temporal_range.is_none());
        assert!(plain.causal_strength.is_none());

        // Unset temporal fields must not appear in the serialized form.
        let encoded = serde_json::to_string(&plain).unwrap();
        assert!(!encoded.contains("temporal_type"));
        assert!(!encoded.contains("temporal_range"));
        assert!(!encoded.contains("causal_strength"));
    }
}