pub mod error;
pub mod metadata;
#[cfg(feature = "async")]
pub mod registry;
#[cfg(feature = "async")]
pub mod traits;
#[cfg(all(feature = "async", feature = "ollama"))]
pub mod ollama_adapters;
#[cfg(feature = "async")]
pub mod entity_adapters;
#[cfg(feature = "async")]
pub mod retrieval_adapters;
#[cfg(feature = "async")]
pub mod test_utils;
#[cfg(test)]
pub mod test_traits;
pub use error::{ErrorContext, ErrorSeverity, ErrorSuggestion, GraphRAGError, Result};
pub use metadata::ChunkMetadata;
#[cfg(feature = "async")]
pub use registry::{RegistryBuilder, ServiceConfig, ServiceContext, ServiceRegistry};
#[cfg(feature = "async")]
pub use traits::*;
/// Strategy for splitting raw text into [`TextChunk`]s.
///
/// Implementors must be `Send + Sync`.
pub trait ChunkingStrategy: Send + Sync {
/// Splits `text` and returns the resulting chunks.
fn chunk(&self, text: &str) -> Vec<TextChunk>;
}
use indexmap::IndexMap;
use petgraph::{graph::NodeIndex, Graph};
use std::collections::HashMap;
#[cfg(feature = "pagerank")]
use sprs::CsMat;
/// Result of `KnowledgeGraph::build_adjacency_matrix`: the sparse adjacency
/// matrix, the entity-id -> matrix-index map, and the inverse
/// matrix-index -> entity-id map.
#[cfg(feature = "pagerank")]
type AdjacencyMatrixResult = (
CsMat<f64>,
HashMap<EntityId, usize>,
HashMap<usize, EntityId>,
);
/// Strongly typed identifier of a [`Document`], wrapping the raw string id.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct DocumentId(pub String);

impl DocumentId {
    /// Wraps an owned string as a document identifier.
    pub fn new(id: String) -> Self {
        DocumentId(id)
    }
}

impl std::fmt::Display for DocumentId {
    /// Writes the wrapped string verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

impl From<String> for DocumentId {
    /// Converts an owned string into a document identifier.
    fn from(s: String) -> Self {
        DocumentId::new(s)
    }
}

impl From<DocumentId> for String {
    /// Unwraps the identifier back into its raw string.
    fn from(id: DocumentId) -> Self {
        let DocumentId(raw) = id;
        raw
    }
}
/// Strongly typed identifier of an [`Entity`], wrapping the raw string id.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct EntityId(pub String);

impl EntityId {
    /// Wraps an owned string as an entity identifier.
    pub fn new(id: String) -> Self {
        EntityId(id)
    }
}

impl std::fmt::Display for EntityId {
    /// Writes the wrapped string verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

impl From<String> for EntityId {
    /// Converts an owned string into an entity identifier.
    fn from(s: String) -> Self {
        EntityId::new(s)
    }
}

impl From<EntityId> for String {
    /// Unwraps the identifier back into its raw string.
    fn from(id: EntityId) -> Self {
        let EntityId(raw) = id;
        raw
    }
}
/// Strongly typed identifier of a [`TextChunk`], wrapping the raw string id.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct ChunkId(pub String);

impl ChunkId {
    /// Wraps an owned string as a chunk identifier.
    pub fn new(id: String) -> Self {
        ChunkId(id)
    }
}

impl std::fmt::Display for ChunkId {
    /// Writes the wrapped string verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

impl From<String> for ChunkId {
    /// Converts an owned string into a chunk identifier.
    fn from(s: String) -> Self {
        ChunkId::new(s)
    }
}

impl From<ChunkId> for String {
    /// Unwraps the identifier back into its raw string.
    fn from(id: ChunkId) -> Self {
        let ChunkId(raw) = id;
        raw
    }
}
/// A source document together with the chunks attached to it.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Document {
/// Identifier of the document; `KnowledgeGraph` stores documents keyed by it.
pub id: DocumentId,
/// Document title.
pub title: String,
/// Full text content of the document.
pub content: String,
/// Free-form string key/value metadata, kept in insertion order.
pub metadata: IndexMap<String, String>,
/// Chunks attached to this document.
pub chunks: Vec<TextChunk>,
}
/// A piece of document text, optionally carrying an embedding and the
/// entities found in it.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct TextChunk {
/// Identifier of the chunk; `KnowledgeGraph` stores chunks keyed by it.
pub id: ChunkId,
/// Identifier of the document this chunk belongs to.
pub document_id: DocumentId,
/// Text content of the chunk.
pub content: String,
/// Start offset of the chunk.
/// NOTE(review): presumably an offset into the parent document's content;
/// confirm whether it is byte- or char-based.
pub start_offset: usize,
/// End offset of the chunk (same caveat as `start_offset`).
pub end_offset: usize,
/// Embedding vector, if one has been computed.
pub embedding: Option<Vec<f32>>,
/// Identifiers of entities associated with this chunk.
pub entities: Vec<EntityId>,
/// Additional chunk metadata.
pub metadata: ChunkMetadata,
}
/// A node of the knowledge graph.
///
/// The three temporal fields are optional: they are omitted from serialized
/// output when `None` and default to `None` when absent on deserialization.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Entity {
/// Identifier of the entity; `KnowledgeGraph` indexes nodes by it.
pub id: EntityId,
/// Display name of the entity.
pub name: String,
/// Entity type label (the tests in this file use e.g. `"PERSON"`).
pub entity_type: String,
/// Confidence score attached to the entity.
pub confidence: f32,
/// Places in chunks where the entity is mentioned.
pub mentions: Vec<EntityMention>,
/// Embedding vector, if one has been computed.
pub embedding: Option<Vec<f32>>,
/// NOTE(review): presumably the timestamp of the first mention; confirm the unit.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub first_mentioned: Option<i64>,
/// NOTE(review): presumably the timestamp of the last mention; confirm the unit.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub last_mentioned: Option<i64>,
/// Time range attached to the entity, if known.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub temporal_validity: Option<crate::graph::temporal::TemporalRange>,
}
/// A single mention of an entity inside a chunk.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct EntityMention {
/// Chunk in which the mention occurs.
pub chunk_id: ChunkId,
/// Start offset of the mention.
/// NOTE(review): presumably relative to the chunk content; confirm.
pub start_offset: usize,
/// End offset of the mention (same caveat as `start_offset`).
pub end_offset: usize,
/// Confidence score attached to this mention.
pub confidence: f32,
}
/// A directed edge of the knowledge graph from `source` to `target`.
///
/// The embedding and the temporal/causal fields are optional: they are omitted
/// from serialized output when `None` and default to `None` when absent on
/// deserialization.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Relationship {
/// Entity the relationship starts from.
pub source: EntityId,
/// Entity the relationship points to.
pub target: EntityId,
/// Relation label (the tests in this file use e.g. `"TAUGHT"`).
pub relation_type: String,
/// Confidence score; used as the base weight in `KnowledgeGraph::dynamic_weight`.
pub confidence: f32,
/// Chunks that provide context for the relationship.
pub context: Vec<ChunkId>,
/// Embedding vector of the relationship, if one has been computed.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub embedding: Option<Vec<f32>>,
/// Temporal/causal classification of the relationship.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub temporal_type: Option<crate::graph::temporal::TemporalRelationType>,
/// Time range attached to the relationship.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub temporal_range: Option<crate::graph::temporal::TemporalRange>,
/// Causal strength; `with_causal_strength` clamps it to `[0.0, 1.0]`, but the
/// field is public and can be set to any value directly.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub causal_strength: Option<f32>,
}
impl Relationship {
pub fn new(source: EntityId, target: EntityId, relation_type: String, confidence: f32) -> Self {
Self {
source,
target,
relation_type,
confidence,
context: Vec::new(),
embedding: None,
temporal_type: None,
temporal_range: None,
causal_strength: None,
}
}
pub fn with_context(mut self, context: Vec<ChunkId>) -> Self {
self.context = context;
self
}
pub fn with_temporal_type(
mut self,
temporal_type: crate::graph::temporal::TemporalRelationType,
) -> Self {
self.temporal_type = Some(temporal_type);
if self.causal_strength.is_none() && temporal_type.is_causal() {
self.causal_strength = Some(temporal_type.default_strength());
}
self
}
pub fn with_temporal_range(mut self, start: i64, end: i64) -> Self {
self.temporal_range = Some(crate::graph::temporal::TemporalRange::new(start, end));
self
}
pub fn with_causal_strength(mut self, strength: f32) -> Self {
self.causal_strength = Some(strength.clamp(0.0, 1.0));
self
}
pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
self.embedding = Some(embedding);
self
}
}
/// In-memory knowledge graph: entities as nodes, relationships as edges, plus
/// the documents and chunks they were derived from.
#[derive(Debug, Clone)]
pub struct KnowledgeGraph {
// Entity/relationship graph.
graph: Graph<Entity, Relationship>,
// Maps an entity id to its node in `graph`.
entity_index: HashMap<EntityId, NodeIndex>,
// Documents keyed by id, in insertion order.
documents: IndexMap<DocumentId, Document>,
// Chunks keyed by id, in insertion order.
chunks: IndexMap<ChunkId, TextChunk>,
/// Relationship hierarchy, populated by `build_relationship_hierarchy`.
#[cfg(feature = "async")]
pub relationship_hierarchy:
Option<crate::graph::hierarchical_relationships::RelationshipHierarchy>,
}
impl KnowledgeGraph {
pub fn new() -> Self {
Self {
graph: Graph::new(),
entity_index: HashMap::new(),
documents: IndexMap::new(),
chunks: IndexMap::new(),
#[cfg(feature = "async")]
relationship_hierarchy: None,
}
}
/// Stores `document` and registers a copy of each of its chunks in the
/// graph-level chunk index.
///
/// A document or chunk already stored under the same id is replaced.
/// Currently always returns `Ok(())`.
pub fn add_document(&mut self, document: Document) -> Result<()> {
    self.chunks.extend(
        document
            .chunks
            .iter()
            .map(|chunk| (chunk.id.clone(), chunk.clone())),
    );
    self.documents.insert(document.id.clone(), document);
    Ok(())
}
/// Adds `entity` as a new graph node and returns the node's index.
///
/// A new node is always created. If an entity with the same id was added
/// before, the id index is re-pointed at the new node while the earlier node
/// stays in the graph. Currently always returns `Ok`.
pub fn add_entity(&mut self, entity: Entity) -> Result<NodeIndex> {
    let lookup_key = entity.id.clone();
    let created = self.graph.add_node(entity);
    self.entity_index.insert(lookup_key, created);
    Ok(created)
}
/// Adds `relationship` as a directed edge from its source to its target entity.
///
/// # Errors
/// Returns `GraphRAGError::GraphConstruction` if either endpoint has not been
/// added to the graph; the source is checked first.
pub fn add_relationship(&mut self, relationship: Relationship) -> Result<()> {
    let from = match self.entity_index.get(&relationship.source) {
        Some(&idx) => idx,
        None => {
            return Err(crate::GraphRAGError::GraphConstruction {
                message: format!("Source entity {} not found", relationship.source),
            });
        }
    };
    let to = match self.entity_index.get(&relationship.target) {
        Some(&idx) => idx,
        None => {
            return Err(crate::GraphRAGError::GraphConstruction {
                message: format!("Target entity {} not found", relationship.target),
            });
        }
    };
    self.graph.add_edge(from, to, relationship);
    Ok(())
}
/// Stores `chunk` in the chunk index, replacing any chunk with the same id.
/// Currently always returns `Ok(())`.
pub fn add_chunk(&mut self, chunk: TextChunk) -> Result<()> {
    let key = chunk.id.clone();
    self.chunks.insert(key, chunk);
    Ok(())
}
/// Returns the entity registered under `id`, or `None` if the id is unknown.
pub fn get_entity(&self, id: &EntityId) -> Option<&Entity> {
    self.entity_index
        .get(id)
        .and_then(|&node_idx| self.graph.node_weight(node_idx))
}
/// Returns the document stored under `id`, or `None` if there is none.
pub fn get_document(&self, id: &DocumentId) -> Option<&Document> {
self.documents.get(id)
}
/// Returns the chunk stored under `id`, or `None` if there is none.
pub fn get_chunk(&self, id: &ChunkId) -> Option<&TextChunk> {
self.chunks.get(id)
}
/// Returns a mutable reference to the entity registered under `id`, or `None`
/// if the id is unknown.
pub fn get_entity_mut(&mut self, id: &EntityId) -> Option<&mut Entity> {
    match self.entity_index.get(id) {
        Some(&node_idx) => self.graph.node_weight_mut(node_idx),
        None => None,
    }
}
/// Returns a mutable reference to the chunk stored under `id`, or `None` if
/// there is none.
pub fn get_chunk_mut(&mut self, id: &ChunkId) -> Option<&mut TextChunk> {
self.chunks.get_mut(id)
}
/// Iterates over every entity (node weight) in the graph.
pub fn entities(&self) -> impl Iterator<Item = &Entity> {
self.graph.node_weights()
}
/// Iterates mutably over every entity (node weight) in the graph.
///
/// Changing an entity's `id` here does not update the internal id index.
pub fn entities_mut(&mut self) -> impl Iterator<Item = &mut Entity> {
self.graph.node_weights_mut()
}
/// Iterates over the stored documents.
pub fn documents(&self) -> impl Iterator<Item = &Document> {
self.documents.values()
}
/// Iterates mutably over the stored documents.
pub fn documents_mut(&mut self) -> impl Iterator<Item = &mut Document> {
self.documents.values_mut()
}
/// Iterates over the stored chunks.
pub fn chunks(&self) -> impl Iterator<Item = &TextChunk> {
self.chunks.values()
}
/// Iterates mutably over the stored chunks.
pub fn chunks_mut(&mut self) -> impl Iterator<Item = &mut TextChunk> {
self.chunks.values_mut()
}
/// Returns, for every outgoing edge of `entity_id`, the target entity paired
/// with the relationship on that edge.
///
/// Returns an empty vector if the entity is unknown.
pub fn get_neighbors(&self, entity_id: &EntityId) -> Vec<(&Entity, &Relationship)> {
    use petgraph::visit::EdgeRef;

    match self.entity_index.get(entity_id) {
        None => Vec::new(),
        Some(&origin) => {
            let mut neighbors = Vec::new();
            for edge in self.graph.edges(origin) {
                if let Some(neighbor) = self.graph.node_weight(edge.target()) {
                    neighbors.push((neighbor, edge.weight()));
                }
            }
            neighbors
        }
    }
}
/// Collects references to every relationship (edge weight) into a vector.
pub fn get_all_relationships(&self) -> Vec<&Relationship> {
self.graph.edge_weights().collect()
}
/// Loads a knowledge graph from a JSON file in the layout written by
/// `save_to_json`.
///
/// The top-level `entities`, `relationships`, `chunks` and `documents` arrays
/// are read in that order; any of them that is missing or not an array is
/// skipped. Missing or mistyped scalar fields fall back to `""`, `0` or `0.0`.
/// Embeddings, temporal fields and chunk metadata are not read, and loaded
/// documents get an empty `chunks` list.
///
/// Relationships whose source or target entity is unknown are skipped (best
/// effort). With the `tracing` feature enabled each skipped relationship is
/// logged as a warning instead of being dropped silently.
///
/// # Errors
/// Returns an error if the file cannot be read, if its content is not valid
/// JSON (`GraphRAGError::Config`), or if adding an entity, chunk or document
/// fails.
pub fn load_from_json(file_path: &str) -> Result<Self> {
    use std::fs;

    // Missing or non-string fields deliberately degrade to an empty string.
    let text = |value: &json::JsonValue| value.as_str().unwrap_or("").to_string();

    let json_str = fs::read_to_string(file_path)?;
    let json_data = json::parse(&json_str).map_err(|e| GraphRAGError::Config {
        message: format!("Failed to parse JSON: {}", e),
    })?;
    let mut kg = KnowledgeGraph::new();

    // Entities go first so that relationships can resolve their endpoints.
    if json_data["entities"].is_array() {
        for entity_obj in json_data["entities"].members() {
            let mut mentions = Vec::new();
            if entity_obj["mentions"].is_array() {
                for mention_obj in entity_obj["mentions"].members() {
                    mentions.push(EntityMention {
                        chunk_id: ChunkId::new(text(&mention_obj["chunk_id"])),
                        start_offset: mention_obj["start_offset"].as_usize().unwrap_or(0),
                        end_offset: mention_obj["end_offset"].as_usize().unwrap_or(0),
                        confidence: mention_obj["confidence"].as_f32().unwrap_or(0.0),
                    });
                }
            }
            let entity = Entity {
                id: EntityId::new(text(&entity_obj["id"])),
                name: text(&entity_obj["name"]),
                entity_type: text(&entity_obj["type"]),
                confidence: entity_obj["confidence"].as_f32().unwrap_or(0.0),
                mentions,
                embedding: None,
                first_mentioned: None,
                last_mentioned: None,
                temporal_validity: None,
            };
            kg.add_entity(entity)?;
        }
    }

    if json_data["relationships"].is_array() {
        for rel_obj in json_data["relationships"].members() {
            let mut context = Vec::new();
            if rel_obj["context_chunks"].is_array() {
                for chunk_id in rel_obj["context_chunks"].members() {
                    // Non-string entries are ignored.
                    if let Some(chunk_id_str) = chunk_id.as_str() {
                        context.push(ChunkId::new(chunk_id_str.to_string()));
                    }
                }
            }
            let relationship = Relationship {
                source: EntityId::new(text(&rel_obj["source_id"])),
                target: EntityId::new(text(&rel_obj["target_id"])),
                relation_type: text(&rel_obj["relation_type"]),
                confidence: rel_obj["confidence"].as_f32().unwrap_or(0.0),
                context,
                embedding: None,
                temporal_type: None,
                temporal_range: None,
                causal_strength: None,
            };
            // Best effort: a relationship with an unknown endpoint is skipped
            // rather than aborting the whole load, but the skip is reported.
            if let Err(_err) = kg.add_relationship(relationship) {
                #[cfg(feature = "tracing")]
                tracing::warn!("Skipping relationship while loading {file_path}: {_err}");
            }
        }
    }

    if json_data["chunks"].is_array() {
        for chunk_obj in json_data["chunks"].members() {
            let mut entities = Vec::new();
            if chunk_obj["entities"].is_array() {
                for entity_id in chunk_obj["entities"].members() {
                    // Non-string entries are ignored.
                    if let Some(entity_id_str) = entity_id.as_str() {
                        entities.push(EntityId::new(entity_id_str.to_string()));
                    }
                }
            }
            let chunk = TextChunk {
                id: ChunkId::new(text(&chunk_obj["id"])),
                document_id: DocumentId::new(text(&chunk_obj["document_id"])),
                content: text(&chunk_obj["content"]),
                start_offset: chunk_obj["start_offset"].as_usize().unwrap_or(0),
                end_offset: chunk_obj["end_offset"].as_usize().unwrap_or(0),
                embedding: None,
                entities,
                metadata: ChunkMetadata::default(),
            };
            kg.add_chunk(chunk)?;
        }
    }

    if json_data["documents"].is_array() {
        for doc_obj in json_data["documents"].members() {
            let mut metadata = IndexMap::new();
            if doc_obj["metadata"].is_object() {
                for (key, value) in doc_obj["metadata"].entries() {
                    metadata.insert(key.to_string(), text(value));
                }
            }
            let document = Document {
                id: DocumentId::new(text(&doc_obj["id"])),
                title: text(&doc_obj["title"]),
                content: text(&doc_obj["content"]),
                metadata,
                // Chunks are stored only in the graph-level chunk index.
                chunks: vec![],
            };
            kg.add_document(document)?;
        }
    }

    Ok(kg)
}
/// Writes the graph to `file_path` as a single JSON document.
///
/// The output has a `metadata` header followed by `entities`, `relationships`,
/// `chunks` and `documents` arrays. Embeddings are not stored in full: entities
/// record only the dimension and the first five values, chunks only the
/// dimension. Temporal and causal fields, relationship embeddings, chunk
/// metadata and documents' own chunk lists are not written.
///
/// # Errors
/// Returns an error if the file cannot be written.
pub fn save_to_json(&self, file_path: &str) -> Result<()> {
use std::fs;
let mut json_data = json::JsonValue::new_object();
// Header with summary counts and the creation time (UTC, RFC 3339).
json_data["metadata"] = json::object! {
"format_version" => "2.0",
"created_at" => chrono::Utc::now().to_rfc3339(),
"total_entities" => self.entities().count(),
"total_relationships" => self.get_all_relationships().len(),
"total_chunks" => self.chunks().count(),
"total_documents" => self.documents().count()
};
let mut entities_array = json::JsonValue::new_array();
for entity in self.entities() {
let mut entity_obj = json::object! {
"id" => entity.id.to_string(),
"name" => entity.name.clone(),
"type" => entity.entity_type.clone(),
"confidence" => entity.confidence,
"mentions_count" => entity.mentions.len()
};
let mut mentions_array = json::JsonValue::new_array();
for mention in &entity.mentions {
mentions_array
.push(json::object! {
"chunk_id" => mention.chunk_id.to_string(),
"start_offset" => mention.start_offset,
"end_offset" => mention.end_offset,
"confidence" => mention.confidence
})
.unwrap();
}
entity_obj["mentions"] = mentions_array;
// Only a summary of the embedding is written: its dimension and a sample
// of at most five leading values.
if let Some(embedding) = &entity.embedding {
entity_obj["has_embedding"] = true.into();
entity_obj["embedding_dimension"] = embedding.len().into();
let sample_embedding: Vec<f32> = embedding.iter().take(5).cloned().collect();
entity_obj["embedding_sample"] = sample_embedding.into();
} else {
entity_obj["has_embedding"] = false.into();
}
entities_array.push(entity_obj).unwrap();
}
json_data["entities"] = entities_array;
let mut relationships_array = json::JsonValue::new_array();
for relationship in self.get_all_relationships() {
let rel_obj = json::object! {
"source_id" => relationship.source.to_string(),
"target_id" => relationship.target.to_string(),
"relation_type" => relationship.relation_type.clone(),
"confidence" => relationship.confidence,
"context_chunks" => relationship.context.iter()
.map(|c| c.to_string())
.collect::<Vec<String>>()
};
relationships_array.push(rel_obj).unwrap();
}
json_data["relationships"] = relationships_array;
let mut chunks_array = json::JsonValue::new_array();
for chunk in self.chunks() {
let mut chunk_obj = json::object! {
"id" => chunk.id.to_string(),
"document_id" => chunk.document_id.to_string(),
"content" => chunk.content.clone(), "start_offset" => chunk.start_offset,
"end_offset" => chunk.end_offset
};
let entities_list: Vec<String> = chunk.entities.iter().map(|e| e.to_string()).collect();
chunk_obj["entities"] = entities_list.into();
// Chunk embeddings are recorded only as a presence flag plus dimension.
chunk_obj["has_embedding"] = chunk.embedding.is_some().into();
if let Some(embedding) = &chunk.embedding {
chunk_obj["embedding_dimension"] = embedding.len().into();
}
chunks_array.push(chunk_obj).unwrap();
}
json_data["chunks"] = chunks_array;
let mut documents_array = json::JsonValue::new_array();
for document in self.documents() {
let mut meta_obj = json::JsonValue::new_object();
for (key, value) in &document.metadata {
meta_obj[key] = value.clone().into();
}
let doc_obj = json::object! {
"id" => document.id.to_string(),
"title" => document.title.clone(),
"content" => document.content.clone(), "metadata" => meta_obj
};
documents_array.push(doc_obj).unwrap();
}
json_data["documents"] = documents_array;
fs::write(file_path, json_data.dump())?;
#[cfg(feature = "tracing")]
tracing::info!("Knowledge graph saved to {file_path}");
Ok(())
}
/// Iterates over the entities whose name contains `name`, compared
/// case-insensitively via lowercasing.
pub fn find_entities_by_name(&self, name: &str) -> impl Iterator<Item = &Entity> {
    let needle = name.to_lowercase();
    self.graph.node_weights().filter(move |candidate| {
        let haystack = candidate.name.to_lowercase();
        haystack.contains(&needle)
    })
}
pub fn get_entity_by_id(&self, id: &str) -> Option<&Entity> {
let entity_id = EntityId::new(id.to_string());
self.get_entity(&entity_id)
}
pub fn get_entity_relationships(&self, entity_id: &str) -> impl Iterator<Item = &Relationship> {
let entity_id = EntityId::new(entity_id.to_string());
if let Some(&node_idx) = self.entity_index.get(&entity_id) {
self.graph
.edges(node_idx)
.map(|edge| edge.weight())
.collect::<Vec<_>>()
.into_iter()
} else {
Vec::new().into_iter()
}
}
pub fn find_relationship_path(
&self,
entity1: &str,
entity2: &str,
_max_depth: usize,
) -> Vec<String> {
let entity1_id = EntityId::new(entity1.to_string());
let entity2_id = EntityId::new(entity2.to_string());
let node1 = self.entity_index.get(&entity1_id);
let node2 = self.entity_index.get(&entity2_id);
if let (Some(&start), Some(&end)) = (node1, node2) {
use petgraph::visit::EdgeRef;
for edge in self.graph.edges(start) {
if edge.target() == end {
return vec![edge.weight().relation_type.clone()];
}
}
}
Vec::new() }
/// Builds a personalized PageRank calculator over this graph, using the
/// default PageRank configuration.
///
/// # Errors
/// Propagates any error from building the adjacency matrix.
#[cfg(feature = "pagerank")]
pub fn build_pagerank_calculator(
    &self,
) -> Result<crate::graph::pagerank::PersonalizedPageRank> {
    use crate::graph::pagerank::{PageRankConfig, PersonalizedPageRank};

    let config = PageRankConfig::default();
    self.build_adjacency_matrix()
        .map(|(adjacency_matrix, node_mapping, reverse_mapping)| {
            PersonalizedPageRank::new(config, adjacency_matrix, node_mapping, reverse_mapping)
        })
}
/// Builds the sparse adjacency matrix of the graph.
///
/// Entities are numbered in iteration order; each relationship contributes
/// its confidence at `(source index, target index)`. Returns the matrix with
/// the id -> index and index -> id maps. Currently always returns `Ok`.
#[cfg(feature = "pagerank")]
fn build_adjacency_matrix(&self) -> Result<AdjacencyMatrixResult> {
    let mut node_mapping: HashMap<EntityId, usize> = HashMap::new();
    let mut reverse_mapping: HashMap<usize, EntityId> = HashMap::new();
    let mut node_total = 0usize;
    for (position, entity) in self.entities().enumerate() {
        node_mapping.insert(entity.id.clone(), position);
        reverse_mapping.insert(position, entity.id.clone());
        node_total = position + 1;
    }

    // Relationships whose endpoints cannot be resolved are left out.
    let triplets: Vec<(usize, usize, f64)> = self
        .graph
        .edge_weights()
        .filter_map(|relationship| {
            let row = *node_mapping.get(&relationship.source)?;
            let col = *node_mapping.get(&relationship.target)?;
            Some((row, col, f64::from(relationship.confidence)))
        })
        .collect();

    let shape = (node_total, node_total);
    let matrix = if triplets.is_empty() {
        sprs::CsMat::zero(shape)
    } else {
        let mut triplet_mat = sprs::TriMat::new(shape);
        for (row, col, val) in triplets {
            triplet_mat.add_triplet(row, col, val);
        }
        triplet_mat.to_csr()
    };
    Ok((matrix, node_mapping, reverse_mapping))
}
/// Returns the number of entities (graph nodes).
pub fn entity_count(&self) -> usize {
    // The graph tracks its node count; no need to walk every node.
    self.graph.node_count()
}
/// Returns the number of relationships (graph edges).
pub fn relationship_count(&self) -> usize {
    // Avoids allocating a vector of all edge references just to take its length.
    self.graph.edge_count()
}
/// Returns the number of stored documents.
pub fn document_count(&self) -> usize {
    self.documents.len()
}
/// Iterates over every relationship (edge weight) without allocating.
pub fn relationships(&self) -> impl Iterator<Item = &Relationship> {
self.graph.edge_weights()
}
/// Removes all entities and relationships from the graph and empties the
/// entity id index.
///
/// Documents and chunks are left untouched.
pub fn clear_entities_and_relationships(&mut self) {
self.graph.clear();
self.entity_index.clear();
}
/// Cosine similarity of two vectors.
///
/// Returns `0.0` when the slices differ in length, are empty, or when either
/// vector has zero magnitude.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.is_empty() || a.len() != b.len() {
        return 0.0;
    }
    let magnitude = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
    let (norm_a, norm_b) = (magnitude(a), magnitude(b));
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }
    let dot_product = a.iter().zip(b).map(|(x, y)| x * y).sum::<f32>();
    dot_product / (norm_a * norm_b)
}
/// Computes a recency boost for `range`, based on how many 365-day years lie
/// between the midpoint of the range and the current system time.
///
/// Both the range and "now" are interpreted as seconds relative to the Unix
/// epoch. The boost is:
/// - `0.3` up to 10 years,
/// - falling linearly from `0.3` to `0.0` between 10 and 50 years,
/// - falling linearly from `0.1` to `0.0` between 50 and 200 years,
/// - `0.05` beyond 200 years.
///
/// NOTE(review): the boost rises again just past 50 years and past 200 years;
/// this reproduces the existing curve. Confirm that it is intended.
///
/// The arithmetic is done in `i128`, so extreme `i64` range bounds cannot
/// overflow (which would panic in debug builds and wrap in release builds).
fn calculate_temporal_relevance(range: &crate::graph::temporal::TemporalRange) -> f32 {
    use std::time::{SystemTime, UNIX_EPOCH};

    const SECONDS_PER_YEAR: i128 = 365 * 24 * 3600;

    let now = i128::from(
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs(),
    );
    let mid_point = (i128::from(range.start) + i128::from(range.end)) / 2;
    let years_ago = ((now - mid_point) / SECONDS_PER_YEAR).abs();
    let recency_boost = if years_ago <= 10 {
        0.3
    } else if years_ago <= 50 {
        0.3 * (1.0 - (years_ago - 10) as f32 / 40.0)
    } else if years_ago <= 200 {
        0.1 * (1.0 - (years_ago - 50) as f32 / 150.0)
    } else {
        0.05
    };
    recency_boost.max(0.0)
}
/// Computes a query-dependent weight for `relationship`.
///
/// The result is `confidence * (1 + boosts)`, where the boosts are:
/// - semantic: cosine similarity between the relationship embedding and
///   `query_embedding`, floored at `0.0` (and `0.0` if either is missing);
/// - temporal: the recency boost of the relationship's temporal range, if any;
/// - concept: `0.15` for every entry of `query_concepts` that occurs
///   (case-insensitively) inside the relation type;
/// - causal: `0.2 * causal_strength`, if a causal strength is set.
pub fn dynamic_weight(
    &self,
    relationship: &Relationship,
    query_embedding: Option<&[f32]>,
    query_concepts: &[String],
) -> f32 {
    let base_weight = relationship.confidence;

    let semantic_boost = match (relationship.embedding.as_deref(), query_embedding) {
        (Some(rel_emb), Some(query_emb)) => Self::cosine_similarity(rel_emb, query_emb).max(0.0),
        _ => 0.0,
    };

    let temporal_boost = relationship
        .temporal_range
        .as_ref()
        .map_or(0.0, Self::calculate_temporal_relevance);

    // Lowercase the relation type once instead of once per query concept.
    let relation_lower = relationship.relation_type.to_lowercase();
    let matching_concepts = query_concepts
        .iter()
        .filter(|concept| relation_lower.contains(&concept.to_lowercase()))
        .count();
    let concept_boost = matching_concepts as f32 * 0.15;

    let causal_boost = relationship
        .causal_strength
        .map_or(0.0, |strength| strength * 0.2);

    base_weight * (1.0 + semantic_boost + temporal_boost + concept_boost + causal_boost)
}
/// Converts the knowledge graph into an undirected graph for community
/// detection: nodes carry entity names, edges carry relationship confidences.
///
/// Relationships whose endpoints cannot be resolved are left out.
#[cfg(feature = "leiden")]
pub fn to_leiden_graph(&self) -> petgraph::Graph<String, f32, petgraph::Undirected> {
    let mut undirected: petgraph::Graph<String, f32, petgraph::Undirected> =
        Graph::new_undirected();

    let node_map: HashMap<EntityId, NodeIndex> = self
        .entities()
        .map(|entity| {
            let idx = undirected.add_node(entity.name.clone());
            (entity.id.clone(), idx)
        })
        .collect();

    for rel in self.graph.edge_weights() {
        let endpoints = node_map.get(&rel.source).zip(node_map.get(&rel.target));
        if let Some((&src, &tgt)) = endpoints {
            undirected.add_edge(src, tgt, rel.confidence);
        }
    }
    undirected
}
/// Runs Leiden community detection on the graph and attaches the
/// name -> entity metadata mapping to the result.
///
/// # Errors
/// Propagates any error reported by the community detector.
#[cfg(feature = "leiden")]
pub fn detect_hierarchical_communities(
    &self,
    config: crate::graph::leiden::LeidenConfig,
) -> Result<crate::graph::leiden::HierarchicalCommunities> {
    let entity_graph = self.to_leiden_graph();
    let mut detected = crate::graph::leiden::LeidenCommunityDetector::new(config)
        .detect_communities(&entity_graph)?;
    detected.entity_mapping = Some(self.build_entity_mapping());
    Ok(detected)
}
/// Builds a map from entity name to a metadata summary of that entity.
///
/// Entities sharing a name overwrite each other; the last one iterated wins.
#[cfg(feature = "leiden")]
fn build_entity_mapping(&self) -> HashMap<String, crate::graph::leiden::EntityMetadata> {
    let mut by_name = HashMap::new();
    for entity in self.entities() {
        let summary = crate::graph::leiden::EntityMetadata {
            id: entity.id.to_string(),
            name: entity.name.clone(),
            entity_type: entity.entity_type.clone(),
            confidence: entity.confidence,
            mention_count: entity.mentions.len(),
        };
        by_name.insert(entity.name.clone(), summary);
    }
    by_name
}
/// Builds the relationship hierarchy with `num_levels` levels and stores it in
/// `relationship_hierarchy`.
///
/// If `ollama_client` is provided it is handed to the hierarchy builder.
///
/// # Errors
/// Propagates any error from the hierarchy builder; in that case the stored
/// hierarchy is left unchanged.
#[cfg(feature = "async")]
pub async fn build_relationship_hierarchy(
    &mut self,
    num_levels: usize,
    ollama_client: Option<crate::ollama::OllamaClient>,
) -> Result<()> {
    use crate::graph::hierarchical_relationships::HierarchyBuilder;

    let mut builder = HierarchyBuilder::from_graph(self).with_num_levels(num_levels);
    if let Some(client) = ollama_client {
        builder = builder.with_ollama_client(client);
    }
    let hierarchy = builder.build().await?;
    self.relationship_hierarchy = Some(hierarchy);
    Ok(())
}
}
impl Default for KnowledgeGraph {
fn default() -> Self {
Self::new()
}
}
impl Document {
pub fn new(id: DocumentId, title: String, content: String) -> Self {
Self {
id,
title,
content,
metadata: IndexMap::new(),
chunks: Vec::new(),
}
}
pub fn with_metadata(mut self, key: String, value: String) -> Self {
self.metadata.insert(key, value);
self
}
pub fn with_chunks(mut self, chunks: Vec<TextChunk>) -> Self {
self.chunks = chunks;
self
}
}
impl TextChunk {
pub fn new(
id: ChunkId,
document_id: DocumentId,
content: String,
start_offset: usize,
end_offset: usize,
) -> Self {
Self {
id,
document_id,
content,
start_offset,
end_offset,
embedding: None,
entities: Vec::new(),
metadata: ChunkMetadata::default(),
}
}
pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
self.embedding = Some(embedding);
self
}
pub fn with_entities(mut self, entities: Vec<EntityId>) -> Self {
self.entities = entities;
self
}
pub fn with_metadata(mut self, metadata: ChunkMetadata) -> Self {
self.metadata = metadata;
self
}
}
impl Entity {
pub fn new(id: EntityId, name: String, entity_type: String, confidence: f32) -> Self {
Self {
id,
name,
entity_type,
confidence,
mentions: Vec::new(),
embedding: None,
first_mentioned: None,
last_mentioned: None,
temporal_validity: None,
}
}
pub fn with_mentions(mut self, mentions: Vec<EntityMention>) -> Self {
self.mentions = mentions;
self
}
pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
self.embedding = Some(embedding);
self
}
pub fn with_temporal_validity(mut self, start: i64, end: i64) -> Self {
self.temporal_validity = Some(crate::graph::temporal::TemporalRange::new(start, end));
self
}
pub fn with_mention_times(mut self, first: i64, last: i64) -> Self {
self.first_mentioned = Some(first);
self.last_mentioned = Some(last);
self
}
}
// Tests for the optional temporal and causal fields on `Entity` and
// `Relationship`: builder behaviour, serde round-trips, and omission of unset
// fields from serialized output.
#[cfg(test)]
mod temporal_tests {
use super::*;
// Builder methods populate the temporal fields with the given values.
#[test]
fn test_entity_with_temporal_fields() {
let entity = Entity::new(
EntityId::new("socrates".to_string()),
"Socrates".to_string(),
"PERSON".to_string(),
0.9,
)
.with_temporal_validity(-470 * 365 * 24 * 3600, -399 * 365 * 24 * 3600) .with_mention_times(1000, 2000);
assert_eq!(entity.name, "Socrates");
assert!(entity.temporal_validity.is_some());
assert!(entity.first_mentioned.is_some());
assert!(entity.last_mentioned.is_some());
let validity = entity.temporal_validity.unwrap();
assert_eq!(validity.start, -470 * 365 * 24 * 3600);
assert_eq!(validity.end, -399 * 365 * 24 * 3600);
}
// An entity's temporal validity survives a serde_json round-trip.
#[test]
fn test_entity_temporal_serialization() {
let entity = Entity::new(
EntityId::new("test".to_string()),
"Test Entity".to_string(),
"TEST".to_string(),
0.8,
)
.with_temporal_validity(100, 200);
let json = serde_json::to_string(&entity).unwrap();
let deserialized: Entity = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized.name, "Test Entity");
assert!(deserialized.temporal_validity.is_some());
let validity = deserialized.temporal_validity.unwrap();
assert_eq!(validity.start, 100);
assert_eq!(validity.end, 200);
}
// Setting a causal temporal type also fills in its default causal strength.
#[test]
fn test_relationship_with_temporal_type() {
let rel = Relationship::new(
EntityId::new("socrates".to_string()),
EntityId::new("plato".to_string()),
"TAUGHT".to_string(),
0.9,
)
.with_temporal_type(crate::graph::temporal::TemporalRelationType::Caused)
.with_temporal_range(100, 200);
assert!(rel.temporal_type.is_some());
assert!(rel.temporal_range.is_some());
assert!(rel.causal_strength.is_some());
let temporal_type = rel.temporal_type.unwrap();
assert_eq!(
temporal_type,
crate::graph::temporal::TemporalRelationType::Caused
);
let strength = rel.causal_strength.unwrap();
// The default strength of `Caused` is expected to be 0.9.
assert_eq!(strength, 0.9); }
// An explicit causal strength overrides whatever the temporal type set.
#[test]
fn test_relationship_with_causal_strength() {
let rel = Relationship::new(
EntityId::new("a".to_string()),
EntityId::new("b".to_string()),
"INFLUENCED".to_string(),
0.8,
)
.with_temporal_type(crate::graph::temporal::TemporalRelationType::Enabled)
.with_causal_strength(0.75);
assert!(rel.causal_strength.is_some());
assert_eq!(rel.causal_strength.unwrap(), 0.75);
}
// All temporal/causal relationship fields survive a serde_json round-trip.
#[test]
fn test_relationship_temporal_serialization() {
let rel = Relationship::new(
EntityId::new("source".to_string()),
EntityId::new("target".to_string()),
"CAUSED".to_string(),
0.9,
)
.with_temporal_type(crate::graph::temporal::TemporalRelationType::Caused)
.with_temporal_range(100, 200)
.with_causal_strength(0.95);
let json = serde_json::to_string(&rel).unwrap();
let deserialized: Relationship = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized.relation_type, "CAUSED");
assert!(deserialized.temporal_type.is_some());
assert!(deserialized.temporal_range.is_some());
assert!(deserialized.causal_strength.is_some());
let temporal_type = deserialized.temporal_type.unwrap();
assert_eq!(
temporal_type,
crate::graph::temporal::TemporalRelationType::Caused
);
let range = deserialized.temporal_range.unwrap();
assert_eq!(range.start, 100);
assert_eq!(range.end, 200);
assert_eq!(deserialized.causal_strength.unwrap(), 0.95);
}
// Unset temporal fields on an entity are omitted from the serialized JSON.
#[test]
fn test_entity_backward_compatibility() {
let entity = Entity::new(
EntityId::new("test".to_string()),
"Test".to_string(),
"TEST".to_string(),
0.9,
);
assert!(entity.first_mentioned.is_none());
assert!(entity.last_mentioned.is_none());
assert!(entity.temporal_validity.is_none());
let json = serde_json::to_string(&entity).unwrap();
assert!(!json.contains("first_mentioned"));
assert!(!json.contains("last_mentioned"));
assert!(!json.contains("temporal_validity"));
}
// Unset temporal/causal fields on a relationship are omitted from the
// serialized JSON.
#[test]
fn test_relationship_backward_compatibility() {
let rel = Relationship::new(
EntityId::new("a".to_string()),
EntityId::new("b".to_string()),
"RELATED_TO".to_string(),
0.8,
);
assert!(rel.temporal_type.is_none());
assert!(rel.temporal_range.is_none());
assert!(rel.causal_strength.is_none());
let json = serde_json::to_string(&rel).unwrap();
assert!(!json.contains("temporal_type"));
assert!(!json.contains("temporal_range"));
assert!(!json.contains("causal_strength"));
}
}