1pub mod error;
7pub mod metadata;
8
9#[cfg(feature = "async")]
11pub mod registry;
12
13#[cfg(feature = "async")]
15pub mod traits;
16
17#[cfg(test)]
18pub mod test_traits;
19
20pub use error::{ErrorContext, ErrorSeverity, GraphRAGError, Result};
22pub use metadata::ChunkMetadata;
23
24#[cfg(feature = "async")]
25pub use registry::{RegistryBuilder, ServiceConfig, ServiceContext, ServiceRegistry};
26
27#[cfg(feature = "async")]
29pub use traits::*;
30
/// A pluggable strategy that turns raw text into [`TextChunk`]s.
///
/// Implementations must be `Send + Sync`, as required by the supertrait bounds.
pub trait ChunkingStrategy: Send + Sync {
    /// Produces the chunks for `text`.
    ///
    /// How the text is divided is left entirely to the implementation.
    fn chunk(&self, text: &str) -> Vec<TextChunk>;
}
61
62use indexmap::IndexMap;
63use petgraph::{graph::NodeIndex, Graph};
64use std::collections::HashMap;
65
66#[cfg(feature = "pagerank")]
68use sprs::CsMat;
69
/// Payload returned by `KnowledgeGraph::build_adjacency_matrix`:
/// the sparse adjacency matrix, the entity-id -> matrix-index map, and the
/// inverse matrix-index -> entity-id map.
#[cfg(feature = "pagerank")]
type AdjacencyMatrixResult = (
    CsMat<f64>,
    HashMap<EntityId, usize>,
    HashMap<usize, EntityId>,
);
78
/// Newtype around a `String` that identifies a [`Document`].
///
/// Usable as a hash-map key (`Eq + Hash`) and serializable with serde.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct DocumentId(pub String);
82
83impl DocumentId {
84 pub fn new(id: String) -> Self {
86 Self(id)
87 }
88}
89
90impl std::fmt::Display for DocumentId {
91 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
92 write!(f, "{}", self.0)
93 }
94}
95
96impl From<String> for DocumentId {
97 fn from(s: String) -> Self {
98 Self(s)
99 }
100}
101
102impl From<DocumentId> for String {
103 fn from(id: DocumentId) -> Self {
104 id.0
105 }
106}
107
/// Newtype around a `String` that identifies an [`Entity`].
///
/// Usable as a hash-map key (`Eq + Hash`) and serializable with serde.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct EntityId(pub String);
111
112impl EntityId {
113 pub fn new(id: String) -> Self {
115 Self(id)
116 }
117}
118
119impl std::fmt::Display for EntityId {
120 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
121 write!(f, "{}", self.0)
122 }
123}
124
125impl From<String> for EntityId {
126 fn from(s: String) -> Self {
127 Self(s)
128 }
129}
130
131impl From<EntityId> for String {
132 fn from(id: EntityId) -> Self {
133 id.0
134 }
135}
136
/// Newtype around a `String` that identifies a [`TextChunk`].
///
/// Usable as a hash-map key (`Eq + Hash`) and serializable with serde.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct ChunkId(pub String);
140
141impl ChunkId {
142 pub fn new(id: String) -> Self {
144 Self(id)
145 }
146}
147
148impl std::fmt::Display for ChunkId {
149 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
150 write!(f, "{}", self.0)
151 }
152}
153
154impl From<String> for ChunkId {
155 fn from(s: String) -> Self {
156 Self(s)
157 }
158}
159
160impl From<ChunkId> for String {
161 fn from(id: ChunkId) -> Self {
162 id.0
163 }
164}
165
/// A source document together with the chunks derived from it.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Document {
    /// Identifier of this document.
    pub id: DocumentId,
    /// Document title.
    pub title: String,
    /// Full document text.
    pub content: String,
    /// String key/value metadata, kept in insertion order.
    pub metadata: IndexMap<String, String>,
    /// Chunks attached to this document (empty until set).
    pub chunks: Vec<TextChunk>,
}
180
/// A piece of a document's text plus the data attached to it.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct TextChunk {
    /// Identifier of this chunk.
    pub id: ChunkId,
    /// Identifier of the document this chunk belongs to.
    pub document_id: DocumentId,
    /// Text of the chunk.
    pub content: String,
    /// Start position of the chunk.
    // NOTE(review): presumably an offset into the parent document's content;
    // whether it counts bytes or chars is not visible here - confirm.
    pub start_offset: usize,
    /// End position of the chunk (same unit as `start_offset`).
    pub end_offset: usize,
    /// Optional embedding vector for the chunk.
    pub embedding: Option<Vec<f32>>,
    /// Ids of entities associated with this chunk.
    pub entities: Vec<EntityId>,
    /// Additional chunk metadata.
    pub metadata: ChunkMetadata,
}
201
/// A node of the knowledge graph.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Entity {
    /// Identifier of this entity; used as the key of the graph's entity index.
    pub id: EntityId,
    /// Entity name.
    pub name: String,
    /// Free-form entity type label.
    pub entity_type: String,
    /// Confidence score.
    // NOTE(review): the valid range (e.g. 0.0..=1.0) is not enforced here - confirm.
    pub confidence: f32,
    /// Places where this entity is mentioned.
    pub mentions: Vec<EntityMention>,
    /// Optional embedding vector for the entity.
    pub embedding: Option<Vec<f32>>,
}
218
/// A single mention of an entity inside a chunk.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct EntityMention {
    /// Chunk in which the mention occurs.
    pub chunk_id: ChunkId,
    /// Start position of the mention.
    // NOTE(review): presumably relative to the chunk content - confirm unit and base.
    pub start_offset: usize,
    /// End position of the mention (same unit as `start_offset`).
    pub end_offset: usize,
    /// Confidence score of this mention.
    pub confidence: f32,
}
231
/// An edge of the knowledge graph, connecting two entities.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Relationship {
    /// Id of the entity the edge starts from.
    pub source: EntityId,
    /// Id of the entity the edge points to.
    pub target: EntityId,
    /// Free-form relation label.
    pub relation_type: String,
    /// Confidence score; also used as the edge weight when exporting the
    /// graph for PageRank or Leiden.
    pub confidence: f32,
    /// Ids of chunks that give context for this relationship.
    pub context: Vec<ChunkId>,
}
246
/// Entities and relationships stored in a petgraph graph, together with the
/// documents and chunks they were extracted from.
#[derive(Debug)]
pub struct KnowledgeGraph {
    /// Entities are node weights, relationships are edge weights.
    graph: Graph<Entity, Relationship>,
    /// Maps an entity id to its node in `graph`.
    entity_index: HashMap<EntityId, NodeIndex>,
    /// Documents keyed by id, in insertion order.
    documents: IndexMap<DocumentId, Document>,
    /// Chunks keyed by id, in insertion order.
    chunks: IndexMap<ChunkId, TextChunk>,
}
255
256impl KnowledgeGraph {
257 pub fn new() -> Self {
259 Self {
260 graph: Graph::new(),
261 entity_index: HashMap::new(),
262 documents: IndexMap::new(),
263 chunks: IndexMap::new(),
264 }
265 }
266
267 pub fn add_document(&mut self, document: Document) -> Result<()> {
269 let document_id = document.id.clone();
270
271 for chunk in &document.chunks {
273 self.chunks.insert(chunk.id.clone(), chunk.clone());
274 }
275
276 self.documents.insert(document_id, document);
278
279 Ok(())
280 }
281
282 pub fn add_entity(&mut self, entity: Entity) -> Result<NodeIndex> {
284 let entity_id = entity.id.clone();
285 let node_index = self.graph.add_node(entity);
286 self.entity_index.insert(entity_id, node_index);
287 Ok(node_index)
288 }
289
290 pub fn add_relationship(&mut self, relationship: Relationship) -> Result<()> {
292 let source_idx = self.entity_index.get(&relationship.source).ok_or_else(|| {
293 crate::GraphRAGError::GraphConstruction {
294 message: format!("Source entity {} not found", relationship.source),
295 }
296 })?;
297
298 let target_idx = self.entity_index.get(&relationship.target).ok_or_else(|| {
299 crate::GraphRAGError::GraphConstruction {
300 message: format!("Target entity {} not found", relationship.target),
301 }
302 })?;
303
304 self.graph.add_edge(*source_idx, *target_idx, relationship);
305 Ok(())
306 }
307
    /// Stores `chunk` under its own id, replacing any chunk already stored
    /// under that id. Currently always returns `Ok(())`.
    pub fn add_chunk(&mut self, chunk: TextChunk) -> Result<()> {
        self.chunks.insert(chunk.id.clone(), chunk);
        Ok(())
    }
313
314 pub fn get_entity(&self, id: &EntityId) -> Option<&Entity> {
316 let node_idx = self.entity_index.get(id)?;
317 self.graph.node_weight(*node_idx)
318 }
319
    /// Looks up a document by id; `None` when no such document is stored.
    pub fn get_document(&self, id: &DocumentId) -> Option<&Document> {
        self.documents.get(id)
    }
324
    /// Looks up a chunk by id; `None` when no such chunk is stored.
    pub fn get_chunk(&self, id: &ChunkId) -> Option<&TextChunk> {
        self.chunks.get(id)
    }
329
330 pub fn get_entity_mut(&mut self, id: &EntityId) -> Option<&mut Entity> {
332 let node_idx = self.entity_index.get(id)?;
333 self.graph.node_weight_mut(*node_idx)
334 }
335
    /// Looks up a chunk by id for in-place modification; `None` when no such
    /// chunk is stored.
    pub fn get_chunk_mut(&mut self, id: &ChunkId) -> Option<&mut TextChunk> {
        self.chunks.get_mut(id)
    }
340
    /// Iterates over every entity stored as a node of the graph.
    pub fn entities(&self) -> impl Iterator<Item = &Entity> {
        self.graph.node_weights()
    }
345
    /// Iterates mutably over every entity stored as a node of the graph.
    ///
    /// Changing an entity's `id` through this iterator does not update the
    /// internal id index.
    pub fn entities_mut(&mut self) -> impl Iterator<Item = &mut Entity> {
        self.graph.node_weights_mut()
    }
350
    /// Iterates over the stored documents in insertion order.
    pub fn documents(&self) -> impl Iterator<Item = &Document> {
        self.documents.values()
    }
355
    /// Iterates mutably over the stored documents in insertion order.
    pub fn documents_mut(&mut self) -> impl Iterator<Item = &mut Document> {
        self.documents.values_mut()
    }
360
    /// Iterates over the stored chunks in insertion order.
    pub fn chunks(&self) -> impl Iterator<Item = &TextChunk> {
        self.chunks.values()
    }
365
    /// Iterates mutably over the stored chunks in insertion order.
    pub fn chunks_mut(&mut self) -> impl Iterator<Item = &mut TextChunk> {
        self.chunks.values_mut()
    }
370
371 pub fn get_neighbors(&self, entity_id: &EntityId) -> Vec<(&Entity, &Relationship)> {
373 use petgraph::visit::EdgeRef;
374
375 if let Some(&node_idx) = self.entity_index.get(entity_id) {
376 self.graph
377 .edges(node_idx)
378 .filter_map(|edge| {
379 let target_entity = self.graph.node_weight(edge.target())?;
380 Some((target_entity, edge.weight()))
381 })
382 .collect()
383 } else {
384 Vec::new()
385 }
386 }
387
    /// Collects references to every relationship (edge weight) in the graph
    /// into a freshly allocated vector.
    pub fn get_all_relationships(&self) -> Vec<&Relationship> {
        self.graph.edge_weights().collect()
    }
392
393 pub fn load_from_json(file_path: &str) -> Result<Self> {
395 use std::fs;
396
397 let json_str = fs::read_to_string(file_path)?;
399 let json_data = json::parse(&json_str)
400 .map_err(|e| GraphRAGError::Config {
401 message: format!("Failed to parse JSON: {}", e),
402 })?;
403
404 let mut kg = KnowledgeGraph::new();
405
406 if json_data["entities"].is_array() {
408 for entity_obj in json_data["entities"].members() {
409 let id = EntityId::new(entity_obj["id"].as_str().unwrap_or("").to_string());
410 let name = entity_obj["name"].as_str().unwrap_or("").to_string();
411 let entity_type = entity_obj["type"].as_str().unwrap_or("").to_string();
412 let confidence = entity_obj["confidence"].as_f32().unwrap_or(0.0);
413
414 let mut mentions = Vec::new();
416 if entity_obj["mentions"].is_array() {
417 for mention_obj in entity_obj["mentions"].members() {
418 let mention = EntityMention {
419 chunk_id: ChunkId::new(mention_obj["chunk_id"].as_str().unwrap_or("").to_string()),
420 start_offset: mention_obj["start_offset"].as_usize().unwrap_or(0),
421 end_offset: mention_obj["end_offset"].as_usize().unwrap_or(0),
422 confidence: mention_obj["confidence"].as_f32().unwrap_or(0.0),
423 };
424 mentions.push(mention);
425 }
426 }
427
428 let entity = Entity {
429 id,
430 name,
431 entity_type,
432 confidence,
433 mentions,
434 embedding: None, };
436
437 kg.add_entity(entity)?;
438 }
439 }
440
441 if json_data["relationships"].is_array() {
443 for rel_obj in json_data["relationships"].members() {
444 let source = EntityId::new(rel_obj["source_id"].as_str().unwrap_or("").to_string());
445 let target = EntityId::new(rel_obj["target_id"].as_str().unwrap_or("").to_string());
446 let relation_type = rel_obj["relation_type"].as_str().unwrap_or("").to_string();
447 let confidence = rel_obj["confidence"].as_f32().unwrap_or(0.0);
448
449 let mut context = Vec::new();
450 if rel_obj["context_chunks"].is_array() {
451 for chunk_id in rel_obj["context_chunks"].members() {
452 if let Some(chunk_id_str) = chunk_id.as_str() {
453 context.push(ChunkId::new(chunk_id_str.to_string()));
454 }
455 }
456 }
457
458 let relationship = Relationship {
459 source,
460 target,
461 relation_type,
462 confidence,
463 context,
464 };
465
466 let _ = kg.add_relationship(relationship);
468 }
469 }
470
471 if json_data["chunks"].is_array() {
473 for chunk_obj in json_data["chunks"].members() {
474 let id = ChunkId::new(chunk_obj["id"].as_str().unwrap_or("").to_string());
475 let document_id = DocumentId::new(chunk_obj["document_id"].as_str().unwrap_or("").to_string());
476 let start_offset = chunk_obj["start_offset"].as_usize().unwrap_or(0);
477 let end_offset = chunk_obj["end_offset"].as_usize().unwrap_or(0);
478
479 let content = chunk_obj["content"].as_str().unwrap_or("").to_string();
481
482 let mut entities = Vec::new();
484 if chunk_obj["entities"].is_array() {
485 for entity_id in chunk_obj["entities"].members() {
486 if let Some(entity_id_str) = entity_id.as_str() {
487 entities.push(EntityId::new(entity_id_str.to_string()));
488 }
489 }
490 }
491
492 let chunk = TextChunk {
493 id,
494 document_id,
495 content,
496 start_offset,
497 end_offset,
498 embedding: None, entities,
500 metadata: ChunkMetadata::default(),
501 };
502 kg.add_chunk(chunk)?;
503 }
504 }
505
506 if json_data["documents"].is_array() {
508 for doc_obj in json_data["documents"].members() {
509 let id = DocumentId::new(doc_obj["id"].as_str().unwrap_or("").to_string());
510 let title = doc_obj["title"].as_str().unwrap_or("").to_string();
511 let content = doc_obj["content"].as_str().unwrap_or("").to_string();
512
513 let mut metadata = IndexMap::new();
515 if doc_obj["metadata"].is_object() {
516 for (key, value) in doc_obj["metadata"].entries() {
517 metadata.insert(key.to_string(), value.as_str().unwrap_or("").to_string());
518 }
519 }
520
521 let document = Document {
522 id,
523 title,
524 content,
525 metadata,
526 chunks: vec![], };
528 kg.add_document(document)?;
529 }
530 }
531
532 Ok(kg)
533 }
534
535 pub fn save_to_json(&self, file_path: &str) -> Result<()> {
537 use std::fs;
538
539 let mut json_data = json::JsonValue::new_object();
541
542 json_data["metadata"] = json::object! {
544 "format_version" => "2.0",
545 "created_at" => chrono::Utc::now().to_rfc3339(),
546 "total_entities" => self.entities().count(),
547 "total_relationships" => self.get_all_relationships().len(),
548 "total_chunks" => self.chunks().count(),
549 "total_documents" => self.documents().count()
550 };
551
552 let mut entities_array = json::JsonValue::new_array();
554 for entity in self.entities() {
555 let mut entity_obj = json::object! {
556 "id" => entity.id.to_string(),
557 "name" => entity.name.clone(),
558 "type" => entity.entity_type.clone(),
559 "confidence" => entity.confidence,
560 "mentions_count" => entity.mentions.len()
561 };
562
563 let mut mentions_array = json::JsonValue::new_array();
565 for mention in &entity.mentions {
566 mentions_array
567 .push(json::object! {
568 "chunk_id" => mention.chunk_id.to_string(),
569 "start_offset" => mention.start_offset,
570 "end_offset" => mention.end_offset,
571 "confidence" => mention.confidence
572 })
573 .unwrap();
574 }
575 entity_obj["mentions"] = mentions_array;
576
577 if let Some(embedding) = &entity.embedding {
579 entity_obj["has_embedding"] = true.into();
580 entity_obj["embedding_dimension"] = embedding.len().into();
581 let sample_embedding: Vec<f32> = embedding.iter().take(5).cloned().collect();
583 entity_obj["embedding_sample"] = sample_embedding.into();
584 } else {
585 entity_obj["has_embedding"] = false.into();
586 }
587
588 entities_array.push(entity_obj).unwrap();
589 }
590 json_data["entities"] = entities_array;
591
592 let mut relationships_array = json::JsonValue::new_array();
594 for relationship in self.get_all_relationships() {
595 let rel_obj = json::object! {
596 "source_id" => relationship.source.to_string(),
597 "target_id" => relationship.target.to_string(),
598 "relation_type" => relationship.relation_type.clone(),
599 "confidence" => relationship.confidence,
600 "context_chunks" => relationship.context.iter()
601 .map(|c| c.to_string())
602 .collect::<Vec<String>>()
603 };
604 relationships_array.push(rel_obj).unwrap();
605 }
606 json_data["relationships"] = relationships_array;
607
608 let mut chunks_array = json::JsonValue::new_array();
610 for chunk in self.chunks() {
611 let mut chunk_obj = json::object! {
612 "id" => chunk.id.to_string(),
613 "document_id" => chunk.document_id.to_string(),
614 "content" => chunk.content.clone(), "start_offset" => chunk.start_offset,
616 "end_offset" => chunk.end_offset
617 };
618
619 let entities_list: Vec<String> = chunk.entities.iter()
621 .map(|e| e.to_string())
622 .collect();
623 chunk_obj["entities"] = entities_list.into();
624
625 chunk_obj["has_embedding"] = chunk.embedding.is_some().into();
627 if let Some(embedding) = &chunk.embedding {
628 chunk_obj["embedding_dimension"] = embedding.len().into();
629 }
630
631 chunks_array.push(chunk_obj).unwrap();
632 }
633 json_data["chunks"] = chunks_array;
634
635 let mut documents_array = json::JsonValue::new_array();
637 for document in self.documents() {
638 let mut meta_obj = json::JsonValue::new_object();
639 for (key, value) in &document.metadata {
640 meta_obj[key] = value.clone().into();
641 }
642
643 let doc_obj = json::object! {
644 "id" => document.id.to_string(),
645 "title" => document.title.clone(),
646 "content" => document.content.clone(), "metadata" => meta_obj
648 };
649 documents_array.push(doc_obj).unwrap();
650 }
651 json_data["documents"] = documents_array;
652
653 fs::write(file_path, json_data.dump())?;
655 tracing::info!("Knowledge graph saved to {file_path}");
656
657 Ok(())
658 }
659
660 pub fn find_entities_by_name(&self, name: &str) -> impl Iterator<Item = &Entity> {
662 let name_lower = name.to_lowercase();
663 self.entities()
664 .filter(move |entity| entity.name.to_lowercase().contains(&name_lower))
665 }
666
667 pub fn get_entity_by_id(&self, id: &str) -> Option<&Entity> {
669 let entity_id = EntityId::new(id.to_string());
670 self.get_entity(&entity_id)
671 }
672
673 pub fn get_entity_relationships(&self, entity_id: &str) -> impl Iterator<Item = &Relationship> {
675 let entity_id = EntityId::new(entity_id.to_string());
676 if let Some(&node_idx) = self.entity_index.get(&entity_id) {
677 self.graph
678 .edges(node_idx)
679 .map(|edge| edge.weight())
680 .collect::<Vec<_>>()
681 .into_iter()
682 } else {
683 Vec::new().into_iter()
684 }
685 }
686
687 pub fn find_relationship_path(
689 &self,
690 entity1: &str,
691 entity2: &str,
692 _max_depth: usize,
693 ) -> Vec<String> {
694 let entity1_id = EntityId::new(entity1.to_string());
695 let entity2_id = EntityId::new(entity2.to_string());
696
697 let node1 = self.entity_index.get(&entity1_id);
698 let node2 = self.entity_index.get(&entity2_id);
699
700 if let (Some(&start), Some(&end)) = (node1, node2) {
701 use petgraph::visit::EdgeRef;
703 for edge in self.graph.edges(start) {
704 if edge.target() == end {
705 return vec![edge.weight().relation_type.clone()];
706 }
707 }
708 }
709
710 Vec::new() }
712
713 #[cfg(feature = "pagerank")]
716 pub fn build_pagerank_calculator(
717 &self,
718 ) -> Result<crate::graph::pagerank::PersonalizedPageRank> {
719 let config = crate::graph::pagerank::PageRankConfig::default();
720 let (adjacency_matrix, node_mapping, reverse_mapping) = self.build_adjacency_matrix()?;
721
722 Ok(crate::graph::pagerank::PersonalizedPageRank::new(
723 config,
724 adjacency_matrix,
725 node_mapping,
726 reverse_mapping,
727 ))
728 }
729
730 #[cfg(feature = "pagerank")]
733 fn build_adjacency_matrix(&self) -> Result<AdjacencyMatrixResult> {
734 let nodes: Vec<EntityId> = self.entities().map(|e| e.id.clone()).collect();
735 let node_mapping: HashMap<EntityId, usize> = nodes
736 .iter()
737 .enumerate()
738 .map(|(i, id)| (id.clone(), i))
739 .collect();
740 let reverse_mapping: HashMap<usize, EntityId> = nodes
741 .iter()
742 .enumerate()
743 .map(|(i, id)| (i, id.clone()))
744 .collect();
745
746 let mut row_indices = Vec::new();
748 let mut col_indices = Vec::new();
749 let mut values = Vec::new();
750
751 for relationship in self.get_all_relationships() {
752 if let (Some(&from_idx), Some(&to_idx)) = (
753 node_mapping.get(&relationship.source),
754 node_mapping.get(&relationship.target),
755 ) {
756 row_indices.push(from_idx);
757 col_indices.push(to_idx);
758 values.push(relationship.confidence as f64);
759 }
760 }
761
762 let matrix = if row_indices.is_empty() {
763 sprs::CsMat::zero((nodes.len(), nodes.len()))
765 } else {
766 let mut triplet_mat = sprs::TriMat::new((nodes.len(), nodes.len()));
768 for ((row, col), val) in row_indices
769 .into_iter()
770 .zip(col_indices.into_iter())
771 .zip(values.into_iter())
772 {
773 triplet_mat.add_triplet(row, col, val);
774 }
775 triplet_mat.to_csr()
776 };
777
778 Ok((matrix, node_mapping, reverse_mapping))
779 }
780
781 pub fn entity_count(&self) -> usize {
783 self.entities().count()
784 }
785
786 pub fn relationship_count(&self) -> usize {
788 self.get_all_relationships().len()
789 }
790
791 pub fn document_count(&self) -> usize {
793 self.documents().count()
794 }
795
    /// Iterates over every relationship (edge weight) in the graph without
    /// allocating.
    pub fn relationships(&self) -> impl Iterator<Item = &Relationship> {
        self.graph.edge_weights()
    }
800
    /// Removes every entity and relationship and empties the entity id index.
    ///
    /// Stored documents and chunks are left untouched.
    pub fn clear_entities_and_relationships(&mut self) {
        self.graph.clear();
        self.entity_index.clear();
    }
809
810 #[cfg(feature = "leiden")]
814 pub fn to_leiden_graph(&self) -> petgraph::Graph<String, f32, petgraph::Undirected> {
815 let mut graph = Graph::new_undirected();
816 let mut node_map = HashMap::new();
817
818 for entity in self.entities() {
820 let idx = graph.add_node(entity.name.clone());
821 node_map.insert(entity.id.clone(), idx);
822 }
823
824 for rel in self.get_all_relationships() {
826 if let (Some(&src), Some(&tgt)) = (
827 node_map.get(&rel.source),
828 node_map.get(&rel.target)
829 ) {
830 graph.add_edge(src, tgt, rel.confidence);
831 }
832 }
833
834 graph
835 }
836
    /// Runs Leiden community detection on this graph.
    ///
    /// The graph is converted with `to_leiden_graph` and handed to a
    /// `LeidenCommunityDetector` built from `config`. The result's
    /// `entity_mapping` is then filled with metadata for every entity.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the detector.
    #[cfg(feature = "leiden")]
    pub fn detect_hierarchical_communities(
        &self,
        config: crate::graph::leiden::LeidenConfig,
    ) -> Result<crate::graph::leiden::HierarchicalCommunities> {
        use crate::graph::leiden::LeidenCommunityDetector;

        let leiden_graph = self.to_leiden_graph();

        let detector = LeidenCommunityDetector::new(config);
        let mut communities = detector.detect_communities(&leiden_graph)?;

        // Attach per-entity metadata so callers can interpret the communities.
        communities.entity_mapping = Some(self.build_entity_mapping());

        Ok(communities)
    }
880
881 #[cfg(feature = "leiden")]
884 fn build_entity_mapping(&self) -> HashMap<String, crate::graph::leiden::EntityMetadata> {
885 use crate::graph::leiden::EntityMetadata;
886
887 self.entities()
888 .map(|entity| {
889 let metadata = EntityMetadata {
890 id: entity.id.to_string(),
891 name: entity.name.clone(),
892 entity_type: entity.entity_type.clone(),
893 confidence: entity.confidence,
894 mention_count: entity.mentions.len(),
895 };
896 (entity.name.clone(), metadata)
897 })
898 .collect()
899 }
900}
901
impl Default for KnowledgeGraph {
    /// Equivalent to [`KnowledgeGraph::new`]: an empty graph.
    fn default() -> Self {
        Self::new()
    }
}
907
908impl Document {
909 pub fn new(id: DocumentId, title: String, content: String) -> Self {
911 Self {
912 id,
913 title,
914 content,
915 metadata: IndexMap::new(),
916 chunks: Vec::new(),
917 }
918 }
919
920 pub fn with_metadata(mut self, key: String, value: String) -> Self {
922 self.metadata.insert(key, value);
923 self
924 }
925
926 pub fn with_chunks(mut self, chunks: Vec<TextChunk>) -> Self {
928 self.chunks = chunks;
929 self
930 }
931}
932
933impl TextChunk {
934 pub fn new(
936 id: ChunkId,
937 document_id: DocumentId,
938 content: String,
939 start_offset: usize,
940 end_offset: usize,
941 ) -> Self {
942 Self {
943 id,
944 document_id,
945 content,
946 start_offset,
947 end_offset,
948 embedding: None,
949 entities: Vec::new(),
950 metadata: ChunkMetadata::default(),
951 }
952 }
953
954 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
956 self.embedding = Some(embedding);
957 self
958 }
959
960 pub fn with_entities(mut self, entities: Vec<EntityId>) -> Self {
962 self.entities = entities;
963 self
964 }
965
966 pub fn with_metadata(mut self, metadata: ChunkMetadata) -> Self {
968 self.metadata = metadata;
969 self
970 }
971}
972
973impl Entity {
974 pub fn new(id: EntityId, name: String, entity_type: String, confidence: f32) -> Self {
976 Self {
977 id,
978 name,
979 entity_type,
980 confidence,
981 mentions: Vec::new(),
982 embedding: None,
983 }
984 }
985
986 pub fn with_mentions(mut self, mentions: Vec<EntityMention>) -> Self {
988 self.mentions = mentions;
989 self
990 }
991
992 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
994 self.embedding = Some(embedding);
995 self
996 }
997}