1#![warn(missing_docs)]
42#![warn(clippy::all)]
43pub mod config;
52pub mod core;
54pub mod entity;
56#[cfg(feature = "async")]
58pub mod generation;
59pub mod graph;
61pub mod retrieval;
63#[cfg(any(
65 feature = "memory-storage",
66 feature = "persistent-storage",
67 feature = "async"
68))]
69pub mod storage;
70pub mod text;
72pub mod vector;
74
75pub mod builder;
77pub mod embeddings;
79pub mod nlp;
81pub mod ollama;
83pub mod persistence;
85pub mod query;
87pub mod summarization;
89
90pub mod pipeline;
93
94#[cfg(feature = "parallel-processing")]
96pub mod parallel;
97
98#[cfg(feature = "lightrag")]
99pub mod lightrag;
101
102pub mod pipeline_executor;
104
105pub mod reranking;
108
109pub mod monitoring;
111
112pub mod critic;
114
115pub mod evaluation;
117
118#[cfg(feature = "async")]
120pub mod optimization;
121
122#[cfg(feature = "api")]
124pub mod api;
125
126pub mod inference;
128
129#[cfg(feature = "corpus-processing")]
131pub mod corpus;
132
133#[cfg(feature = "async")]
135pub mod async_graphrag;
137
138#[cfg(feature = "async")]
139pub mod async_processing;
141
142#[cfg(feature = "caching")]
143pub mod caching;
145
146#[cfg(feature = "function-calling")]
147pub mod function_calling;
149
150#[cfg(feature = "incremental")]
151pub mod incremental;
153
154#[cfg(feature = "rograg")]
155pub mod rograg;
157
158pub mod prelude {
180 pub use crate::GraphRAG;
182
183 pub use crate::builder::GraphRAGBuilder;
185 pub use crate::builder::TypedBuilder;
186 pub use crate::config::Config;
187
188 pub use crate::core::{GraphRAGError, Result};
190
191 pub use crate::core::{
193 ChunkId, Document, DocumentId, Entity, EntityId, EntityMention, KnowledgeGraph,
194 Relationship, TextChunk,
195 };
196
197 pub use crate::retrieval::SearchResult;
199 pub use crate::retrieval::{ExplainedAnswer, ReasoningStep, SourceReference, SourceType};
200
201 pub use crate::pipeline_executor::{PipelineExecutor, PipelineReport};
203
204 pub use crate::config::setconfig::SetConfig;
206}
207
// ---------------------------------------------------------------------------
// Crate-root re-exports: flatten the most frequently used types so downstream
// code does not need to spell out the internal module paths.
// ---------------------------------------------------------------------------
pub use crate::config::Config;
pub use crate::core::{
    ChunkId, Document, DocumentId, Entity, EntityId, EntityMention, ErrorContext, ErrorSeverity,
    ErrorSuggestion, GraphRAGError, KnowledgeGraph, Relationship, Result, TextChunk,
};

// Core trait abstractions, only available with the `async` feature.
#[cfg(feature = "async")]
pub use crate::core::traits::{
    Embedder, EntityExtractor, GraphStore, LanguageModel, Retriever, Storage, VectorStore,
};

#[cfg(feature = "memory-storage")]
pub use crate::storage::MemoryStorage;

pub use crate::builder::GraphRAGBuilder;
// Dual-level retrieval surface from the `lightrag` module.
#[cfg(feature = "lightrag")]
pub use crate::lightrag::{
    DualLevelKeywords, DualLevelRetriever, DualRetrievalConfig, DualRetrievalResults,
    KeywordExtractor, KeywordExtractorConfig, MergeStrategy, SemanticSearcher,
};

// Feature-gated graph algorithms and reranking/retrieval systems.
#[cfg(feature = "pagerank")]
pub use crate::graph::pagerank::{PageRankConfig, PersonalizedPageRank};

#[cfg(feature = "leiden")]
pub use crate::graph::leiden::{HierarchicalCommunities, LeidenCommunityDetector, LeidenConfig};

#[cfg(feature = "cross-encoder")]
pub use crate::reranking::cross_encoder::{
    ConfidenceCrossEncoder, CrossEncoder, CrossEncoderConfig, RankedResult, RerankingStats,
};

#[cfg(feature = "pagerank")]
pub use crate::retrieval::pagerank_retrieval::{PageRankRetrievalSystem, ScoredResult};

#[cfg(feature = "pagerank")]
pub use crate::retrieval::hipporag_ppr::{Fact, HippoRAGConfig, HippoRAGRetriever};
/// Top-level entry point: owns the configuration, the knowledge graph, and
/// the lazily created subsystems (retrieval, query planning, critique).
///
/// Construct with [`GraphRAG::new`] / [`GraphRAG::builder`], then call
/// `initialize()` before adding documents or querying.
pub struct GraphRAG {
    /// Configuration captured at construction; consulted by every operation.
    config: Config,
    /// `None` until `initialize()` creates a fresh graph or loads one from
    /// the auto-save workspace.
    knowledge_graph: Option<KnowledgeGraph>,
    /// Hybrid retrieval system; created in `initialize()`.
    retrieval_system: Option<retrieval::RetrievalSystem>,
    /// LLM query decomposer; created in `initialize()` only when Ollama is
    /// enabled in the configuration.
    query_planner: Option<query::planner::QueryPlanner>,
    // Optional answer critic used by `ask_with_reasoning`; not populated by
    // `new()`/`initialize()` here — presumably wired by the builder (confirm).
    critic: Option<critic::Critic>,
    // Reserved slot; currently never read (hence the dead_code allowance).
    #[cfg(feature = "parallel-processing")]
    #[allow(dead_code)]
    parallel_processor: Option<parallel::ParallelProcessor>,
}
295
296impl GraphRAG {
    /// Creates an uninitialized instance from `config`.
    ///
    /// All subsystem slots start as `None`; call `initialize()` before use.
    /// Currently never returns `Err`, but the `Result` leaves room for
    /// config validation.
    pub fn new(config: Config) -> Result<Self> {
        Ok(Self {
            config,
            knowledge_graph: None,
            retrieval_system: None,
            query_planner: None,
            critic: None,
            #[cfg(feature = "parallel-processing")]
            parallel_processor: None,
        })
    }
309
310 pub fn default_local() -> Result<Self> {
313 let mut config = Config::default();
314 config.ollama.enabled = true;
316 Self::new(config)
319 }
320
    /// Returns a fresh [`crate::builder::GraphRAGBuilder`] for fluent,
    /// step-by-step construction.
    pub fn builder() -> crate::builder::GraphRAGBuilder {
        crate::builder::GraphRAGBuilder::new()
    }
338
339 pub fn initialize(&mut self) -> Result<()> {
345 let loaded = self.try_load_from_workspace();
347
348 if !loaded {
349 self.knowledge_graph = Some(KnowledgeGraph::new());
350 }
351
352 self.retrieval_system = Some(retrieval::RetrievalSystem::new(&self.config)?);
353
354 if self.config.ollama.enabled {
355 let client = ollama::OllamaClient::new(self.config.ollama.clone());
356 self.query_planner = Some(query::planner::QueryPlanner::new(client));
357 }
358
359 Ok(())
360 }
361
362 fn try_load_from_workspace(&mut self) -> bool {
365 if !self.config.auto_save.enabled {
366 return false;
367 }
368 let base_dir = match &self.config.auto_save.base_dir {
369 Some(d) => d.clone(),
370 None => return false,
371 };
372 let workspace_name = self
373 .config
374 .auto_save
375 .workspace_name
376 .as_deref()
377 .unwrap_or("default");
378
379 let manager = match persistence::WorkspaceManager::new(&base_dir) {
380 Ok(m) => m,
381 Err(e) => {
382 tracing::warn!("Could not open workspace base dir '{}': {}", base_dir, e);
383 return false;
384 },
385 };
386
387 if !manager.workspace_exists(workspace_name) {
388 return false;
389 }
390
391 match manager.load_graph(workspace_name) {
392 Ok(graph) => {
393 tracing::info!(
394 "Loaded graph from workspace '{}' ({} entities, {} relationships)",
395 workspace_name,
396 graph.entity_count(),
397 graph.relationship_count(),
398 );
399 self.knowledge_graph = Some(graph);
400 true
401 },
402 Err(e) => {
403 tracing::warn!(
404 "Failed to load graph from workspace '{}': {}",
405 workspace_name,
406 e
407 );
408 false
409 },
410 }
411 }
412
413 pub fn save_to_workspace(&self) -> Result<()> {
416 if !self.config.auto_save.enabled {
417 return Ok(());
418 }
419 let base_dir = match &self.config.auto_save.base_dir {
420 Some(d) => d,
421 None => return Ok(()),
422 };
423 let workspace_name = self
424 .config
425 .auto_save
426 .workspace_name
427 .as_deref()
428 .unwrap_or("default");
429
430 let graph = self
431 .knowledge_graph
432 .as_ref()
433 .ok_or_else(|| GraphRAGError::Config {
434 message: "Knowledge graph not initialized".to_string(),
435 })?;
436
437 let manager = persistence::WorkspaceManager::new(base_dir)?;
438 manager.save_graph(graph, workspace_name)?;
439
440 tracing::info!(
441 "Saved graph to workspace '{}' in '{}' ({} entities, {} relationships)",
442 workspace_name,
443 base_dir,
444 graph.entity_count(),
445 graph.relationship_count(),
446 );
447 Ok(())
448 }
449
450 pub fn add_document_from_text(&mut self, text: &str) -> Result<()> {
452 use crate::text::TextProcessor;
453 use indexmap::IndexMap;
454
455 let doc_id = DocumentId::new(format!("doc_{}", uuid::Uuid::new_v4().simple()));
457
458 let document = Document {
459 id: doc_id,
460 title: "Document".to_string(),
461 content: text.to_string(),
462 metadata: IndexMap::new(),
463 chunks: Vec::new(),
464 };
465
466 let text_processor =
467 TextProcessor::new(self.config.text.chunk_size, self.config.text.chunk_overlap)?;
468 let chunks = text_processor.chunk_text(&document)?;
469
470 let document_with_chunks = Document { chunks, ..document };
471
472 self.add_document(document_with_chunks)
473 }
474
475 pub fn add_document(&mut self, document: Document) -> Result<()> {
477 let graph = self
478 .knowledge_graph
479 .as_mut()
480 .ok_or_else(|| GraphRAGError::Config {
481 message: "Knowledge graph not initialized".to_string(),
482 })?;
483
484 graph.add_document(document)
485 }
486
487 pub fn clear_graph(&mut self) -> Result<()> {
492 let graph = self
493 .knowledge_graph
494 .as_mut()
495 .ok_or_else(|| GraphRAGError::Config {
496 message: "Knowledge graph not initialized".to_string(),
497 })?;
498
499 #[cfg(feature = "tracing")]
500 tracing::info!("Clearing knowledge graph (preserving documents and chunks)");
501
502 graph.clear_entities_and_relationships();
503 Ok(())
504 }
505
506 #[cfg(feature = "async")]
517 pub async fn build_graph(&mut self) -> Result<()> {
518 use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
519
520 let suppress = self.config.suppress_progress_bars;
523 let make_pb = move |total: u64, style: ProgressStyle| -> ProgressBar {
524 let pb = ProgressBar::new(total).with_style(style);
525 if suppress {
526 pb.set_draw_target(ProgressDrawTarget::hidden());
527 }
528 pb
529 };
530
531 let graph = self
532 .knowledge_graph
533 .as_mut()
534 .ok_or_else(|| GraphRAGError::Config {
535 message: "Knowledge graph not initialized".to_string(),
536 })?;
537
538 let chunks: Vec<_> = graph.chunks().cloned().collect();
539 let total_chunks = chunks.len();
540
541 #[cfg(feature = "tracing")]
549 tracing::info!(
550 "build_graph() - Config state: approach='{}', use_gleaning={}, ollama.enabled={}",
551 self.config.approach,
552 self.config.entities.use_gleaning,
553 self.config.ollama.enabled
554 );
555
556 if self.config.entities.use_gleaning && self.config.ollama.enabled {
557 #[cfg(feature = "async")]
559 {
560 use crate::entity::GleaningEntityExtractor;
561 use crate::ollama::OllamaClient;
562
563 #[cfg(feature = "tracing")]
564 tracing::info!(
565 "Using LLM-based entity extraction with gleaning (max_rounds: {})",
566 self.config.entities.max_gleaning_rounds
567 );
568
569 let client = OllamaClient::new(self.config.ollama.clone());
571
572 let gleaning_config = crate::entity::GleaningConfig {
574 max_gleaning_rounds: self.config.entities.max_gleaning_rounds,
575 completion_threshold: 0.8,
576 entity_confidence_threshold: self.config.entities.min_confidence as f64,
577 use_llm_completion_check: true,
578 entity_types: if self.config.entities.entity_types.is_empty() {
579 vec![
580 "PERSON".to_string(),
581 "ORGANIZATION".to_string(),
582 "LOCATION".to_string(),
583 ]
584 } else {
585 self.config.entities.entity_types.clone()
586 },
587 temperature: 0.1,
588 max_tokens: 1500,
589 };
590
591 let extractor = GleaningEntityExtractor::new(client.clone(), gleaning_config);
593
594 let rel_extractor = if self.config.entities.enable_triple_reflection {
596 Some(crate::entity::LLMRelationshipExtractor::new(Some(
597 &self.config.ollama,
598 ))?)
599 } else {
600 None
601 };
602
603 let pb = make_pb(total_chunks as u64,
604 ProgressStyle::default_bar()
605 .template(" [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} chunks ({eta})")
606 .expect("Invalid progress bar template")
607 .progress_chars("=>-")
608 );
609 pb.set_message("Extracting entities with LLM");
610
611 for (idx, chunk) in chunks.iter().enumerate() {
613 pb.set_message(format!(
614 "Chunk {}/{} (gleaning with {} rounds)",
615 idx + 1,
616 total_chunks,
617 self.config.entities.max_gleaning_rounds
618 ));
619
620 #[cfg(feature = "tracing")]
621 tracing::info!("Processing chunk {}/{} (LLM)", idx + 1, total_chunks);
622
623 let (entities, relationships) = extractor.extract_with_gleaning(chunk).await?;
624
625 let entity_map: std::collections::HashMap<_, _> = entities
627 .iter()
628 .map(|e| (e.id.clone(), e.name.clone()))
629 .collect();
630
631 for entity in entities {
633 graph.add_entity(entity)?;
634 }
635
636 if let Some(ref validator) = rel_extractor {
638 #[cfg(feature = "tracing")]
639 tracing::info!(
640 "Triple reflection enabled: validating {} relationships",
641 relationships.len()
642 );
643
644 let mut validated_count = 0;
645 let mut filtered_count = 0;
646
647 for relationship in relationships {
648 let source_name = entity_map
650 .get(&relationship.source)
651 .or_else(|| {
652 graph
653 .entities()
654 .find(|e| e.id == relationship.source)
655 .map(|e| &e.name)
656 })
657 .map(|s| s.as_str())
658 .unwrap_or(relationship.source.0.as_str());
659 let target_name = entity_map
660 .get(&relationship.target)
661 .or_else(|| {
662 graph
663 .entities()
664 .find(|e| e.id == relationship.target)
665 .map(|e| &e.name)
666 })
667 .map(|s| s.as_str())
668 .unwrap_or(relationship.target.0.as_str());
669
670 match validator
672 .validate_triple(
673 source_name,
674 &relationship.relation_type,
675 target_name,
676 &chunk.content,
677 )
678 .await
679 {
680 Ok(validation) => {
681 if validation.is_valid
682 && validation.confidence
683 >= self.config.entities.validation_min_confidence
684 {
685 if let Err(e) = graph.add_relationship(relationship) {
687 #[cfg(feature = "tracing")]
688 tracing::debug!(
689 "Failed to add validated relationship: {}",
690 e
691 );
692 } else {
693 validated_count += 1;
694 }
695 } else {
696 filtered_count += 1;
698 #[cfg(feature = "tracing")]
699 tracing::debug!(
700 "Filtered relationship {} --[{}]--> {} (valid={}, conf={:.2}): {}",
701 source_name, relationship.relation_type, target_name,
702 validation.is_valid, validation.confidence, validation.reason
703 );
704 }
705 },
706 Err(e) => {
707 #[cfg(feature = "tracing")]
709 tracing::warn!(
710 "Validation error, adding relationship anyway: {}",
711 e
712 );
713 let _ = graph.add_relationship(relationship);
714 },
715 }
716 }
717
718 #[cfg(feature = "tracing")]
719 tracing::info!(
720 "Triple reflection complete: {} validated, {} filtered",
721 validated_count,
722 filtered_count
723 );
724 } else {
725 for relationship in relationships {
727 if let Err(e) = graph.add_relationship(relationship) {
728 #[cfg(feature = "tracing")]
729 tracing::warn!(
730 "Failed to add relationship: {} -> {} ({}). Error: {}",
731 e.to_string().split("entity ").nth(1).unwrap_or("unknown"),
732 e.to_string().split("entity ").nth(2).unwrap_or("unknown"),
733 "relationship",
734 e
735 );
736 }
737 }
738 }
739
740 pb.inc(1);
741 }
742
743 pb.finish_with_message("Entity extraction complete");
744
745 if self.config.entities.use_atomic_facts {
747 use crate::entity::AtomicFactExtractor;
748
749 #[cfg(feature = "tracing")]
750 tracing::info!("Starting atomic fact extraction (ATOM methodology)");
751
752 let atomic_extractor = AtomicFactExtractor::new(client.clone())
753 .with_max_tokens(self.config.entities.max_fact_tokens);
754
755 let pb_atomic = make_pb(total_chunks as u64,
756 ProgressStyle::default_bar()
757 .template(" [{elapsed_precise}] [{bar:40.magenta/blue}] {pos}/{len} atomic facts ({eta})")
758 .expect("Invalid progress bar template")
759 .progress_chars("=>-")
760 );
761 pb_atomic.set_message("Extracting atomic facts");
762
763 let mut total_facts = 0;
764 let mut total_atomic_entities = 0;
765 let mut total_atomic_relationships = 0;
766
767 for (idx, chunk) in chunks.iter().enumerate() {
768 pb_atomic.set_message(format!(
769 "Chunk {}/{} (extracting atomic facts)",
770 idx + 1,
771 total_chunks
772 ));
773
774 #[cfg(feature = "tracing")]
775 tracing::info!("Processing chunk {}/{} (Atomic)", idx + 1, total_chunks);
776
777 match atomic_extractor.extract_atomic_facts(chunk).await {
778 Ok(facts) => {
779 total_facts += facts.len();
780
781 let (atomic_entities, atomic_relationships) =
783 atomic_extractor.atomics_to_graph_elements(facts, &chunk.id);
784
785 total_atomic_entities += atomic_entities.len();
786 total_atomic_relationships += atomic_relationships.len();
787
788 for entity in atomic_entities {
790 if let Err(e) = graph.add_entity(entity) {
791 #[cfg(feature = "tracing")]
792 tracing::debug!("Failed to add atomic entity: {}", e);
793 }
794 }
795
796 for relationship in atomic_relationships {
798 if let Err(e) = graph.add_relationship(relationship) {
799 #[cfg(feature = "tracing")]
800 tracing::debug!("Failed to add atomic relationship: {}", e);
801 }
802 }
803 },
804 Err(e) => {
805 #[cfg(feature = "tracing")]
806 tracing::warn!(
807 chunk_id = %chunk.id,
808 error = %e,
809 "Atomic fact extraction failed for chunk"
810 );
811 },
812 }
813
814 pb_atomic.inc(1);
815 }
816
817 pb_atomic.finish_with_message(format!(
818 "Atomic extraction complete: {} facts → {} entities, {} relationships",
819 total_facts, total_atomic_entities, total_atomic_relationships
820 ));
821
822 #[cfg(feature = "tracing")]
823 tracing::info!(
824 facts_extracted = total_facts,
825 atomic_entities = total_atomic_entities,
826 atomic_relationships = total_atomic_relationships,
827 "ATOM atomic fact extraction complete"
828 );
829 }
830 }
831 } else if self.config.ollama.enabled {
832 #[cfg(feature = "async")]
838 {
839 use crate::entity::llm_extractor::LLMEntityExtractor;
840 use crate::ollama::OllamaClient;
841
842 #[cfg(feature = "tracing")]
843 tracing::info!(
844 "Using LLM single-pass entity extraction (no gleaning, keep_alive={:?})",
845 self.config.ollama.keep_alive,
846 );
847
848 let client = OllamaClient::new(self.config.ollama.clone());
849 let entity_types = if self.config.entities.entity_types.is_empty() {
850 vec![
851 "PERSON".to_string(),
852 "ORGANIZATION".to_string(),
853 "LOCATION".to_string(),
854 ]
855 } else {
856 self.config.entities.entity_types.clone()
857 };
858
859 let extractor = LLMEntityExtractor::new(client, entity_types)
860 .with_temperature(self.config.ollama.temperature.unwrap_or(0.1))
861 .with_max_tokens(self.config.ollama.max_tokens.unwrap_or(1500) as usize)
862 .with_keep_alive(self.config.ollama.keep_alive.clone());
863
864 let pb = make_pb(total_chunks as u64,
865 ProgressStyle::default_bar()
866 .template(" [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} chunks ({eta})")
867 .expect("Invalid progress bar template")
868 .progress_chars("=>-"),
869 );
870 pb.set_message("Extracting entities with LLM (single-pass)");
871
872 for (idx, chunk) in chunks.iter().enumerate() {
873 pb.set_message(format!(
874 "Chunk {}/{} (LLM single-pass)",
875 idx + 1,
876 total_chunks
877 ));
878
879 #[cfg(feature = "tracing")]
880 tracing::info!(
881 "Processing chunk {}/{} (LLM single-pass)",
882 idx + 1,
883 total_chunks
884 );
885
886 match extractor.extract_from_chunk(chunk).await {
887 Ok((entities, relationships)) => {
888 for entity in entities {
889 if let Err(e) = graph.add_entity(entity) {
890 #[cfg(feature = "tracing")]
891 tracing::debug!("Failed to add entity: {}", e);
892 }
893 }
894 for relationship in relationships {
895 if let Err(e) = graph.add_relationship(relationship) {
896 #[cfg(feature = "tracing")]
897 tracing::debug!("Failed to add relationship: {}", e);
898 }
899 }
900 },
901 Err(e) => {
902 #[cfg(feature = "tracing")]
903 tracing::warn!(
904 chunk_id = %chunk.id,
905 error = %e,
906 "LLM extraction failed for chunk, skipping"
907 );
908 },
909 }
910
911 pb.inc(1);
912 }
913
914 pb.finish_with_message("LLM single-pass extraction complete");
915 }
916 } else if self.config.gliner.enabled {
917 #[cfg(feature = "gliner")]
925 {
926 use crate::entity::GLiNERExtractor;
927 use std::sync::Arc;
928
929 let extractor = Arc::new(
930 GLiNERExtractor::new(self.config.gliner.clone()).map_err(|e| {
931 crate::core::error::GraphRAGError::EntityExtraction {
932 message: format!("GLiNER init failed: {e}"),
933 }
934 })?,
935 );
936
937 let pb = make_pb(total_chunks as u64,
938 ProgressStyle::default_bar()
939 .template(
940 " [{elapsed_precise}] [{bar:40.magenta/blue}] {pos}/{len} chunks ({eta})",
941 )
942 .expect("Invalid progress bar template")
943 .progress_chars("=>-"),
944 );
945 pb.set_message("Extracting entities with GLiNER-Relex");
946
947 for (idx, chunk) in chunks.iter().enumerate() {
948 pb.set_message(format!("Chunk {}/{} (GLiNER-Relex)", idx + 1, total_chunks));
949
950 let ext = Arc::clone(&extractor);
951 let ch = chunk.clone();
952 let result = tokio::task::spawn_blocking(move || ext.extract_from_chunk(&ch))
953 .await
954 .map_err(|e| crate::core::error::GraphRAGError::EntityExtraction {
955 message: format!("spawn_blocking join error: {e}"),
956 })?;
957
958 match result {
959 Ok((entities, relationships)) => {
960 for entity in entities {
961 if let Err(e) = graph.add_entity(entity) {
962 #[cfg(feature = "tracing")]
963 tracing::debug!("GLiNER: failed to add entity: {}", e);
964 }
965 }
966 for rel in relationships {
967 if let Err(e) = graph.add_relationship(rel) {
968 #[cfg(feature = "tracing")]
969 tracing::debug!("GLiNER: failed to add relationship: {}", e);
970 }
971 }
972 },
973 Err(e) => {
974 #[cfg(feature = "tracing")]
975 tracing::warn!(
976 chunk_id = %chunk.id,
977 error = %e,
978 "GLiNER extraction failed for chunk, skipping"
979 );
980 },
981 }
982
983 pb.inc(1);
984 }
985
986 pb.finish_with_message("GLiNER-Relex extraction complete");
987 }
988 #[cfg(not(feature = "gliner"))]
989 return Err(crate::core::error::GraphRAGError::Config {
990 message: "GLiNER enabled in config but crate compiled without --features gliner"
991 .into(),
992 });
993 } else {
994 use crate::entity::EntityExtractor;
996
997 #[cfg(feature = "tracing")]
998 tracing::info!("Using pattern-based entity extraction");
999
1000 let extractor = EntityExtractor::new(self.config.entities.min_confidence)?;
1001
1002 let pb = make_pb(
1004 total_chunks as u64,
1005 ProgressStyle::default_bar()
1006 .template(
1007 " [{elapsed_precise}] [{bar:40.green/blue}] {pos}/{len} chunks ({eta})",
1008 )
1009 .expect("Invalid progress bar template")
1010 .progress_chars("=>-"),
1011 );
1012 pb.set_message("Extracting entities (pattern-based)");
1013
1014 for (idx, chunk) in chunks.iter().enumerate() {
1015 pb.set_message(format!(
1016 "Chunk {}/{} (pattern-based)",
1017 idx + 1,
1018 total_chunks
1019 ));
1020
1021 #[cfg(feature = "tracing")]
1022 tracing::info!("Processing chunk {}/{} (Pattern)", idx + 1, total_chunks);
1023
1024 let entities = extractor.extract_from_chunk(chunk)?;
1025 for entity in entities {
1026 graph.add_entity(entity)?;
1027 }
1028
1029 pb.inc(1);
1030 }
1031
1032 pb.finish_with_message("Entity extraction complete");
1033
1034 if self.config.graph.extract_relationships {
1038 let all_entities: Vec<_> = graph.entities().cloned().collect();
1039
1040 let rel_pb = make_pb(total_chunks as u64,
1042 ProgressStyle::default_bar()
1043 .template(" [{elapsed_precise}] [{bar:40.yellow/blue}] {pos}/{len} chunks ({eta})")
1044 .expect("Invalid progress bar template")
1045 .progress_chars("=>-")
1046 );
1047 rel_pb.set_message("Extracting relationships");
1048
1049 for (idx, chunk) in chunks.iter().enumerate() {
1050 rel_pb.set_message(format!(
1051 "Chunk {}/{} (relationships)",
1052 idx + 1,
1053 total_chunks
1054 ));
1055 let chunk_entities: Vec<_> = all_entities
1057 .iter()
1058 .filter(|e| e.mentions.iter().any(|m| m.chunk_id == chunk.id))
1059 .cloned()
1060 .collect();
1061
1062 if chunk_entities.len() < 2 {
1063 rel_pb.inc(1);
1064 continue; }
1066
1067 let relationships = extractor.extract_relationships(&chunk_entities, chunk)?;
1069
1070 for (source_id, target_id, relation_type) in relationships {
1072 let relationship = Relationship {
1073 source: source_id.clone(),
1074 target: target_id.clone(),
1075 relation_type: relation_type.clone(),
1076 confidence: self.config.graph.relationship_confidence_threshold,
1077 context: vec![chunk.id.clone()],
1078 embedding: None,
1079 temporal_type: None,
1080 temporal_range: None,
1081 causal_strength: None,
1082 };
1083
1084 if let Err(_e) = graph.add_relationship(relationship) {
1086 #[cfg(feature = "tracing")]
1087 tracing::debug!(
1088 "Failed to add relationship: {} -> {} ({}). Error: {}",
1089 source_id,
1090 target_id,
1091 relation_type,
1092 _e
1093 );
1094 }
1095 }
1096
1097 rel_pb.inc(1);
1098 }
1099
1100 rel_pb.finish_with_message("Relationship extraction complete");
1101 } } self.save_to_workspace()?;
1106
1107 Ok(())
1108 }
1109
1110 #[cfg(not(feature = "async"))]
1115 pub fn build_graph(&mut self) -> Result<()> {
1116 use crate::entity::EntityExtractor;
1117
1118 let graph = self
1119 .knowledge_graph
1120 .as_mut()
1121 .ok_or_else(|| GraphRAGError::Config {
1122 message: "Knowledge graph not initialized".to_string(),
1123 })?;
1124
1125 let chunks: Vec<_> = graph.chunks().cloned().collect();
1126
1127 #[cfg(feature = "tracing")]
1128 tracing::info!("Using pattern-based entity extraction (sync mode)");
1129
1130 let extractor = EntityExtractor::new(self.config.entities.min_confidence)?;
1131
1132 for chunk in &chunks {
1133 let entities = extractor.extract_from_chunk(chunk)?;
1134 for entity in entities {
1135 graph.add_entity(entity)?;
1136 }
1137 }
1138
1139 if self.config.graph.extract_relationships {
1141 let all_entities: Vec<_> = graph.entities().cloned().collect();
1142
1143 for chunk in &chunks {
1144 let chunk_entities: Vec<_> = all_entities
1145 .iter()
1146 .filter(|e| e.mentions.iter().any(|m| m.chunk_id == chunk.id))
1147 .cloned()
1148 .collect();
1149
1150 if chunk_entities.len() < 2 {
1151 continue;
1152 }
1153
1154 let relationships = extractor.extract_relationships(&chunk_entities, chunk)?;
1155
1156 for (source_id, target_id, relation_type) in relationships {
1157 let relationship = Relationship {
1158 source: source_id.clone(),
1159 target: target_id.clone(),
1160 relation_type: relation_type.clone(),
1161 confidence: self.config.graph.relationship_confidence_threshold,
1162 context: vec![chunk.id.clone()],
1163 };
1164
1165 if let Err(_e) = graph.add_relationship(relationship) {
1166 #[cfg(feature = "tracing")]
1167 tracing::debug!(
1168 "Failed to add relationship: {} -> {} ({}). Error: {}",
1169 source_id,
1170 target_id,
1171 relation_type,
1172 _e
1173 );
1174 }
1175 }
1176 }
1177 }
1178
1179 Ok(())
1180 }
1181
1182 #[cfg(feature = "async")]
1185 pub async fn ask_with_reasoning(&mut self, query: &str) -> Result<String> {
1186 if self.query_planner.is_none() {
1188 return self.ask(query).await;
1189 }
1190
1191 self.ensure_initialized()?;
1192 if self.has_documents() && !self.has_graph() {
1193 self.build_graph().await?;
1194 }
1195
1196 let planner = self.query_planner.as_ref().unwrap();
1197 tracing::info!("Decomposing query: {}", query);
1198
1199 let sub_queries = match planner.decompose(query).await {
1201 Ok(sq) => sq,
1202 Err(e) => {
1203 tracing::warn!(
1204 "Query decomposition failed, falling back to standard query: {}",
1205 e
1206 );
1207 vec![query.to_string()]
1208 },
1209 };
1210
1211 tracing::info!("Sub-queries: {:?}", sub_queries);
1212
1213 let mut all_results = Vec::new();
1215 for sub_query in sub_queries {
1216 match self.query_internal_with_results(&sub_query).await {
1217 Ok(results) => all_results.extend(results),
1218 Err(e) => tracing::warn!("Failed to execute sub-query '{}': {}", sub_query, e),
1219 }
1220 }
1221
1222 if all_results.is_empty() {
1223 return Ok("No relevant information found for the decomposed queries.".to_string());
1224 }
1225
1226 all_results.sort_by(|a, b| {
1229 b.score
1230 .partial_cmp(&a.score)
1231 .unwrap_or(std::cmp::Ordering::Equal)
1232 });
1233 let mut unique_results = Vec::new();
1234 let mut seen_ids = std::collections::HashSet::new();
1235
1236 for result in all_results {
1237 if !seen_ids.contains(&result.id) {
1238 seen_ids.insert(result.id.clone());
1239 unique_results.push(result);
1240 }
1241 }
1242
1243 if self.config.ollama.enabled {
1244 let mut current_answer = self
1246 .generate_semantic_answer_from_results(query, &unique_results)
1247 .await?;
1248
1249 if let Some(critic) = &self.critic {
1251 let mut attempts = 0;
1252 let max_retries = 3;
1253
1254 while attempts < max_retries {
1255 let context_strings: Vec<String> =
1256 unique_results.iter().map(|r| r.content.clone()).collect();
1257
1258 let evaluation = match critic
1259 .evaluate(query, &context_strings, ¤t_answer)
1260 .await
1261 {
1262 Ok(eval) => eval,
1263 Err(e) => {
1264 tracing::warn!("Critic evaluation failed: {}", e);
1265 break;
1266 },
1267 };
1268
1269 tracing::info!(
1270 "Critic Evaluation (Attempt {}): Score={:.2}, Grounded={}, Feedback='{}'",
1271 attempts + 1,
1272 evaluation.score,
1273 evaluation.grounded,
1274 evaluation.feedback
1275 );
1276
1277 if evaluation.score >= 0.7 && evaluation.grounded {
1278 tracing::info!("Answer accepted by critic.");
1279 break;
1280 }
1281
1282 tracing::warn!("Answer rejected by critic. Refining...");
1283
1284 current_answer = critic
1286 .refine(query, ¤t_answer, &evaluation.feedback)
1287 .await?;
1288 attempts += 1;
1289 }
1290 }
1291
1292 return Ok(current_answer);
1293 }
1294
1295 let formatted: Vec<String> = unique_results
1297 .into_iter()
1298 .take(10)
1299 .map(|r| format!("{} (score: {:.2})", r.content, r.score))
1300 .collect();
1301 Ok(formatted.join("\n"))
1302 }
1303
1304 #[cfg(feature = "async")]
1306 pub async fn ask(&mut self, query: &str) -> Result<String> {
1307 self.ensure_initialized()?;
1308
1309 if self.has_documents() && !self.has_graph() {
1310 self.build_graph().await?;
1311 }
1312
1313 let search_results = self.query_internal_with_results(query).await?;
1315
1316 if self.config.ollama.enabled {
1318 return self
1319 .generate_semantic_answer_from_results(query, &search_results)
1320 .await;
1321 }
1322
1323 let formatted: Vec<String> = search_results
1325 .into_iter()
1326 .map(|r| format!("{} (score: {:.2})", r.content, r.score))
1327 .collect();
1328 Ok(formatted.join("\n"))
1329 }
1330
1331 #[cfg(not(feature = "async"))]
1333 pub fn ask(&mut self, query: &str) -> Result<String> {
1334 self.ensure_initialized()?;
1335
1336 if self.has_documents() && !self.has_graph() {
1337 self.build_graph()?;
1338 }
1339
1340 let results = self.query_internal(query)?;
1341 Ok(results.join("\n"))
1342 }
1343
1344 #[cfg(feature = "async")]
1375 pub async fn ask_explained(&mut self, query: &str) -> Result<retrieval::ExplainedAnswer> {
1376 self.ensure_initialized()?;
1377
1378 if self.has_documents() && !self.has_graph() {
1379 self.build_graph().await?;
1380 }
1381
1382 let search_results = self.query_internal_with_results(query).await?;
1384
1385 let answer = if self.config.ollama.enabled {
1387 self.generate_semantic_answer_from_results(query, &search_results)
1388 .await?
1389 } else {
1390 search_results
1392 .iter()
1393 .take(3)
1394 .map(|r| r.content.clone())
1395 .collect::<Vec<_>>()
1396 .join(" ")
1397 };
1398
1399 let explained = retrieval::ExplainedAnswer::from_results(answer, &search_results, query);
1401
1402 Ok(explained)
1403 }
1404
    /// Runs a hybrid query and formats each hit as `"{content} (score: …)"`.
    ///
    /// NOTE(review): this is a `pub async fn` but, unlike the other async
    /// APIs in this impl, it carries no `#[cfg(feature = "async")]` gate,
    /// and the `#[cfg(not(feature = "async"))]` `ask` calls it as if it were
    /// synchronous. Verify the non-async build compiles or that a sync
    /// counterpart exists elsewhere.
    pub async fn query_internal(&mut self, query: &str) -> Result<Vec<String>> {
        let retrieval = self
            .retrieval_system
            .as_mut()
            .ok_or_else(|| GraphRAGError::Config {
                message: "Retrieval system not initialized".to_string(),
            })?;

        let graph = self
            .knowledge_graph
            .as_mut()
            .ok_or_else(|| GraphRAGError::Config {
                message: "Knowledge graph not initialized".to_string(),
            })?;

        // Attach embeddings to the graph before running the hybrid query.
        retrieval.add_embeddings_to_graph(graph).await?;

        let search_results = retrieval.hybrid_query(query, graph).await?;

        let result_strings: Vec<String> = search_results
            .into_iter()
            .map(|r| format!("{} (score: {:.2})", r.content, r.score))
            .collect();

        Ok(result_strings)
    }
1435
1436 async fn query_internal_with_results(
1438 &mut self,
1439 query: &str,
1440 ) -> Result<Vec<retrieval::SearchResult>> {
1441 let retrieval = self
1442 .retrieval_system
1443 .as_mut()
1444 .ok_or_else(|| GraphRAGError::Config {
1445 message: "Retrieval system not initialized".to_string(),
1446 })?;
1447
1448 let graph = self
1449 .knowledge_graph
1450 .as_mut()
1451 .ok_or_else(|| GraphRAGError::Config {
1452 message: "Knowledge graph not initialized".to_string(),
1453 })?;
1454
1455 retrieval.add_embeddings_to_graph(graph).await?;
1457
1458 retrieval.hybrid_query(query, graph).await
1460 }
1461
    /// Generates a natural-language answer for `query` by assembling context
    /// from `search_results` and prompting the configured Ollama model.
    ///
    /// Context assembly: entity results pull in their source chunks, chunk
    /// results contribute their own content, and any other result type is
    /// included as-is. Chunks are de-duplicated, and the final context is
    /// ordered by descending relevance score. When generation fails, the raw
    /// context is returned as a fallback instead of an error.
    ///
    /// # Errors
    ///
    /// Returns [`GraphRAGError::Config`] when the knowledge graph has not
    /// been initialized.
    #[cfg(feature = "async")]
    async fn generate_semantic_answer_from_results(
        &self,
        query: &str,
        search_results: &[retrieval::SearchResult],
    ) -> Result<String> {
        use crate::ollama::OllamaClient;

        let graph = self
            .knowledge_graph
            .as_ref()
            .ok_or_else(|| GraphRAGError::Config {
                message: "Knowledge graph not initialized".to_string(),
            })?;

        // (score, formatted-section) pairs; chunk ids already emitted are
        // tracked so the same passage never reaches the model twice.
        let mut context_parts = Vec::new();
        let mut seen_chunk_ids = std::collections::HashSet::new();

        for result in search_results.iter() {
            if result.result_type == retrieval::ResultType::Entity
                && !result.source_chunks.is_empty()
            {
                // Entity hit: include the chunks that mention the entity.
                // The display label is the content up to the trailing
                // " (score:" marker, if present.
                let entity_label = result
                    .content
                    .split(" (score:")
                    .next()
                    .unwrap_or(&result.content);
                for chunk_id_str in &result.source_chunks {
                    if seen_chunk_ids.contains(chunk_id_str) {
                        continue;
                    }
                    let chunk_id = ChunkId::new(chunk_id_str.clone());
                    if let Some(chunk) = graph.chunks().find(|c| c.id == chunk_id) {
                        seen_chunk_ids.insert(chunk_id_str.clone());
                        context_parts.push((
                            result.score,
                            format!(
                                "[Entity: {} | Relevance: {:.2}]\n{}",
                                entity_label, result.score, chunk.content
                            ),
                        ));
                    }
                }
            }
            // Chunk hit: include its content directly (at most once).
            else if result.result_type == retrieval::ResultType::Chunk {
                if !seen_chunk_ids.contains(&result.id) {
                    seen_chunk_ids.insert(result.id.clone());
                    context_parts.push((
                        result.score,
                        format!(
                            "[Chunk | Relevance: {:.2}]\n{}",
                            result.score, result.content
                        ),
                    ));
                }
            }
            // Any other result type: include as-is, tagged with its type.
            else {
                context_parts.push((
                    result.score,
                    format!(
                        "[{:?} | Relevance: {:.2}]\n{}",
                        result.result_type, result.score, result.content
                    ),
                ));
            }
        }

        // Most relevant context first; NaN scores compare as equal so the
        // sort never panics.
        context_parts.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
        let context = context_parts
            .into_iter()
            .map(|(_, text)| text)
            .collect::<Vec<_>>()
            .join("\n\n---\n\n");

        if context.trim().is_empty() {
            return Ok("No relevant information found in the knowledge graph.".to_string());
        }

        let client = OllamaClient::new(self.config.ollama.clone());

        let prompt = format!(
            "You are a knowledgeable assistant specialized in answering questions based on a knowledge graph.\n\n\
             IMPORTANT INSTRUCTIONS:\n\
             - Answer ONLY using information from the provided context below\n\
             - Synthesize information from ALL context sections to give a comprehensive answer\n\
             - Provide direct, conversational, and natural responses\n\
             - Do NOT show your reasoning process or use <think> tags\n\
             - If the context lacks sufficient information, clearly state: \"I don't have enough information to answer this question.\"\n\
             - Aim for a complete answer (3-6 sentences) that covers different aspects found across the context\n\
             - Use a natural, helpful tone as if speaking to a person\n\n\
             CONTEXT:\n\
             {}\n\n\
             QUESTION: {}\n\n\
             ANSWER (direct response only, no reasoning):",
            context, query
        );

        // Size the model's context window: rough 4-chars-per-token estimate
        // for the prompt, plus the answer budget, plus a 20% safety margin,
        // rounded up to a 1024-token multiple and clamped to [4096, 131072].
        let max_answer_tokens: u32 = 800;
        let prompt_tokens = (prompt.len() / 4) as u32;
        let total = prompt_tokens + max_answer_tokens;
        let with_margin = (total as f32 * 1.20) as u32;
        let num_ctx = (((with_margin + 1023) / 1024) * 1024)
            .max(4096)
            .min(131_072);

        let params = crate::ollama::OllamaGenerationParams {
            num_predict: Some(max_answer_tokens),
            temperature: self.config.ollama.temperature,
            num_ctx: Some(num_ctx),
            keep_alive: self.config.ollama.keep_alive.clone(),
            ..Default::default()
        };

        match client.generate_with_params(&prompt, params).await {
            Ok(answer) => {
                // Strip any <think> blocks the model emitted despite the
                // prompt instructions.
                let cleaned_answer = Self::remove_thinking_tags(&answer);
                Ok(cleaned_answer.trim().to_string())
            },
            Err(e) => {
                #[cfg(feature = "tracing")]
                tracing::warn!(
                    "LLM generation failed: {}. Falling back to search results.",
                    e
                );

                // Best-effort fallback: surface the assembled context rather
                // than failing the whole query.
                Ok(format!(
                    "Relevant information from knowledge graph:\n\n{}",
                    context
                ))
            },
        }
    }
1607
1608 #[cfg(feature = "async")]
1613 fn remove_thinking_tags(text: &str) -> String {
1614 let mut result = text.to_string();
1617
1618 while let Some(start) = result.find("<think>") {
1619 if let Some(end) = result[start..].find("</think>") {
1621 let end_pos = start + end + "</think>".len();
1623 result.replace_range(start..end_pos, "");
1624 } else {
1625 result.replace_range(start..start + "<think>".len(), "");
1627 break;
1628 }
1629 }
1630
1631 result.trim().to_string()
1632 }
1633
    /// Returns a reference to the active configuration.
    pub fn config(&self) -> &Config {
        &self.config
    }
1638
1639 pub fn is_initialized(&self) -> bool {
1641 self.knowledge_graph.is_some() && self.retrieval_system.is_some()
1642 }
1643
1644 pub fn has_documents(&self) -> bool {
1646 if let Some(graph) = &self.knowledge_graph {
1647 graph.chunks().count() > 0
1648 } else {
1649 false
1650 }
1651 }
1652
1653 pub fn has_graph(&self) -> bool {
1655 if let Some(graph) = &self.knowledge_graph {
1656 graph.entities().count() > 0
1657 } else {
1658 false
1659 }
1660 }
1661
    /// Returns a shared reference to the knowledge graph, if initialized.
    pub fn knowledge_graph(&self) -> Option<&KnowledgeGraph> {
        self.knowledge_graph.as_ref()
    }
1666
1667 pub fn get_entity(&self, entity_id: &str) -> Option<&Entity> {
1669 if let Some(graph) = &self.knowledge_graph {
1670 graph.entities().find(|e| e.id.0 == entity_id)
1671 } else {
1672 None
1673 }
1674 }
1675
1676 pub fn get_entity_relationships(&self, entity_id: &str) -> Vec<&Relationship> {
1678 if let Some(graph) = &self.knowledge_graph {
1679 let entity_id_obj = EntityId::new(entity_id.to_string());
1680 graph
1681 .relationships()
1682 .filter(|r| r.source == entity_id_obj || r.target == entity_id_obj)
1683 .collect()
1684 } else {
1685 Vec::new()
1686 }
1687 }
1688
1689 pub fn get_chunk(&self, chunk_id: &str) -> Option<&TextChunk> {
1691 if let Some(graph) = &self.knowledge_graph {
1692 graph.chunks().find(|c| c.id.0 == chunk_id)
1693 } else {
1694 None
1695 }
1696 }
1697
1698 #[cfg(all(feature = "pagerank", feature = "async"))]
1700 pub async fn ask_with_pagerank(
1701 &mut self,
1702 query: &str,
1703 ) -> Result<Vec<retrieval::pagerank_retrieval::ScoredResult>> {
1704 use crate::retrieval::pagerank_retrieval::PageRankRetrievalSystem;
1705
1706 self.ensure_initialized()?;
1707
1708 if self.has_documents() && !self.has_graph() {
1709 self.build_graph().await?;
1710 }
1711
1712 let graph = self
1713 .knowledge_graph
1714 .as_ref()
1715 .ok_or_else(|| GraphRAGError::Config {
1716 message: "Knowledge graph not initialized".to_string(),
1717 })?;
1718
1719 let pagerank_system = PageRankRetrievalSystem::new(10);
1720 pagerank_system.search_with_pagerank(query, graph, Some(5))
1721 }
1722
1723 #[cfg(all(feature = "pagerank", not(feature = "async")))]
1725 pub fn ask_with_pagerank(
1726 &mut self,
1727 query: &str,
1728 ) -> Result<Vec<retrieval::pagerank_retrieval::ScoredResult>> {
1729 use crate::retrieval::pagerank_retrieval::PageRankRetrievalSystem;
1730
1731 self.ensure_initialized()?;
1732
1733 if self.has_documents() && !self.has_graph() {
1734 self.build_graph()?;
1735 }
1736
1737 let graph = self
1738 .knowledge_graph
1739 .as_ref()
1740 .ok_or_else(|| GraphRAGError::Config {
1741 message: "Knowledge graph not initialized".to_string(),
1742 })?;
1743
1744 let pagerank_system = PageRankRetrievalSystem::new(10);
1745 pagerank_system.search_with_pagerank(query, graph, Some(5))
1746 }
1747
    /// Returns a mutable reference to the knowledge graph, if initialized.
    pub fn knowledge_graph_mut(&mut self) -> Option<&mut KnowledgeGraph> {
        self.knowledge_graph.as_mut()
    }
1752
1753 #[cfg(feature = "json5-support")]
1773 pub fn from_json5_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
1774 use crate::config::json5_loader::load_json5_config;
1775 use crate::config::setconfig::SetConfig;
1776
1777 let set_config = load_json5_config::<SetConfig, _>(path)?;
1778 let config = set_config.to_graphrag_config();
1779 Self::new(config)
1780 }
1781
1782 pub fn from_config_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
1805 use crate::config::setconfig::SetConfig;
1806
1807 let set_config = SetConfig::from_file(path)?;
1808 let config = set_config.to_graphrag_config();
1809 Self::new(config)
1810 }
1811
1812 #[cfg(feature = "async")]
1842 pub async fn from_config_and_document<P1, P2>(
1843 config_path: P1,
1844 document_path: P2,
1845 ) -> Result<Self>
1846 where
1847 P1: AsRef<std::path::Path>,
1848 P2: AsRef<std::path::Path>,
1849 {
1850 let mut graphrag = Self::from_config_file(config_path)?;
1852
1853 graphrag.initialize()?;
1855
1856 let content = std::fs::read_to_string(document_path).map_err(GraphRAGError::Io)?;
1858
1859 graphrag.add_document_from_text(&content)?;
1860
1861 graphrag.build_graph().await?;
1863
1864 Ok(graphrag)
1865 }
1866
1867 #[cfg(feature = "async")]
1893 pub async fn quick_start(text: &str) -> Result<Self> {
1894 let config = Config::load()?;
1896
1897 let mut graphrag = Self::new(config)?;
1898 graphrag.initialize()?;
1899 graphrag.add_document_from_text(text)?;
1900 graphrag.build_graph().await?;
1901
1902 Ok(graphrag)
1903 }
1904
1905 #[cfg(feature = "async")]
1925 pub async fn quick_start_with_config<F>(text: &str, configure: F) -> Result<Self>
1926 where
1927 F: FnOnce(crate::builder::GraphRAGBuilder) -> crate::builder::GraphRAGBuilder,
1928 {
1929 let builder = configure(Self::builder());
1930 let mut graphrag = builder.build()?;
1931 graphrag.initialize()?;
1932 graphrag.add_document_from_text(text)?;
1933 graphrag.build_graph().await?;
1934
1935 Ok(graphrag)
1936 }
1937
1938 fn ensure_initialized(&mut self) -> Result<()> {
1940 if !self.is_initialized() {
1941 self.initialize()
1942 } else {
1943 Ok(())
1944 }
1945 }
1946}
1947
#[cfg(test)]
mod tests {
    use super::*;

    // Constructing with the default configuration must succeed.
    #[test]
    fn test_graphrag_creation() {
        assert!(GraphRAG::new(Config::default()).is_ok());
    }

    // The builder accepts the common knobs and still produces an instance.
    #[test]
    fn test_builder_pattern() {
        let built = GraphRAG::builder()
            .with_output_dir("./test_output")
            .with_chunk_size(512)
            .with_top_k(10)
            .build();
        assert!(built.is_ok());
    }
}