// graphrag_core/lib.rs
1//! # GraphRAG Core
2//!
3//! Portable core library for GraphRAG - works on both native and WASM platforms.
4//!
5//! This is the foundational crate that provides:
6//! - Knowledge graph construction and management
7//! - Entity extraction and linking
8//! - Vector embeddings and similarity search
9//! - Graph algorithms (PageRank, community detection)
10//! - Retrieval systems (semantic, keyword, hybrid)
11//! - Caching and optimization
12//!
13//! ## Platform Support
14//!
15//! - **Native**: Full feature set with optional CUDA/Metal GPU acceleration
16//! - **WASM**: Browser-compatible with Voy vector search and Candle embeddings
17//!
18//! ## Feature Flags
19//!
20//! - `wasm`: Enable WASM compatibility (uses Voy instead of HNSW)
21//! - `cuda`: Enable NVIDIA GPU acceleration via Candle
22//! - `metal`: Enable Apple Silicon GPU acceleration
23//! - `webgpu`: Enable WebGPU acceleration for browser (via Burn)
24//! - `pagerank`: Enable PageRank-based retrieval
25//! - `lightrag`: Enable LightRAG optimizations (6000x token reduction)
26//! - `caching`: Enable intelligent LLM response caching
27//!
28//! ## Quick Start
29//!
30//! ```rust
31//! use graphrag_core::{GraphRAG, Config};
32//!
33//! # fn example() -> graphrag_core::Result<()> {
34//! let config = Config::default();
35//! let mut graphrag = GraphRAG::new(config)?;
36//! graphrag.initialize()?;
37//! # Ok(())
38//! # }
39//! ```
40
41#![warn(missing_docs)]
42#![warn(clippy::all)]
43// Note: WASM with wasm-bindgen DOES use std, so we don't disable it
44
// ================================
// MODULE DECLARATIONS
// ================================

// Core modules (always available)
/// Configuration management and loading
pub mod config;
/// Core traits and types
pub mod core;
/// Text processing and chunking
pub mod text;
/// Vector operations and embeddings
pub mod vector;
/// Graph data structures and algorithms
pub mod graph;
/// Entity extraction and management
pub mod entity;
/// Retrieval strategies and implementations
pub mod retrieval;
/// Text generation and LLM interactions (async feature only)
#[cfg(feature = "async")]
pub mod generation;
/// Storage backends and persistence
// Available whenever any storage-capable feature is on.
#[cfg(any(feature = "memory-storage", feature = "persistent-storage", feature = "async"))]
pub mod storage;

/// Persistence layer for knowledge graphs (workspace management always available)
pub mod persistence;
/// Query processing and execution
pub mod query;
/// Builder pattern implementations
pub mod builder;
/// Text summarization capabilities
pub mod summarization;
/// Ollama LLM integration
pub mod ollama;
/// Natural language processing utilities
pub mod nlp;
/// Embedding generation and providers
pub mod embeddings;

// Pipeline modules
/// Data processing pipelines
pub mod pipeline;

// Advanced features (feature-gated)
/// Parallel processing utilities (requires the `parallel-processing` feature)
#[cfg(feature = "parallel-processing")]
pub mod parallel;

#[cfg(feature = "lightrag")]
/// LightRAG dual-level retrieval optimization
pub mod lightrag;

// Utility modules
/// Reranking utilities for improving search result quality
pub mod reranking;

/// Monitoring, benchmarking, and performance tracking
pub mod monitoring;

/// Evaluation framework for query results and pipeline validation
pub mod evaluation;

/// API endpoints and handlers
#[cfg(feature = "api")]
pub mod api;

/// Inference module for model predictions
pub mod inference;

/// Multi-document corpus processing
#[cfg(feature = "corpus-processing")]
pub mod corpus;

// Feature-gated modules
#[cfg(feature = "async")]
/// Async GraphRAG implementation
pub mod async_graphrag;

#[cfg(feature = "async")]
/// Async processing pipelines
pub mod async_processing;

#[cfg(feature = "caching")]
/// Caching utilities for LLM responses
pub mod caching;

#[cfg(feature = "function-calling")]
/// Function calling capabilities for LLMs
pub mod function_calling;

#[cfg(feature = "incremental")]
/// Incremental graph updates
pub mod incremental;

#[cfg(feature = "rograg")]
/// ROGRAG (Robustly Optimized GraphRAG) implementation
pub mod rograg;

// TODO: Implement remaining utility modules
// pub mod automatic_entity_linking;
// pub mod phase_saver;
147
// ================================
// PUBLIC API EXPORTS
// ================================

/// Prelude module containing the most commonly used types
///
/// Import with `use graphrag_core::prelude::*;` to bring the document,
/// entity, and error types most programs need into scope.
pub mod prelude {
    // TODO: re-enable once the GraphRAG/builder exports are finalized.
    // pub use crate::GraphRAG;
    // pub use crate::builder::{GraphRAGBuilder, ConfigPreset, LLMProvider};
    pub use crate::config::Config;
    pub use crate::core::{
        Document, DocumentId, Entity, EntityId, KnowledgeGraph,
        GraphRAGError, Result,
    };
}
162
// Re-export core types at the crate root so callers can write
// `graphrag_core::Document` instead of `graphrag_core::core::Document`.
pub use crate::config::Config;
pub use crate::core::{
    ChunkId, Document, DocumentId, Entity, EntityId, EntityMention,
    ErrorContext, ErrorSeverity, GraphRAGError, KnowledgeGraph,
    Relationship, Result, TextChunk,
};

// Re-export core traits (async feature only)
#[cfg(feature = "async")]
pub use crate::core::traits::{
    Embedder, EntityExtractor, GraphStore, LanguageModel,
    Retriever, Storage, VectorStore,
};

// Storage exports (when storage features are enabled)
#[cfg(feature = "memory-storage")]
pub use crate::storage::MemoryStorage;

// TODO: Re-export builder when implemented
// pub use crate::builder::{ConfigPreset, GraphRAGBuilder, LLMProvider};

// TODO: Re-export main system when implemented
// pub use crate::GraphRAG;

// Feature-gated exports
#[cfg(feature = "lightrag")]
pub use crate::lightrag::{
    DualLevelRetriever, DualRetrievalConfig, DualRetrievalResults,
    KeywordExtractor, KeywordExtractorConfig, DualLevelKeywords,
    MergeStrategy, SemanticSearcher,
};

#[cfg(feature = "pagerank")]
pub use crate::graph::pagerank::{
    PageRankConfig, PersonalizedPageRank,
};

#[cfg(feature = "leiden")]
pub use crate::graph::leiden::{
    HierarchicalCommunities, LeidenConfig, LeidenCommunityDetector,
};

#[cfg(feature = "cross-encoder")]
pub use crate::reranking::cross_encoder::{
    CrossEncoder, CrossEncoderConfig, ConfidenceCrossEncoder,
    RankedResult, RerankingStats,
};

#[cfg(feature = "pagerank")]
pub use crate::retrieval::pagerank_retrieval::{
    PageRankRetrievalSystem, ScoredResult,
};

#[cfg(feature = "pagerank")]
pub use crate::retrieval::hipporag_ppr::{
    HippoRAGConfig, HippoRAGRetriever, Fact,
};
221
// ================================
// MAIN GRAPHRAG SYSTEM
// ================================

/// Main GraphRAG system
///
/// This is the primary entry point for using GraphRAG. It orchestrates
/// all components: knowledge graph, retrieval, generation, and caching.
///
/// NOTE(review): with the `async` feature enabled, `build_graph` and `ask`
/// are async — the example below targets the sync API; confirm which
/// feature set doctests are run under.
///
/// # Examples
///
/// ```rust
/// use graphrag_core::{GraphRAG, Config};
///
/// # fn example() -> graphrag_core::Result<()> {
/// let config = Config::default();
/// let mut graphrag = GraphRAG::new(config)?;
/// graphrag.initialize()?;
///
/// // Add documents
/// graphrag.add_document_from_text("Your document text")?;
///
/// // Build knowledge graph
/// graphrag.build_graph()?;
///
/// // Query
/// let answer = graphrag.ask("Your question?")?;
/// println!("Answer: {}", answer);
/// # Ok(())
/// # }
/// ```
pub struct GraphRAG {
    /// Configuration supplied at construction; read by all pipelines.
    config: Config,
    /// Populated by `initialize`; `None` until then.
    knowledge_graph: Option<KnowledgeGraph>,
    /// Populated by `initialize`; `None` until then.
    retrieval_system: Option<retrieval::RetrievalSystem>,
    /// Reserved for parallel pipelines; currently never set (see `new`).
    #[cfg(feature = "parallel-processing")]
    #[allow(dead_code)]
    parallel_processor: Option<parallel::ParallelProcessor>,
}
261
262impl GraphRAG {
263    /// Create a new GraphRAG instance with the given configuration
264    pub fn new(config: Config) -> Result<Self> {
265        Ok(Self {
266            config,
267            knowledge_graph: None,
268            retrieval_system: None,
269            #[cfg(feature = "parallel-processing")]
270            parallel_processor: None,
271        })
272    }
273
274    // TODO: Implement builder when GraphRAGBuilder module exists
275    // /// Create a builder for configuring GraphRAG
276    // pub fn builder() -> GraphRAGBuilder {
277    //     GraphRAGBuilder::new()
278    // }
279
280    /// Initialize the GraphRAG system
281    pub fn initialize(&mut self) -> Result<()> {
282        self.knowledge_graph = Some(KnowledgeGraph::new());
283        self.retrieval_system = Some(retrieval::RetrievalSystem::new(&self.config)?);
284        Ok(())
285    }
286
287    /// Add a document from text content
288    pub fn add_document_from_text(&mut self, text: &str) -> Result<()> {
289        use crate::text::TextProcessor;
290        use indexmap::IndexMap;
291
292        // Use UUID for doc ID (works in both native and WASM)
293        let doc_id = DocumentId::new(
294            format!("doc_{}", uuid::Uuid::new_v4().simple())
295        );
296
297        let document = Document {
298            id: doc_id,
299            title: "Document".to_string(),
300            content: text.to_string(),
301            metadata: IndexMap::new(),
302            chunks: Vec::new(),
303        };
304
305        let text_processor = TextProcessor::new(
306            self.config.text.chunk_size,
307            self.config.text.chunk_overlap
308        )?;
309        let chunks = text_processor.chunk_text(&document)?;
310
311        let document_with_chunks = Document {
312            chunks,
313            ..document
314        };
315
316        self.add_document(document_with_chunks)
317    }
318
319    /// Add a document to the system
320    pub fn add_document(&mut self, document: Document) -> Result<()> {
321        let graph = self.knowledge_graph.as_mut()
322            .ok_or_else(|| GraphRAGError::Config {
323                message: "Knowledge graph not initialized".to_string(),
324            })?;
325
326        graph.add_document(document)
327    }
328
329    /// Clear all entities and relationships from the knowledge graph
330    ///
331    /// This method preserves documents and text chunks but removes all extracted entities and relationships.
332    /// Useful for rebuilding the graph from scratch without reloading documents.
333    pub fn clear_graph(&mut self) -> Result<()> {
334        let graph = self.knowledge_graph.as_mut()
335            .ok_or_else(|| GraphRAGError::Config {
336                message: "Knowledge graph not initialized".to_string(),
337            })?;
338
339        #[cfg(feature = "tracing")]
340        tracing::info!("Clearing knowledge graph (preserving documents and chunks)");
341
342        graph.clear_entities_and_relationships();
343        Ok(())
344    }
345
    /// Build the knowledge graph from added documents
    ///
    /// This method implements dynamic pipeline selection based on the configured approach:
    /// - **Semantic** (config.approach = "semantic"): Uses LLM-based entity extraction with gleaning
    ///   for high-quality results. Requires Ollama to be enabled.
    /// - **Algorithmic** (config.approach = "algorithmic"): Uses pattern-based entity extraction
    ///   (regex + capitalization) for fast, resource-efficient processing.
    /// - **Hybrid** (config.approach = "hybrid"): Combines both approaches with weighted fusion.
    ///
    /// The selection is controlled by `config.approach` and mapped from TomlConfig's [mode] section.
    ///
    /// # Errors
    ///
    /// Returns a configuration error if the knowledge graph has not been
    /// initialized; propagates extraction and entity-insertion errors.
    /// Failures when inserting individual relationships are logged and
    /// skipped rather than aborting the build.
    #[cfg(feature = "async")]
    pub async fn build_graph(&mut self) -> Result<()> {
        use indicatif::{ProgressBar, ProgressStyle};

        let graph = self.knowledge_graph.as_mut()
            .ok_or_else(|| GraphRAGError::Config {
                message: "Knowledge graph not initialized".to_string(),
            })?;

        // Snapshot the chunks so we can iterate while mutating the graph.
        let chunks: Vec<_> = graph.chunks().cloned().collect();
        let total_chunks = chunks.len();

        // PHASE 1: Extract and add all entities
        // Pipeline selection based on config.approach (semantic/algorithmic/hybrid)
        // - Semantic: config.entities.use_gleaning = true (LLM-based with iterative refinement)
        // - Algorithmic: config.entities.use_gleaning = false (pattern-based extraction)
        // - Hybrid: config.entities.use_gleaning = true (uses LLM + pattern fusion)

        // DEBUG: Log current configuration state
        #[cfg(feature = "tracing")]
        tracing::info!(
            "build_graph() - Config state: approach='{}', use_gleaning={}, ollama.enabled={}",
            self.config.approach,
            self.config.entities.use_gleaning,
            self.config.ollama.enabled
        );

        if self.config.entities.use_gleaning && self.config.ollama.enabled {
            // LLM-based extraction with gleaning
            #[cfg(feature = "async")]
            {
                use crate::entity::GleaningEntityExtractor;
                use crate::ollama::OllamaClient;

                #[cfg(feature = "tracing")]
                tracing::info!(
                    "Using LLM-based entity extraction with gleaning (max_rounds: {})",
                    self.config.entities.max_gleaning_rounds
                );

                // Create Ollama client
                let client = OllamaClient::new(self.config.ollama.clone());

                // Create gleaning config from our config
                let gleaning_config = crate::entity::GleaningConfig {
                    max_gleaning_rounds: self.config.entities.max_gleaning_rounds,
                    completion_threshold: 0.8,
                    entity_confidence_threshold: self.config.entities.min_confidence as f64,
                    use_llm_completion_check: true,
                    // Fall back to a default type set when none are configured.
                    entity_types: if self.config.entities.entity_types.is_empty() {
                        vec![
                            "PERSON".to_string(),
                            "ORGANIZATION".to_string(),
                            "LOCATION".to_string(),
                        ]
                    } else {
                        self.config.entities.entity_types.clone()
                    },
                    temperature: 0.1,
                    max_tokens: 1500,
                };

                // Create gleaning extractor with LLM client
                let extractor = GleaningEntityExtractor::new(client, gleaning_config);

                // Create progress bar for entity extraction
                let pb = ProgressBar::new(total_chunks as u64);
                pb.set_style(
                    ProgressStyle::default_bar()
                        .template("   [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} chunks ({eta})")
                        .expect("Invalid progress bar template")
                        .progress_chars("=>-")
                );
                pb.set_message("Extracting entities with LLM");

                // Extract entities using async gleaning
                for (idx, chunk) in chunks.iter().enumerate() {
                    pb.set_message(format!("Chunk {}/{} (gleaning with {} rounds)",
                        idx + 1, total_chunks, self.config.entities.max_gleaning_rounds));

                    // Gleaning yields both entities and relationships in one pass.
                    let (entities, relationships) = extractor.extract_with_gleaning(chunk).await?;

                    // Add extracted entities
                    for entity in entities {
                        graph.add_entity(entity)?;
                    }

                    // Add extracted relationships
                    for relationship in relationships {
                        if let Err(e) = graph.add_relationship(relationship) {
                            // NOTE(review): parses entity names out of the error
                            // string; brittle — confirm the error message format.
                            #[cfg(feature = "tracing")]
                            tracing::warn!(
                                "Failed to add relationship: {} -> {} ({}). Error: {}",
                                e.to_string().split("entity ").nth(1).unwrap_or("unknown"),
                                e.to_string().split("entity ").nth(2).unwrap_or("unknown"),
                                "relationship",
                                e
                            );
                        }
                    }

                    pb.inc(1);
                }

                pb.finish_with_message("Entity extraction complete");
            }

        } else {
            // Pattern-based extraction (regex + capitalization)
            use crate::entity::EntityExtractor;

            #[cfg(feature = "tracing")]
            tracing::info!("Using pattern-based entity extraction");

            let extractor = EntityExtractor::new(self.config.entities.min_confidence)?;

            // Create progress bar for pattern-based extraction
            let pb = ProgressBar::new(total_chunks as u64);
            pb.set_style(
                ProgressStyle::default_bar()
                    .template("   [{elapsed_precise}] [{bar:40.green/blue}] {pos}/{len} chunks ({eta})")
                    .expect("Invalid progress bar template")
                    .progress_chars("=>-")
            );
            pb.set_message("Extracting entities (pattern-based)");

            for (idx, chunk) in chunks.iter().enumerate() {
                pb.set_message(format!("Chunk {}/{} (pattern-based)", idx + 1, total_chunks));

                let entities = extractor.extract_from_chunk(chunk)?;
                for entity in entities {
                    graph.add_entity(entity)?;
                }

                pb.inc(1);
            }

            pb.finish_with_message("Entity extraction complete");

            // PHASE 2: Extract and add relationships between entities (for pattern-based only)
            // Gleaning extractor already extracts relationships in Phase 1
            // Only proceed if graph construction config enables relationship extraction
            if self.config.graph.extract_relationships {
                let all_entities: Vec<_> = graph.entities().cloned().collect();

                // Create progress bar for relationship extraction
                let rel_pb = ProgressBar::new(total_chunks as u64);
                rel_pb.set_style(
                    ProgressStyle::default_bar()
                        .template("   [{elapsed_precise}] [{bar:40.yellow/blue}] {pos}/{len} chunks ({eta})")
                        .expect("Invalid progress bar template")
                        .progress_chars("=>-")
                );
                rel_pb.set_message("Extracting relationships");

                for (idx, chunk) in chunks.iter().enumerate() {
                    rel_pb.set_message(format!("Chunk {}/{} (relationships)", idx + 1, total_chunks));
                    // Get entities that appear in this chunk
                    let chunk_entities: Vec<_> = all_entities
                        .iter()
                        .filter(|e| e.mentions.iter().any(|m| m.chunk_id == chunk.id))
                        .cloned()
                        .collect();

                    if chunk_entities.len() < 2 {
                        rel_pb.inc(1);
                        continue; // Need at least 2 entities for relationships
                    }

                    // Extract relationships
                    let relationships = extractor.extract_relationships(&chunk_entities, chunk)?;

                    // Add relationships to graph
                    for (source_id, target_id, relation_type) in relationships {
                        let relationship = Relationship {
                            source: source_id.clone(),
                            target: target_id.clone(),
                            relation_type: relation_type.clone(),
                            // Confidence is taken from config, not estimated per pair.
                            confidence: self.config.graph.relationship_confidence_threshold,
                            context: vec![chunk.id.clone()],
                        };

                        // Log errors for debugging relationship extraction issues
                        if let Err(_e) = graph.add_relationship(relationship) {
                            #[cfg(feature = "tracing")]
                            tracing::debug!(
                                "Failed to add relationship: {} -> {} ({}). Error: {}",
                                source_id,
                                target_id,
                                relation_type,
                                _e
                            );
                        }
                    }

                    rel_pb.inc(1);
                }

                rel_pb.finish_with_message("Relationship extraction complete");
            }  // End of extract_relationships check
        }  // End of pattern-based extraction

        Ok(())
    }
560
561    /// Build the knowledge graph from added documents (synchronous fallback)
562    ///
563    /// This is a synchronous version for when the async feature is not enabled.
564    /// Only supports pattern-based entity extraction.
565    #[cfg(not(feature = "async"))]
566    pub fn build_graph(&mut self) -> Result<()> {
567        use crate::entity::EntityExtractor;
568
569        let graph = self.knowledge_graph.as_mut()
570            .ok_or_else(|| GraphRAGError::Config {
571                message: "Knowledge graph not initialized".to_string(),
572            })?;
573
574        let chunks: Vec<_> = graph.chunks().cloned().collect();
575
576        #[cfg(feature = "tracing")]
577        tracing::info!("Using pattern-based entity extraction (sync mode)");
578
579        let extractor = EntityExtractor::new(self.config.entities.min_confidence)?;
580
581        for chunk in &chunks {
582            let entities = extractor.extract_from_chunk(chunk)?;
583            for entity in entities {
584                graph.add_entity(entity)?;
585            }
586        }
587
588        // Extract relationships if enabled
589        if self.config.graph.extract_relationships {
590            let all_entities: Vec<_> = graph.entities().cloned().collect();
591
592            for chunk in &chunks {
593                let chunk_entities: Vec<_> = all_entities
594                    .iter()
595                    .filter(|e| e.mentions.iter().any(|m| m.chunk_id == chunk.id))
596                    .cloned()
597                    .collect();
598
599                if chunk_entities.len() < 2 {
600                    continue;
601                }
602
603                let relationships = extractor.extract_relationships(&chunk_entities, chunk)?;
604
605                for (source_id, target_id, relation_type) in relationships {
606                    let relationship = Relationship {
607                        source: source_id.clone(),
608                        target: target_id.clone(),
609                        relation_type: relation_type.clone(),
610                        confidence: self.config.graph.relationship_confidence_threshold,
611                        context: vec![chunk.id.clone()],
612                    };
613
614                    if let Err(_e) = graph.add_relationship(relationship) {
615                        #[cfg(feature = "tracing")]
616                        tracing::debug!(
617                            "Failed to add relationship: {} -> {} ({}). Error: {}",
618                            source_id,
619                            target_id,
620                            relation_type,
621                            _e
622                        );
623                    }
624                }
625            }
626        }
627
628        Ok(())
629    }
630
631    /// Query the system for relevant information
632    #[cfg(feature = "async")]
633    pub async fn ask(&mut self, query: &str) -> Result<String> {
634        self.ensure_initialized()?;
635
636        if self.has_documents() && !self.has_graph() {
637            self.build_graph().await?;
638        }
639
640        // Get full search results with metadata
641        let search_results = self.query_internal_with_results(query)?;
642
643        // If Ollama is enabled, generate semantic answer using LLM
644        if self.config.ollama.enabled {
645            return self.generate_semantic_answer_from_results(query, &search_results).await;
646        }
647
648        // Fallback: return formatted search results
649        let formatted: Vec<String> = search_results
650            .into_iter()
651            .map(|r| format!("{} (score: {:.2})", r.content, r.score))
652            .collect();
653        Ok(formatted.join("\n"))
654    }
655
656    /// Query the system for relevant information (synchronous version)
657    #[cfg(not(feature = "async"))]
658    pub fn ask(&mut self, query: &str) -> Result<String> {
659        self.ensure_initialized()?;
660
661        if self.has_documents() && !self.has_graph() {
662            self.build_graph()?;
663        }
664
665        let results = self.query_internal(query)?;
666        Ok(results.join("\n"))
667    }
668
669    /// Internal query method (public for CLI access to raw results)
670    pub fn query_internal(&mut self, query: &str) -> Result<Vec<String>> {
671        let retrieval = self.retrieval_system.as_mut()
672            .ok_or_else(|| GraphRAGError::Config {
673                message: "Retrieval system not initialized".to_string(),
674            })?;
675
676        let graph = self.knowledge_graph.as_mut()
677            .ok_or_else(|| GraphRAGError::Config {
678                message: "Knowledge graph not initialized".to_string(),
679            })?;
680
681        // Add embeddings to graph if not already present
682        retrieval.add_embeddings_to_graph(graph)?;
683
684        // Use hybrid query for real semantic search
685        let search_results = retrieval.hybrid_query(query, graph)?;
686
687        // Convert search results to strings
688        let result_strings: Vec<String> = search_results
689            .into_iter()
690            .map(|r| format!("{} (score: {:.2})", r.content, r.score))
691            .collect();
692
693        Ok(result_strings)
694    }
695
696    /// Internal query method that returns full SearchResult objects
697    fn query_internal_with_results(&mut self, query: &str) -> Result<Vec<retrieval::SearchResult>> {
698        let retrieval = self.retrieval_system.as_mut()
699            .ok_or_else(|| GraphRAGError::Config {
700                message: "Retrieval system not initialized".to_string(),
701            })?;
702
703        let graph = self.knowledge_graph.as_mut()
704            .ok_or_else(|| GraphRAGError::Config {
705                message: "Knowledge graph not initialized".to_string(),
706            })?;
707
708        // Add embeddings to graph if not already present
709        retrieval.add_embeddings_to_graph(graph)?;
710
711        // Use hybrid query for real semantic search
712        retrieval.hybrid_query(query, graph)
713    }
714
715
    /// Generate semantic answer from SearchResult objects
    ///
    /// Builds an LLM context from up to 5 top results (entity results pull
    /// in up to 2 source chunks each, truncated to 400 bytes; chunk results
    /// use their own content), prompts Ollama, and strips `<think>` tags
    /// from the reply. On LLM failure, falls back to returning the raw
    /// context.
    ///
    /// NOTE(review): the 400-byte truncation uses `&chunk.content[..400]`,
    /// which panics if byte 400 is not a UTF-8 char boundary — confirm
    /// chunk content is ASCII or guard with `char_indices`.
    #[cfg(feature = "async")]
    async fn generate_semantic_answer_from_results(&self, query: &str, search_results: &[retrieval::SearchResult]) -> Result<String> {
        use crate::ollama::OllamaClient;

        let graph = self.knowledge_graph.as_ref()
            .ok_or_else(|| GraphRAGError::Config {
                message: "Knowledge graph not initialized".to_string(),
            })?;

        // Build context from search results by fetching actual chunk content
        let mut context_parts = Vec::new();

        // Cap the context at the 5 highest-ranked results.
        for result in search_results.iter().take(5) {
            // For entity results, fetch the chunks where the entity appears
            if result.result_type == retrieval::ResultType::Entity && !result.source_chunks.is_empty() {
                // Get the first few chunks where this entity is mentioned
                for chunk_id_str in result.source_chunks.iter().take(2) {
                    let chunk_id = ChunkId::new(chunk_id_str.clone());
                    if let Some(chunk) = graph.chunks().find(|c| c.id == chunk_id) {
                        // Truncate long chunks to keep the prompt small.
                        let chunk_excerpt = if chunk.content.len() > 400 {
                            format!("{}...", &chunk.content[..400])
                        } else {
                            chunk.content.clone()
                        };

                        context_parts.push(format!(
                            "[Entity: {} | Relevance: {:.2}]\n{}",
                            // Strip any trailing " (score: …)" suffix from the label.
                            result.content.split(" (score:").next().unwrap_or(&result.content),
                            result.score,
                            chunk_excerpt
                        ));
                    }
                }
            }
            // For chunk results, use the content directly
            else if result.result_type == retrieval::ResultType::Chunk {
                let chunk_excerpt = if result.content.len() > 400 {
                    format!("{}...", &result.content[..400])
                } else {
                    result.content.clone()
                };

                context_parts.push(format!(
                    "[Chunk | Relevance: {:.2}]\n{}",
                    result.score,
                    chunk_excerpt
                ));
            }
            // For other result types, use content as-is
            else {
                context_parts.push(format!(
                    "[{:?} | Relevance: {:.2}]\n{}",
                    result.result_type,
                    result.score,
                    result.content
                ));
            }
        }

        let context = context_parts.join("\n\n---\n\n");

        if context.trim().is_empty() {
            return Ok("No relevant information found in the knowledge graph.".to_string());
        }

        // Create Ollama client
        let client = OllamaClient::new(self.config.ollama.clone());

        // Build prompt for semantic answer generation with RAG best practices (2025)
        let prompt = format!(
            "You are a knowledgeable assistant specialized in answering questions based on a knowledge graph.\n\n\
            IMPORTANT INSTRUCTIONS:\n\
            - Answer ONLY using information from the provided context below\n\
            - Provide direct, conversational, and natural responses\n\
            - Do NOT show your reasoning process or use <think> tags\n\
            - If the context lacks sufficient information, clearly state: \"I don't have enough information to answer this question.\"\n\
            - Keep answers concise but complete (2-4 sentences)\n\
            - Use a natural, helpful tone as if speaking to a person\n\n\
            CONTEXT:\n\
            {}\n\n\
            QUESTION: {}\n\n\
            ANSWER (direct response only, no reasoning):",
            context, query
        );

        // Generate answer using LLM
        match client.generate(&prompt).await {
            Ok(answer) => {
                // Post-processing: Remove <think> tags if present (Qwen3)
                let cleaned_answer = Self::remove_thinking_tags(&answer);
                Ok(cleaned_answer.trim().to_string())
            },
            Err(e) => {
                #[cfg(feature = "tracing")]
                tracing::warn!("LLM generation failed: {}. Falling back to search results.", e);

                // Fallback: return formatted search results
                Ok(format!("Relevant information from knowledge graph:\n\n{}", context))
            }
        }
    }
818
819    /// Remove thinking tags from LLM output (for Qwen3 and similar models)
820    ///
821    /// Qwen3 often outputs <think>...</think> tags showing internal reasoning.
822    /// This function removes all such tags and their content.
823    #[cfg(feature = "async")]
824    fn remove_thinking_tags(text: &str) -> String {
825        // Remove all <think>...</think> blocks (including nested ones)
826        // Use a simple approach: repeatedly remove until no more found
827        let mut result = text.to_string();
828
829        loop {
830            // Find opening tag
831            if let Some(start) = result.find("<think>") {
832                // Find corresponding closing tag
833                if let Some(end) = result[start..].find("</think>") {
834                    // Remove the entire block
835                    let end_pos = start + end + "</think>".len();
836                    result.replace_range(start..end_pos, "");
837                } else {
838                    // No closing tag found, just remove opening tag
839                    result.replace_range(start..start + "<think>".len(), "");
840                    break;
841                }
842            } else {
843                // No more opening tags
844                break;
845            }
846        }
847
848        result.trim().to_string()
849    }
850
851    /// Check if system is initialized
852    pub fn is_initialized(&self) -> bool {
853        self.knowledge_graph.is_some() && self.retrieval_system.is_some()
854    }
855
856    /// Check if documents have been added
857    pub fn has_documents(&self) -> bool {
858        if let Some(graph) = &self.knowledge_graph {
859            graph.chunks().count() > 0
860        } else {
861            false
862        }
863    }
864
865    /// Check if graph has been built
866    pub fn has_graph(&self) -> bool {
867        if let Some(graph) = &self.knowledge_graph {
868            graph.entities().count() > 0
869        } else {
870            false
871        }
872    }
873
    /// Get a reference to the knowledge graph
    ///
    /// Returns `None` when no graph has been created yet.
    pub fn knowledge_graph(&self) -> Option<&KnowledgeGraph> {
        self.knowledge_graph.as_ref()
    }
878
879    /// Get entity details by ID
880    pub fn get_entity(&self, entity_id: &str) -> Option<&Entity> {
881        if let Some(graph) = &self.knowledge_graph {
882            graph.entities().find(|e| e.id.0 == entity_id)
883        } else {
884            None
885        }
886    }
887
888    /// Get all relationships involving an entity
889    pub fn get_entity_relationships(&self, entity_id: &str) -> Vec<&Relationship> {
890        if let Some(graph) = &self.knowledge_graph {
891            let entity_id_obj = EntityId::new(entity_id.to_string());
892            graph.relationships()
893                .filter(|r| r.source == entity_id_obj || r.target == entity_id_obj)
894                .collect()
895        } else {
896            Vec::new()
897        }
898    }
899
900    /// Get chunk by ID
901    pub fn get_chunk(&self, chunk_id: &str) -> Option<&TextChunk> {
902        if let Some(graph) = &self.knowledge_graph {
903            graph.chunks().find(|c| c.id.0 == chunk_id)
904        } else {
905            None
906        }
907    }
908
    /// Query using PageRank-based retrieval (when pagerank feature is enabled)
    ///
    /// Lazily initializes the system, builds the knowledge graph on demand if
    /// documents were added but no graph exists yet, then runs a
    /// PageRank-based search over the graph.
    ///
    /// # Errors
    ///
    /// Returns an error if initialization or graph construction fails, or if
    /// the knowledge graph is still missing after initialization.
    #[cfg(all(feature = "pagerank", feature = "async"))]
    pub async fn ask_with_pagerank(&mut self, query: &str) -> Result<Vec<retrieval::pagerank_retrieval::ScoredResult>> {
        use crate::retrieval::pagerank_retrieval::PageRankRetrievalSystem;

        self.ensure_initialized()?;

        // Build the graph on demand: documents present but not yet processed.
        if self.has_documents() && !self.has_graph() {
            self.build_graph().await?;
        }

        let graph = self.knowledge_graph.as_ref()
            .ok_or_else(|| GraphRAGError::Config {
                message: "Knowledge graph not initialized".to_string(),
            })?;

        // NOTE(review): the constructor argument (10) and the result limit
        // (Some(5)) are hard-coded here — confirm their semantics against
        // PageRankRetrievalSystem and consider making them configurable.
        let pagerank_system = PageRankRetrievalSystem::new(10);
        pagerank_system.search_with_pagerank(query, graph, Some(5))
    }
928
    /// Query using PageRank-based retrieval (when pagerank feature is enabled, sync version)
    ///
    /// Synchronous counterpart of the async `ask_with_pagerank`: lazily
    /// initializes the system, builds the knowledge graph on demand if
    /// documents were added but no graph exists yet, then runs a
    /// PageRank-based search over the graph.
    ///
    /// # Errors
    ///
    /// Returns an error if initialization or graph construction fails, or if
    /// the knowledge graph is still missing after initialization.
    #[cfg(all(feature = "pagerank", not(feature = "async")))]
    pub fn ask_with_pagerank(&mut self, query: &str) -> Result<Vec<retrieval::pagerank_retrieval::ScoredResult>> {
        use crate::retrieval::pagerank_retrieval::PageRankRetrievalSystem;

        self.ensure_initialized()?;

        // Build the graph on demand: documents present but not yet processed.
        if self.has_documents() && !self.has_graph() {
            self.build_graph()?;
        }

        let graph = self.knowledge_graph.as_ref()
            .ok_or_else(|| GraphRAGError::Config {
                message: "Knowledge graph not initialized".to_string(),
            })?;

        // NOTE(review): the constructor argument (10) and the result limit
        // (Some(5)) are hard-coded here — confirm their semantics against
        // PageRankRetrievalSystem and consider making them configurable.
        let pagerank_system = PageRankRetrievalSystem::new(10);
        pagerank_system.search_with_pagerank(query, graph, Some(5))
    }
948
    /// Get a mutable reference to the knowledge graph
    ///
    /// Returns `None` when no graph has been created yet.
    pub fn knowledge_graph_mut(&mut self) -> Option<&mut KnowledgeGraph> {
        self.knowledge_graph.as_mut()
    }
953
954    // ================================
955    // CONVENIENCE CONSTRUCTORS
956    // ================================
957
958    /// Create GraphRAG from a JSON5 config file
959    ///
960    /// This is a convenience method that loads a JSON5 config file and creates a GraphRAG instance.
961    ///
962    /// # Examples
963    ///
964    /// ```rust,no_run
965    /// # #[cfg(feature = "json5-support")]
966    /// # async fn example() -> graphrag_core::Result<()> {
967    /// use graphrag_core::GraphRAG;
968    ///
969    /// let graphrag = GraphRAG::from_json5_file("config/templates/symposium_zero_cost.graphrag.json5")?;
970    /// # Ok(())
971    /// # }
972    /// ```
973    #[cfg(feature = "json5-support")]
974    pub fn from_json5_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
975        use crate::config::json5_loader::load_json5_config;
976        use crate::config::setconfig::SetConfig;
977
978        let set_config = load_json5_config::<SetConfig, _>(path)?;
979        let config = set_config.to_graphrag_config();
980        Self::new(config)
981    }
982
983    /// Create GraphRAG from a config file (auto-detect format: TOML, JSON5, YAML, JSON)
984    ///
985    /// This method automatically detects the config file format based on the file extension
986    /// and loads it appropriately.
987    ///
988    /// Supported formats:
989    /// - `.toml` - TOML format
990    /// - `.json5` - JSON5 format (requires `json5-support` feature)
991    /// - `.yaml`, `.yml` - YAML format
992    /// - `.json` - JSON format
993    ///
994    /// # Examples
995    ///
996    /// ```rust,no_run
997    /// # async fn example() -> graphrag_core::Result<()> {
998    /// use graphrag_core::GraphRAG;
999    ///
1000    /// // Auto-detect format from extension
1001    /// let graphrag = GraphRAG::from_config_file("config/templates/symposium_zero_cost.graphrag.json5")?;
1002    /// # Ok(())
1003    /// # }
1004    /// ```
1005    pub fn from_config_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
1006        use crate::config::setconfig::SetConfig;
1007
1008        let set_config = SetConfig::from_file(path)?;
1009        let config = set_config.to_graphrag_config();
1010        Self::new(config)
1011    }
1012
1013    /// Complete workflow: load config + process document + build graph
1014    ///
1015    /// This is the most convenient method for getting started with GraphRAG. It:
1016    /// 1. Loads the config file (auto-detecting the format)
1017    /// 2. Initializes the GraphRAG system
1018    /// 3. Loads and processes the document
1019    /// 4. Builds the knowledge graph
1020    ///
1021    /// After this method completes, the GraphRAG instance is ready to answer queries.
1022    ///
1023    /// # Examples
1024    ///
1025    /// ```rust,no_run
1026    /// # #[cfg(feature = "async")]
1027    /// # async fn example() -> graphrag_core::Result<()> {
1028    /// use graphrag_core::GraphRAG;
1029    ///
1030    /// // Complete workflow in one call
1031    /// let mut graphrag = GraphRAG::from_config_and_document(
1032    ///     "config/templates/symposium_zero_cost.graphrag.json5",
1033    ///     "docs-example/Symposium.txt"
1034    /// ).await?;
1035    ///
1036    /// // Ready to query
1037    /// let answer = graphrag.ask("What is Socrates' view on love?").await?;
1038    /// println!("Answer: {}", answer);
1039    /// # Ok(())
1040    /// # }
1041    /// ```
1042    #[cfg(feature = "async")]
1043    pub async fn from_config_and_document<P1, P2>(
1044        config_path: P1,
1045        document_path: P2
1046    ) -> Result<Self>
1047    where
1048        P1: AsRef<std::path::Path>,
1049        P2: AsRef<std::path::Path>,
1050    {
1051        // Load config
1052        let mut graphrag = Self::from_config_file(config_path)?;
1053
1054        // Initialize
1055        graphrag.initialize()?;
1056
1057        // Load document
1058        let content = std::fs::read_to_string(document_path)
1059            .map_err(GraphRAGError::Io)?;
1060
1061        graphrag.add_document_from_text(&content)?;
1062
1063        // Build graph
1064        graphrag.build_graph().await?;
1065
1066        Ok(graphrag)
1067    }
1068
1069    /// Ensure system is initialized
1070    fn ensure_initialized(&mut self) -> Result<()> {
1071        if !self.is_initialized() {
1072            self.initialize()
1073        } else {
1074            Ok(())
1075        }
1076    }
1077}
1078
#[cfg(test)]
mod tests {
    use super::*;

    /// A default config must be enough to construct a GraphRAG instance.
    #[test]
    fn test_graphrag_creation() {
        assert!(GraphRAG::new(Config::default()).is_ok());
    }

    // TODO: Enable when GraphRAGBuilder is fully implemented
    // #[test]
    // fn test_builder_pattern() {
    //     let graphrag = GraphRAG::builder()
    //         .with_preset(ConfigPreset::Basic)
    //         .build();
    //     assert!(graphrag.is_ok());
    // }
}