graphrag_core/lib.rs
1//! # GraphRAG Core
2//!
3//! Portable core library for GraphRAG - works on both native and WASM platforms.
4//!
5//! This is the foundational crate that provides:
6//! - Knowledge graph construction and management
7//! - Entity extraction and linking
8//! - Vector embeddings and similarity search
9//! - Graph algorithms (PageRank, community detection)
10//! - Retrieval systems (semantic, keyword, hybrid)
11//! - Caching and optimization
12//!
13//! ## Platform Support
14//!
15//! - **Native**: Full feature set with optional CUDA/Metal GPU acceleration
16//! - **WASM**: Browser-compatible with Voy vector search and Candle embeddings
17//!
18//! ## Feature Flags
19//!
20//! - `wasm`: Enable WASM compatibility (uses Voy instead of HNSW)
21//! - `cuda`: Enable NVIDIA GPU acceleration via Candle
22//! - `metal`: Enable Apple Silicon GPU acceleration
23//! - `webgpu`: Enable WebGPU acceleration for browser (via Burn)
24//! - `pagerank`: Enable PageRank-based retrieval
25//! - `lightrag`: Enable LightRAG optimizations (6000x token reduction)
26//! - `caching`: Enable intelligent LLM response caching
27//!
28//! ## Quick Start
29//!
30//! ```rust
31//! use graphrag_core::{GraphRAG, Config};
32//!
33//! # fn example() -> graphrag_core::Result<()> {
34//! let config = Config::default();
35//! let mut graphrag = GraphRAG::new(config)?;
36//! graphrag.initialize()?;
37//! # Ok(())
38//! # }
39//! ```
40
// Crate-wide lints: warn on public items that lack documentation, and enable the
// `clippy::all` lint group at warn level.
41#![warn(missing_docs)]
42#![warn(clippy::all)]
43// Note: WASM builds with wasm-bindgen DO use `std`, so this crate does not opt out of it (no `#![no_std]`).
44
45// ================================
46// MODULE DECLARATIONS
47// ================================
48
49// Core modules (compiled unconditionally unless a `#[cfg]` gate is shown on the item)
50/// Configuration management and loading
51pub mod config;
52/// Core traits and types
53pub mod core;
54/// Entity extraction and management
55pub mod entity;
56/// Text generation and LLM interactions (requires the `async` feature)
57#[cfg(feature = "async")]
58pub mod generation;
59/// Graph data structures and algorithms
60pub mod graph;
61/// Retrieval strategies and implementations
62pub mod retrieval;
63/// Storage backends and persistence (requires `memory-storage`, `persistent-storage`, or `async`)
64#[cfg(any(
65 feature = "memory-storage",
66 feature = "persistent-storage",
67 feature = "async"
68))]
69pub mod storage;
70/// Text processing and chunking
71pub mod text;
72/// Vector operations and embeddings
73pub mod vector;
74
75/// Builder pattern implementations
76pub mod builder;
77/// Embedding generation and providers
78pub mod embeddings;
79/// Natural language processing utilities
80pub mod nlp;
81/// Ollama LLM integration
82pub mod ollama;
83/// Persistence layer for knowledge graphs (workspace management always available)
84pub mod persistence;
85/// Query processing and execution
86pub mod query;
87/// Text summarization capabilities
88pub mod summarization;
89
90// Pipeline modules
91/// Data processing pipelines
92pub mod pipeline;
93
94// Advanced features (feature-gated)
/// Parallel processing support (requires the `parallel-processing` feature)
95#[cfg(feature = "parallel-processing")]
96pub mod parallel;
97
98#[cfg(feature = "lightrag")]
99/// LightRAG dual-level retrieval optimization (requires the `lightrag` feature)
100pub mod lightrag;
101
102/// Composable pipeline executor for build-graph operations (not feature-gated)
103pub mod pipeline_executor;
104
105// Utility modules
106/// Reranking utilities for improving search result quality
107pub mod reranking;
108
109/// Monitoring, benchmarking, and performance tracking
110pub mod monitoring;
111
112/// RAG answer evaluation and criticism
113pub mod critic;
114
115/// Evaluation framework for query results and pipeline validation
116pub mod evaluation;
117
118/// Graph optimization (weight optimization, DW-GRPO); requires the `async` feature
119#[cfg(feature = "async")]
120pub mod optimization;
121
122/// API endpoints and handlers (requires the `api` feature)
123#[cfg(feature = "api")]
124pub mod api;
125
126/// Inference module for model predictions
127pub mod inference;
128
129/// Multi-document corpus processing (requires the `corpus-processing` feature)
130#[cfg(feature = "corpus-processing")]
131pub mod corpus;
132
133// Feature-gated modules: each is compiled only when the Cargo feature named in its `#[cfg]` is enabled
134#[cfg(feature = "async")]
135/// Async GraphRAG implementation
136pub mod async_graphrag;
137
138#[cfg(feature = "async")]
139/// Async processing pipelines
140pub mod async_processing;
141
142#[cfg(feature = "caching")]
143/// Caching utilities for LLM responses
144pub mod caching;
145
146#[cfg(feature = "function-calling")]
147/// Function calling capabilities for LLMs
148pub mod function_calling;
149
150#[cfg(feature = "incremental")]
151/// Incremental graph updates
152pub mod incremental;
153
154#[cfg(feature = "rograg")]
155/// ROGRAG (Robustly Optimized GraphRAG) implementation
156pub mod rograg;
157
158// Future utility modules (optional, not currently needed):
159// pub mod automatic_entity_linking; // Advanced entity linking
160// pub mod phase_saver; // Phase state persistence
161
162// ================================
163// PUBLIC API EXPORTS
164// ================================
165
166/// Prelude module containing the most commonly used types
167///
168/// Import everything you need with a single line:
169/// ```rust
170/// use graphrag_core::prelude::*;
171/// ```
172///
173/// This includes:
174/// - `GraphRAG` - The main orchestrator
175/// - `Config` - Configuration management
176/// - `GraphRAGBuilder`, `TypedBuilder` - Configuration builders
177/// - Core types: `Document`, `Entity`, `EntityMention`, `Relationship`, `TextChunk`,
///   `KnowledgeGraph`, `ChunkId`, `DocumentId`, `EntityId`
178/// - Error handling: `Result`, `GraphRAGError`
/// - Search results and explained answers: `SearchResult`, `ExplainedAnswer`,
///   `ReasoningStep`, `SourceReference`, `SourceType`
/// - Pipeline executor: `PipelineExecutor`, `PipelineReport`
/// - Config deserialization helper: `SetConfig`
179pub mod prelude {
180 // Main entry point (re-exported from the crate root)
181 pub use crate::GraphRAG;
182
183 // Configuration & Builders
184 pub use crate::builder::GraphRAGBuilder;
185 pub use crate::builder::TypedBuilder;
186 pub use crate::config::Config;
187
188 // Error handling
189 pub use crate::core::{GraphRAGError, Result};
190
191 // Core data types
192 pub use crate::core::{
193 ChunkId, Document, DocumentId, Entity, EntityId, EntityMention, KnowledgeGraph,
194 Relationship, TextChunk,
195 };
196
197 // Search results and explained answers
198 pub use crate::retrieval::SearchResult;
199 pub use crate::retrieval::{ExplainedAnswer, ReasoningStep, SourceReference, SourceType};
200
201 // Pipeline executor
202 pub use crate::pipeline_executor::{PipelineExecutor, PipelineReport};
203
204 // Config deserialization helper
205 pub use crate::config::setconfig::SetConfig;
206}
207
208// Re-export core types
209pub use crate::config::Config;
210pub use crate::core::{
211 ChunkId, Document, DocumentId, Entity, EntityId, EntityMention, ErrorContext, ErrorSeverity,
212 ErrorSuggestion, GraphRAGError, KnowledgeGraph, Relationship, Result, TextChunk,
213};
214
215// Re-export core traits (async feature only)
216#[cfg(feature = "async")]
217pub use crate::core::traits::{
218 Embedder, EntityExtractor, GraphStore, LanguageModel, Retriever, Storage, VectorStore,
219};
220
221// Storage exports (only when the `memory-storage` feature is enabled)
222#[cfg(feature = "memory-storage")]
223pub use crate::storage::MemoryStorage;
224
225// Re-export builder (GraphRAGBuilder exists, ConfigPreset and LLMProvider not yet implemented)
226pub use crate::builder::GraphRAGBuilder;
227// Note: the GraphRAG struct is already public; it lives in `graphrag.rs` and is re-exported at the end of this file
228// Note: builder::GraphRAG is a placeholder - the real implementation is the main GraphRAG struct
229
230// Feature-gated exports
231#[cfg(feature = "lightrag")]
232pub use crate::lightrag::{
233 DualLevelKeywords, DualLevelRetriever, DualRetrievalConfig, DualRetrievalResults,
234 KeywordExtractor, KeywordExtractorConfig, MergeStrategy, SemanticSearcher,
235};
236
237#[cfg(feature = "pagerank")]
238pub use crate::graph::pagerank::{PageRankConfig, PersonalizedPageRank};
239
240#[cfg(feature = "leiden")]
241pub use crate::graph::leiden::{HierarchicalCommunities, LeidenCommunityDetector, LeidenConfig};
242
243#[cfg(feature = "cross-encoder")]
244pub use crate::reranking::cross_encoder::{
245 ConfidenceCrossEncoder, CrossEncoder, CrossEncoderConfig, RankedResult, RerankingStats,
246};
247
248#[cfg(feature = "pagerank")]
249pub use crate::retrieval::pagerank_retrieval::{PageRankRetrievalSystem, ScoredResult};
250
251#[cfg(feature = "pagerank")]
252pub use crate::retrieval::hipporag_ppr::{Fact, HippoRAGConfig, HippoRAGRetriever};
253
254// ================================
255// MAIN GRAPHRAG SYSTEM
256// ================================
257//
258// The `GraphRAG` orchestrator type lives in `graphrag.rs`. It is re-exported
259// here so external callers continue to import it via `graphrag_core::GraphRAG`.
260
// The module itself stays private; only the `GraphRAG` type is exposed through the re-export.
261mod graphrag;
262pub use graphrag::GraphRAG;