rexis_rag/graph_retrieval/mod.rs
1//! # Graph-Based Retrieval Module
2//!
3//! Advanced knowledge graph construction and graph-based retrieval for RAG systems.
4//!
5//! This module enables sophisticated reasoning over structured knowledge by building
6//! knowledge graphs from documents and leveraging graph traversal algorithms for
7//! enhanced retrieval. It provides both automatic graph construction and manual
8//! graph management capabilities.
9//!
10//! ## Features
11//!
12//! - **Knowledge Graph Construction**: Automatic entity and relationship extraction
13//! - **Graph Algorithms**: PageRank, community detection, path finding
14//! - **Entity Recognition**: Multi-type entity extraction with confidence scoring
15//! - **Relationship Extraction**: Semantic relationship detection between entities
16//! - **Graph Storage**: Efficient storage and indexing for large graphs
17//! - **Query Expansion**: Graph-based query enhancement and expansion
18//! - **Hybrid Retrieval**: Combine graph and vector retrieval
19//! - **Graph Analytics**: Centrality measures, clustering, and graph statistics
20//!
21//! ## Use Cases
22//!
23//! - **Knowledge Base Construction**: Build structured knowledge from documents
24//! - **Question Answering**: Multi-hop reasoning across connected entities
25//! - **Recommendation Systems**: Find related entities and concepts
26//! - **Fact Verification**: Verify claims using graph-based evidence
27//! - **Research Discovery**: Find connections between research topics
28//!
29//! ## Examples
30//!
31//! ### Building a Knowledge Graph
//! ```rust,ignore
33//! use rrag::graph_retrieval::{GraphBuilder, EntityExtractor, RelationshipExtractor};
34//!
35//! # async fn example() -> rrag::RragResult<()> {
36//! let mut builder = GraphBuilder::new()
37//! .with_entity_extraction(true)
38//! .with_relationship_detection(true)
39//! .build();
40//!
41//! // Add documents to build the graph
42//! let documents = vec![
43//! "Albert Einstein was born in Germany in 1879.",
44//! "Einstein developed the theory of relativity.",
45//! "The theory of relativity revolutionized physics."
46//! ];
47//!
48//! for doc in documents {
49//! builder.add_document(doc).await?;
50//! }
51//!
52//! let graph = builder.build().await?;
53//! tracing::debug!("Built graph with {} nodes and {} edges",
54//! graph.node_count(),
55//! graph.edge_count());
56//! # Ok(())
57//! # }
58//! ```
59//!
60//! ### Graph-Based Query Expansion
//! ```rust,ignore
62//! use rrag::graph_retrieval::{GraphQueryExpander, ExpansionStrategy};
63//!
64//! # async fn example() -> rrag::RragResult<()> {
65//! let expander = GraphQueryExpander::new(graph)
66//! .with_strategy(ExpansionStrategy::SemanticPath)
67//! .with_max_hops(2);
68//!
69//! let original_query = "Einstein's theories";
70//! let expanded = expander.expand_query(original_query).await?;
71//!
72//! tracing::debug!("Original: {}", original_query);
73//! tracing::debug!("Expanded: {:?}", expanded.expanded_terms);
74//! // Output might include: ["theory of relativity", "special relativity",
75//! // "general relativity", "physics", "German physicist"]
76//! # Ok(())
77//! # }
78//! ```
79//!
80//! ### Multi-Hop Reasoning
//! ```rust,ignore
82//! use rrag::graph_retrieval::{GraphRetriever, TraversalStrategy};
83//!
84//! # async fn example() -> rrag::RragResult<()> {
85//! let retriever = GraphRetriever::new(graph)
86//! .with_traversal_strategy(TraversalStrategy::BreadthFirst)
87//! .with_max_depth(3);
88//!
89//! // Find connections between entities
90//! let connections = retriever.find_path_between(
91//! "Einstein",
92//! "quantum mechanics"
93//! ).await?;
94//!
95//! for connection in connections {
96//! tracing::debug!("Path: {}", connection.format_path());
97//! }
98//! # Ok(())
99//! # }
100//! ```
101//!
102//! ### Entity-Centric Retrieval
//! ```rust,ignore
104//! use rrag::graph_retrieval::{EntityCentricRetriever, RetrievalOptions};
105//!
106//! # async fn example() -> rrag::RragResult<()> {
107//! let retriever = EntityCentricRetriever::new(graph);
108//!
109//! let query = "What did Einstein contribute to physics?";
110//! let results = retriever.retrieve_with_entities(
111//! query,
112//! RetrievalOptions::new()
113//! .with_entity_expansion(true)
114//! .with_relationship_traversal(true)
115//! ).await?;
116//!
117//! for result in results {
118//! tracing::debug!("Document: {}", result.content);
119//! tracing::debug!("Related entities: {:?}", result.entities);
120//! tracing::debug!("Relationship path: {:?}", result.path);
121//! }
122//! # Ok(())
123//! # }
124//! ```
125//!
126//! ### Graph Analytics
//! ```rust,ignore
128//! use rrag::graph_retrieval::{GraphAnalyzer, CentralityMetric};
129//!
130//! # async fn example() -> rrag::RragResult<()> {
131//! let analyzer = GraphAnalyzer::new(graph);
132//!
133//! // Find most important entities
134//! let pagerank_scores = analyzer.compute_centrality(
135//! CentralityMetric::PageRank
136//! ).await?;
137//!
138//! let top_entities = pagerank_scores.top_k(10);
139//! for (entity, score) in top_entities {
140//! tracing::debug!("Entity: {}, Importance: {:.3}", entity, score);
141//! }
142//!
143//! // Detect communities
144//! let communities = analyzer.detect_communities().await?;
145//! for (idx, community) in communities.iter().enumerate() {
146//! tracing::debug!("Community {}: {:?}", idx, community.entities);
147//! }
148//! # Ok(())
149//! # }
150//! ```
151//!
152//! ## Performance Optimization
153//!
154//! - **Parallel Processing**: Multi-threaded entity extraction
155//! - **Batch Operations**: Process multiple documents together
156//! - **Graph Indexing**: Pre-built indexes for fast traversal
157//! - **Caching**: Cache frequently accessed graph patterns
158//! - **Memory Mapping**: Efficient storage for large graphs
159//! - **Incremental Updates**: Add nodes/edges without rebuilding
160
161pub mod algorithms;
162pub mod builder;
163pub mod config;
164pub mod entity;
165pub mod graph;
166pub mod query_expansion;
167pub mod retriever;
168pub mod storage;
169
170// Re-exports
171pub use algorithms::{GraphAlgorithms, PageRankConfig, PathFindingConfig, TraversalConfig};
172pub use builder::{GraphBuildConfig, GraphRetrievalBuilder};
173pub use config::{AlgorithmConfig, GraphConfig, GraphConfigBuilder};
174pub use entity::{Entity, EntityExtractor, EntityType, RelationType, Relationship};
175pub use graph::{EdgeType, GraphEdge, GraphMetrics, GraphNode, KnowledgeGraph, NodeType};
176pub use query_expansion::{ExpansionResult, ExpansionStrategy, QueryExpander};
177pub use retriever::{GraphRetrievalConfig, GraphRetriever, GraphSearchResult};
178pub use storage::GraphStorageConfig;
179pub use storage::{GraphIndex, GraphQuery, GraphQueryResult, GraphStorage};
180
181use crate::RragError;
182
/// Graph-based retrieval error types.
///
/// Each variant's `#[error(...)]` attribute supplies its `Display` text (via the
/// `thiserror` derive); the variant's named fields are interpolated into that
/// text. All payloads are plain `String`s, so no underlying source error is
/// carried along.
#[derive(Debug, thiserror::Error)]
pub enum GraphError {
    /// Entity extraction failed; `message` holds the failure detail.
    #[error("Entity extraction failed: {message}")]
    EntityExtraction { message: String },

    /// Graph construction failed; `message` holds the failure detail.
    #[error("Graph construction failed: {message}")]
    GraphConstruction { message: String },

    /// A graph algorithm reported an error. `algorithm` is presumably the name
    /// of the algorithm involved and `message` the failure detail.
    #[error("Graph algorithm error: {algorithm} - {message}")]
    Algorithm { algorithm: String, message: String },

    /// A graph storage operation failed. `operation` is presumably the name of
    /// the storage operation attempted and `message` the failure detail.
    #[error("Graph storage error: {operation} - {message}")]
    Storage { operation: String, message: String },

    /// Query expansion failed. `strategy` is presumably the expansion strategy
    /// in use and `message` the failure detail.
    #[error("Query expansion failed: {strategy} - {message}")]
    QueryExpansion { strategy: String, message: String },

    /// A graph index error; `message` holds the failure detail.
    #[error("Graph index error: {message}")]
    Index { message: String },
}
204
205impl From<GraphError> for RragError {
206 fn from(err: GraphError) -> Self {
207 RragError::retrieval(err.to_string())
208 }
209}