oxirs_graphrag/
config.rs

1//! GraphRAG configuration
2
3use serde::{Deserialize, Serialize};
4
5/// GraphRAG configuration
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct GraphRAGConfig {
8    /// Number of seed nodes from vector search
9    #[serde(default = "default_top_k")]
10    pub top_k: usize,
11
12    /// Maximum number of seeds after fusion
13    #[serde(default = "default_max_seeds")]
14    pub max_seeds: usize,
15
16    /// Graph expansion hops
17    #[serde(default = "default_expansion_hops")]
18    pub expansion_hops: usize,
19
20    /// Maximum subgraph size (triples)
21    #[serde(default = "default_max_subgraph_size")]
22    pub max_subgraph_size: usize,
23
24    /// Maximum triples to include in LLM context
25    #[serde(default = "default_max_context_triples")]
26    pub max_context_triples: usize,
27
28    /// Enable community detection
29    #[serde(default = "default_enable_communities")]
30    pub enable_communities: bool,
31
32    /// Community detection algorithm
33    #[serde(default)]
34    pub community_algorithm: CommunityAlgorithm,
35
36    /// Fusion strategy
37    #[serde(default)]
38    pub fusion_strategy: FusionStrategy,
39
40    /// Weight for vector similarity scores (0.0 - 1.0)
41    #[serde(default = "default_vector_weight")]
42    pub vector_weight: f32,
43
44    /// Weight for keyword/BM25 scores (0.0 - 1.0)
45    #[serde(default = "default_keyword_weight")]
46    pub keyword_weight: f32,
47
48    /// Path patterns for graph expansion (SPARQL property paths)
49    #[serde(default)]
50    pub path_patterns: Vec<String>,
51
52    /// Similarity threshold for vector search
53    #[serde(default = "default_similarity_threshold")]
54    pub similarity_threshold: f32,
55
56    /// Cache size for query results
57    #[serde(default)]
58    pub cache_size: Option<usize>,
59
60    /// Enable query expansion
61    #[serde(default)]
62    pub enable_query_expansion: bool,
63
64    /// Enable hierarchical summarization
65    #[serde(default)]
66    pub enable_hierarchical_summary: bool,
67
68    /// Maximum community levels for hierarchical summarization
69    #[serde(default = "default_max_community_levels")]
70    pub max_community_levels: usize,
71
72    /// LLM model to use for generation
73    #[serde(default)]
74    pub llm_model: Option<String>,
75
76    /// Temperature for LLM generation
77    #[serde(default = "default_temperature")]
78    pub temperature: f32,
79
80    /// Maximum tokens for LLM response
81    #[serde(default = "default_max_tokens")]
82    pub max_tokens: usize,
83}
84
85impl Default for GraphRAGConfig {
86    fn default() -> Self {
87        Self {
88            top_k: default_top_k(),
89            max_seeds: default_max_seeds(),
90            expansion_hops: default_expansion_hops(),
91            max_subgraph_size: default_max_subgraph_size(),
92            max_context_triples: default_max_context_triples(),
93            enable_communities: default_enable_communities(),
94            community_algorithm: CommunityAlgorithm::default(),
95            fusion_strategy: FusionStrategy::default(),
96            vector_weight: default_vector_weight(),
97            keyword_weight: default_keyword_weight(),
98            path_patterns: vec![],
99            similarity_threshold: default_similarity_threshold(),
100            cache_size: Some(1000),
101            enable_query_expansion: false,
102            enable_hierarchical_summary: false,
103            max_community_levels: default_max_community_levels(),
104            llm_model: None,
105            temperature: default_temperature(),
106            max_tokens: default_max_tokens(),
107        }
108    }
109}
110
111/// Community detection algorithm
112#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
113pub enum CommunityAlgorithm {
114    /// Louvain algorithm (fast, good quality)
115    #[default]
116    Louvain,
117    /// Leiden algorithm (improved Louvain)
118    Leiden,
119    /// Label propagation (very fast, lower quality)
120    LabelPropagation,
121    /// Connected components (simplest)
122    ConnectedComponents,
123}
124
125/// Fusion strategy for combining retrieval results
126#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
127pub enum FusionStrategy {
128    /// Reciprocal Rank Fusion (default, robust)
129    #[default]
130    ReciprocalRankFusion,
131    /// Linear combination of scores
132    LinearCombination,
133    /// Take highest score per entity
134    HighestScore,
135    /// Learning-to-rank (requires model)
136    LearningToRank,
137}
138
139// Default value functions
/// Default for `top_k`: the number of seed nodes taken from vector search.
fn default_top_k() -> usize {
    const TOP_K: usize = 20;
    TOP_K
}
/// Default for `max_seeds`: the maximum number of seeds kept after fusion.
fn default_max_seeds() -> usize {
    const MAX_SEEDS: usize = 10;
    MAX_SEEDS
}
/// Default for `expansion_hops`: the number of graph expansion hops.
fn default_expansion_hops() -> usize {
    const EXPANSION_HOPS: usize = 2;
    EXPANSION_HOPS
}
/// Default for `max_subgraph_size`: the maximum subgraph size, in triples.
fn default_max_subgraph_size() -> usize {
    const MAX_SUBGRAPH_TRIPLES: usize = 500;
    MAX_SUBGRAPH_TRIPLES
}
/// Default for `max_context_triples`: the maximum number of triples included
/// in the LLM context.
fn default_max_context_triples() -> usize {
    const MAX_CONTEXT_TRIPLES: usize = 100;
    MAX_CONTEXT_TRIPLES
}
/// Default for `enable_communities`: community detection is switched on.
fn default_enable_communities() -> bool {
    const COMMUNITIES_ENABLED: bool = true;
    COMMUNITIES_ENABLED
}
/// Default for `vector_weight`: the weight given to vector similarity scores.
fn default_vector_weight() -> f32 {
    const VECTOR_WEIGHT: f32 = 0.7;
    VECTOR_WEIGHT
}
/// Default for `keyword_weight`: the weight given to keyword/BM25 scores.
fn default_keyword_weight() -> f32 {
    const KEYWORD_WEIGHT: f32 = 0.3;
    KEYWORD_WEIGHT
}
/// Default for `similarity_threshold`: the similarity threshold applied to
/// vector search.
fn default_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.7;
    SIMILARITY_THRESHOLD
}
/// Default for `max_community_levels`: the maximum number of community levels
/// used for hierarchical summarization.
fn default_max_community_levels() -> usize {
    const MAX_COMMUNITY_LEVELS: usize = 3;
    MAX_COMMUNITY_LEVELS
}
/// Default for `temperature`: the temperature used for LLM generation.
fn default_temperature() -> f32 {
    const TEMPERATURE: f32 = 0.7;
    TEMPERATURE
}
/// Default for `max_tokens`: the maximum number of tokens in an LLM response.
fn default_max_tokens() -> usize {
    const MAX_TOKENS: usize = 2048;
    MAX_TOKENS
}
176
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks a handful of values produced by `GraphRAGConfig::default()`.
    #[test]
    fn test_default_config() {
        let defaults = GraphRAGConfig::default();

        assert_eq!(defaults.top_k, 20);
        assert_eq!(defaults.expansion_hops, 2);
        assert!(defaults.enable_communities);
        assert_eq!(
            defaults.fusion_strategy,
            FusionStrategy::ReciprocalRankFusion
        );
    }

    /// Round-trips the default configuration through JSON and checks that
    /// `top_k` survives unchanged.
    #[test]
    fn test_config_serialization() {
        let original = GraphRAGConfig::default();

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded = serde_json::from_str::<GraphRAGConfig>(&encoded).unwrap();

        assert_eq!(decoded.top_k, original.top_k);
    }
}
197}