1#![allow(dead_code)]
3
4use crate::config::Config;
5use crate::core::{GraphRAGError, Result};
6use std::fs;
7use std::path::Path;
8
9#[cfg(feature = "toml-support")]
10use toml;
11
12#[cfg(feature = "serde_json")]
13use serde_json;
14
/// Serialization formats a configuration file can be stored in.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ConfigFormat {
    /// `.toml` files; also the fallback when the extension is missing or unknown.
    Toml,
    /// `.json` files.
    Json,
    /// `.yaml` / `.yml` files.
    Yaml,
}

impl ConfigFormat {
    /// Infers the format from the extension of `path`.
    ///
    /// Matching is ASCII case-insensitive, so `config.JSON` and `config.Yml`
    /// are recognized the same way as their lowercase spellings. Paths with no
    /// extension or an unrecognized one fall back to [`ConfigFormat::Toml`].
    pub fn from_extension(path: &str) -> Self {
        // Normalize the case once so the match arms only list lowercase names.
        let extension = Path::new(path)
            .extension()
            .and_then(|ext| ext.to_str())
            .map(str::to_ascii_lowercase);

        match extension.as_deref() {
            Some("json") => ConfigFormat::Json,
            Some("yaml" | "yml") => ConfigFormat::Yaml,
            // `toml`, unknown extensions and extension-less paths default to TOML.
            _ => ConfigFormat::Toml,
        }
    }
}
38
39pub fn load_config(path: &str) -> Result<Config> {
41 let format = ConfigFormat::from_extension(path);
42
43 if !Path::new(path).exists() {
44 return Err(GraphRAGError::Config {
45 message: format!("Configuration file not found: {path}"),
46 });
47 }
48
49 let content = fs::read_to_string(path)?;
50
51 match format {
52 ConfigFormat::Toml => load_toml_config(&content),
53 ConfigFormat::Json => load_json_config(&content),
54 ConfigFormat::Yaml => load_yaml_config(&content),
55 }
56}
57
/// Parses `content` as TOML into a `RawConfig` and converts it to a [`Config`].
///
/// Parse failures are reported as `GraphRAGError::Config` carrying the
/// parser's message.
#[cfg(feature = "toml-support")]
fn load_toml_config(content: &str) -> Result<Config> {
    match toml::from_str::<RawConfig>(content) {
        Ok(parsed) => Ok(convert_raw_config(parsed)),
        Err(e) => Err(GraphRAGError::Config {
            message: format!("Failed to parse TOML config: {e}"),
        }),
    }
}
66
67#[cfg(not(feature = "toml-support"))]
68fn load_toml_config(_content: &str) -> Result<Config> {
69 Err(GraphRAGError::Config {
70 message: "TOML support not enabled. Enable 'toml-support' feature.".to_string(),
71 })
72}
73
/// Parses `content` as JSON into a `RawConfig` and converts it to a [`Config`].
///
/// Parse failures are reported as `GraphRAGError::Config` carrying the
/// parser's message.
#[cfg(feature = "serde_json")]
fn load_json_config(content: &str) -> Result<Config> {
    match serde_json::from_str::<RawConfig>(content) {
        Ok(parsed) => Ok(convert_raw_config(parsed)),
        Err(e) => Err(GraphRAGError::Config {
            message: format!("Failed to parse JSON config: {e}"),
        }),
    }
}
83
84#[cfg(not(feature = "serde_json"))]
85fn load_json_config(_content: &str) -> Result<Config> {
86 Err(GraphRAGError::Config {
87 message: "JSON support not enabled. Enable 'serde_json' feature.".to_string(),
88 })
89}
90
/// Parses `content` as YAML into a `RawConfig` and converts it to a [`Config`].
///
/// Parse failures are reported as `GraphRAGError::Config` carrying the
/// parser's message.
#[cfg(feature = "yaml-support")]
fn load_yaml_config(content: &str) -> Result<Config> {
    match serde_yaml::from_str::<RawConfig>(content) {
        Ok(parsed) => Ok(convert_raw_config(parsed)),
        Err(e) => Err(GraphRAGError::Config {
            message: format!("Failed to parse YAML config: {e}"),
        }),
    }
}
100
101#[cfg(not(feature = "yaml-support"))]
102fn load_yaml_config(_content: &str) -> Result<Config> {
103 Err(GraphRAGError::Config {
104 message: "YAML support not enabled. Enable 'yaml-support' feature.".to_string(),
105 })
106}
107
108#[derive(Debug, serde::Deserialize, Default)]
110#[allow(dead_code)]
111struct RawConfig {
112 #[serde(default)]
113 system: SystemConfig,
114 #[serde(default)]
115 features: FeaturesConfig,
116 #[serde(default)]
117 text_processing: RawTextProcessingConfig,
118 #[serde(default)]
119 entity_extraction: RawEntityExtractionConfig,
120 #[serde(default)]
121 graph_construction: RawGraphConstructionConfig,
122 #[serde(default)]
123 vector_processing: RawVectorProcessingConfig,
124 #[serde(default)]
125 query_processing: RawQueryProcessingConfig,
126 #[serde(default)]
127 adaptive_retrieval: RawAdaptiveRetrievalConfig,
128 #[serde(default)]
129 ranking_policies: RawRankingPoliciesConfig,
130 #[serde(default)]
131 reranking: RawRerankingConfig,
132 #[serde(default)]
133 generation: RawGenerationConfig,
134 #[serde(default)]
135 ollama: RawOllamaConfig,
136 #[serde(default)]
137 async_processing: RawAsyncProcessingConfig,
138 #[serde(default)]
139 function_calling: RawFunctionCallingConfig,
140 #[serde(default)]
141 monitoring: RawMonitoringConfig,
142 #[serde(default)]
143 storage: RawStorageConfig,
144 #[serde(default)]
145 parallel_processing: RawParallelProcessingConfig,
146 #[serde(default)]
147 logging: RawLoggingConfig,
148 #[serde(default)]
149 experimental: RawExperimentalConfig,
150}
151
152#[derive(Debug, serde::Deserialize, Default)]
153#[allow(dead_code)]
154struct SystemConfig {
155 log_level: Option<String>,
156 max_memory_mb: Option<u64>,
157 temp_dir: Option<String>,
158 output_dir: Option<String>,
159}
160
161#[derive(Debug, serde::Deserialize, Default)]
162#[allow(dead_code)]
163struct FeaturesConfig {
164 text_processing: Option<bool>,
165 entity_extraction: Option<bool>,
166 graph_construction: Option<bool>,
167 vector_processing: Option<bool>,
168 async_processing: Option<bool>,
169 function_calling: Option<bool>,
170 monitoring: Option<bool>,
171}
172
173#[derive(Debug, serde::Deserialize, Default)]
174#[allow(dead_code)]
175struct RawTextProcessingConfig {
176 enabled: Option<bool>,
177 chunk_size: Option<usize>,
178 chunk_overlap: Option<usize>,
179 min_chunk_size: Option<usize>,
180 max_chunk_size: Option<usize>,
181 normalize_whitespace: Option<bool>,
182 remove_artifacts: Option<bool>,
183 extract_keywords: Option<bool>,
184 keyword_min_score: Option<f64>,
185 #[serde(default)]
186 enrichment: Option<RawEnrichmentConfig>,
187}
188
189#[derive(Debug, serde::Deserialize, Default)]
190#[allow(dead_code)]
191struct RawEnrichmentConfig {
192 enabled: Option<bool>,
193 auto_detect_format: Option<bool>,
194 parser_type: Option<String>,
195 extract_keywords: Option<bool>,
196 max_keywords_per_chunk: Option<usize>,
197 use_tfidf: Option<bool>,
198 generate_summaries: Option<bool>,
199 min_chunk_length_for_summary: Option<usize>,
200 max_summary_length: Option<usize>,
201 extract_chapter: Option<bool>,
202 extract_section: Option<bool>,
203 extract_position: Option<bool>,
204 calculate_confidence: Option<bool>,
205 detect_headings: Option<bool>,
206 detect_numbering: Option<bool>,
207 detect_underlines: Option<bool>,
208 detect_all_caps: Option<bool>,
209 detect_roman_numerals: Option<bool>,
210}
211
212#[derive(Debug, serde::Deserialize, Default)]
213#[allow(dead_code)]
214struct RawEntityExtractionConfig {
215 enabled: Option<bool>,
216 min_confidence: Option<f32>,
217 use_gleaning: Option<bool>,
218 max_gleaning_rounds: Option<usize>,
219 gleaning_improvement_threshold: Option<f64>,
220 semantic_merging: Option<bool>,
221 merge_similarity_threshold: Option<f64>,
222 automatic_linking: Option<bool>,
223 linking_confidence_threshold: Option<f64>,
224 gleaning: Option<RawGleaningConfig>,
225}
226
227#[derive(Debug, serde::Deserialize, Default)]
228struct RawGleaningConfig {
229 focus_areas: Option<Vec<String>>,
230 context_window: Option<usize>,
231 llm_temperature: Option<f64>,
232}
233
234#[derive(Debug, serde::Deserialize, Default)]
235struct RawGraphConstructionConfig {
236 enabled: Option<bool>,
237 incremental_updates: Option<bool>,
238 use_pagerank: Option<bool>,
239 pagerank_damping: Option<f64>,
240 pagerank_iterations: Option<usize>,
241 pagerank_convergence: Option<f64>,
242 extract_relationships: Option<bool>,
243 relationship_confidence_threshold: Option<f64>,
244}
245
246#[derive(Debug, serde::Deserialize, Default)]
247struct RawVectorProcessingConfig {
248 enabled: Option<bool>,
249 embedding_model: Option<String>,
250 embedding_dimensions: Option<usize>,
251 use_hnsw_index: Option<bool>,
252 hnsw_ef_construction: Option<usize>,
253 hnsw_m: Option<usize>,
254 similarity_threshold: Option<f64>,
255}
256
257#[derive(Debug, serde::Deserialize, Default)]
258struct RawQueryProcessingConfig {
259 enabled: Option<bool>,
260 use_advanced_pipeline: Option<bool>,
261 use_intent_classification: Option<bool>,
262 use_concept_extraction: Option<bool>,
263 use_temporal_parsing: Option<bool>,
264 confidence_threshold: Option<f64>,
265 intent_classification: Option<RawIntentClassificationConfig>,
266}
267
268#[derive(Debug, serde::Deserialize, Default)]
269struct RawIntentClassificationConfig {
270 factual_patterns: Option<Vec<String>>,
271 relational_patterns: Option<Vec<String>>,
272 temporal_patterns: Option<Vec<String>>,
273 causal_patterns: Option<Vec<String>>,
274 comparative_patterns: Option<Vec<String>>,
275}
276
277#[derive(Debug, serde::Deserialize, Default)]
278struct RawAdaptiveRetrievalConfig {
279 enabled: Option<bool>,
280 default_strategies: Option<Vec<String>>,
281 strategy_weights: Option<std::collections::HashMap<String, f64>>,
282 dynamic_weighting: Option<bool>,
283 diversity_factor: Option<f64>,
284 max_results_per_strategy: Option<usize>,
285}
286
287#[derive(Debug, serde::Deserialize, Default)]
288struct RawRankingPoliciesConfig {
289 enabled: Option<bool>,
290 use_elbow_detection: Option<bool>,
291 use_top_k_diversity: Option<bool>,
292 use_threshold_filtering: Option<bool>,
293 use_intent_aware_ranking: Option<bool>,
294 use_confidence_filtering: Option<bool>,
295 elbow_detection: Option<RawElbowDetectionConfig>,
296 top_k: Option<RawTopKConfig>,
297 threshold: Option<RawThresholdConfig>,
298}
299
300#[derive(Debug, serde::Deserialize, Default)]
301struct RawElbowDetectionConfig {
302 min_results: Option<usize>,
303 max_results: Option<usize>,
304 smoothing_factor: Option<f64>,
305}
306
307#[derive(Debug, serde::Deserialize, Default)]
308struct RawTopKConfig {
309 k: Option<usize>,
310 diversity_threshold: Option<f64>,
311 entity_type_balance: Option<bool>,
312}
313
314#[derive(Debug, serde::Deserialize, Default)]
315struct RawThresholdConfig {
316 min_score: Option<f64>,
317 confidence_weight: Option<f64>,
318}
319
320#[derive(Debug, serde::Deserialize, Default)]
321struct RawRerankingConfig {
322 enabled: Option<bool>,
323 use_confidence_filtering: Option<bool>,
324 use_cross_encoder: Option<bool>,
325 use_diversity_selection: Option<bool>,
326 final_result_limit: Option<usize>,
327}
328
329#[derive(Debug, serde::Deserialize, Default)]
330struct RawGenerationConfig {
331 enabled: Option<bool>,
332 use_context_assembly: Option<bool>,
333 max_context_length: Option<usize>,
334 use_prompt_templates: Option<bool>,
335 include_citations: Option<bool>,
336 include_confidence_scores: Option<bool>,
337 templates: Option<RawTemplatesConfig>,
338}
339
340#[derive(Debug, serde::Deserialize, Default)]
341struct RawTemplatesConfig {
342 factual: Option<String>,
343 relational: Option<String>,
344 temporal: Option<String>,
345}
346
347#[derive(Debug, serde::Deserialize, Default)]
348struct RawOllamaConfig {
349 enabled: Option<bool>,
350 base_url: Option<String>,
351 model_name: Option<String>,
352 embedding_model: Option<String>,
353 timeout_seconds: Option<u64>,
354 max_retries: Option<u32>,
355 generation: Option<RawOllamaGenerationConfig>,
356}
357
358#[derive(Debug, serde::Deserialize, Default)]
359struct RawOllamaGenerationConfig {
360 temperature: Option<f64>,
361 top_p: Option<f64>,
362 max_tokens: Option<u32>,
363 stream: Option<bool>,
364}
365
366#[derive(Debug, serde::Deserialize, Default)]
367struct RawAsyncProcessingConfig {
368 enabled: Option<bool>,
369 max_concurrent_llm_calls: Option<usize>,
370 max_concurrent_embeddings: Option<usize>,
371 max_concurrent_documents: Option<usize>,
372 llm_rate_limit_per_second: Option<f64>,
373 embedding_rate_limit_per_second: Option<f64>,
374 batching: Option<RawBatchingConfig>,
375}
376
377#[derive(Debug, serde::Deserialize, Default)]
378struct RawBatchingConfig {
379 batch_size: Option<usize>,
380 batch_timeout_seconds: Option<u64>,
381 max_batch_memory_mb: Option<usize>,
382}
383
384#[derive(Debug, serde::Deserialize, Default)]
385struct RawFunctionCallingConfig {
386 enabled: Option<bool>,
387 max_function_calls: Option<usize>,
388 timeout_per_call_seconds: Option<u64>,
389 allow_nested_calls: Option<bool>,
390}
391
392#[derive(Debug, serde::Deserialize, Default)]
393struct RawMonitoringConfig {
394 enabled: Option<bool>,
395 collect_performance_metrics: Option<bool>,
396 collect_usage_statistics: Option<bool>,
397 health_check_interval_seconds: Option<u64>,
398 log_slow_operations: Option<bool>,
399 slow_operation_threshold_ms: Option<u64>,
400 benchmarking: Option<RawBenchmarkingConfig>,
401}
402
403#[derive(Debug, serde::Deserialize, Default)]
404struct RawBenchmarkingConfig {
405 enabled: Option<bool>,
406 run_periodic_benchmarks: Option<bool>,
407 benchmark_interval_hours: Option<u64>,
408 auto_recommendations: Option<bool>,
409}
410
411#[derive(Debug, serde::Deserialize, Default)]
412struct RawStorageConfig {
413 r#type: Option<String>,
414 workspace_isolation: Option<bool>,
415 max_workspaces: Option<usize>,
416 backup_enabled: Option<bool>,
417 backup_interval_hours: Option<u64>,
418 persistent: Option<RawPersistentConfig>,
419}
420
421#[derive(Debug, serde::Deserialize, Default)]
422struct RawPersistentConfig {
423 database_path: Option<String>,
424 enable_wal: Option<bool>,
425 cache_size_mb: Option<usize>,
426}
427
428#[derive(Debug, serde::Deserialize, Default)]
429struct RawParallelProcessingConfig {
430 enabled: Option<bool>,
431 max_threads: Option<usize>,
432 thread_pool_size: Option<usize>,
433 load_balancing: Option<bool>,
434 work_stealing: Option<bool>,
435}
436
437#[derive(Debug, serde::Deserialize, Default)]
438struct RawLoggingConfig {
439 level: Option<String>,
440 format: Option<String>,
441 include_timestamps: Option<bool>,
442 include_module_path: Option<bool>,
443 log_to_file: Option<bool>,
444 log_file: Option<String>,
445 max_log_file_mb: Option<usize>,
446 rotate_logs: Option<bool>,
447}
448
449#[derive(Debug, serde::Deserialize, Default)]
450struct RawExperimentalConfig {
451 neural_reranking: Option<bool>,
452 federated_learning: Option<bool>,
453 real_time_updates: Option<bool>,
454 distributed_processing: Option<bool>,
455}
456
457fn convert_raw_config(raw: RawConfig) -> Config {
459 let mut config = Config::default();
460
461 if let Some(_text_enabled) = raw.features.text_processing {
463 }
465
466 if let Some(_chunk_size) = raw.text_processing.chunk_size {
468 }
470
471 if let Some(min_confidence) = raw.entity_extraction.min_confidence {
473 config.entities.min_confidence = min_confidence;
474 }
475
476 if let Some(extract_rels) = raw.graph_construction.extract_relationships {
478 config.graph.extract_relationships = extract_rels;
479 }
480 if let Some(threshold) = raw.graph_construction.relationship_confidence_threshold {
481 config.graph.relationship_confidence_threshold = threshold as f32;
482 }
483
484 if let Some(enabled) = raw.parallel_processing.enabled {
486 config.parallel.enabled = enabled;
487 }
488 if let Some(max_threads) = raw.parallel_processing.max_threads {
489 config.parallel.num_threads = if max_threads == 0 {
490 #[cfg(feature = "parallel-processing")]
491 {
492 num_cpus::get()
493 }
494 #[cfg(not(feature = "parallel-processing"))]
495 {
496 1
497 }
498 } else {
499 max_threads
500 };
501 }
502
503 config
504}
505
506pub fn save_config(config: &Config, path: &str) -> Result<()> {
508 let format = ConfigFormat::from_extension(path);
509
510 match format {
511 ConfigFormat::Toml => save_toml_config(config, path),
512 ConfigFormat::Json => save_json_config(config, path),
513 ConfigFormat::Yaml => save_yaml_config(config, path),
514 }
515}
516
/// Writes a fixed TOML template to `path`.
///
/// NOTE(review): `_config` is not consulted; the same hard-coded document is
/// written regardless of the configuration passed in. The template's table
/// names (`text`, `entities`, `graph`, `parallel`) are not fields of
/// `RawConfig`, so confirm whether a saved file is meant to round-trip
/// through `load_config`.
#[cfg(feature = "toml-support")]
fn save_toml_config(_config: &Config, path: &str) -> Result<()> {
    const TEMPLATE: &str = r#"[text]
chunk_size = 1000
chunk_overlap = 200

[entities]
min_confidence = 0.7
entity_types = ["PERSON", "ORG", "LOCATION"]

[graph]
max_connections = 10
similarity_threshold = 0.8

[parallel]
enabled = true
num_threads = 0
"#;

    fs::write(path, TEMPLATE)?;
    Ok(())
}
538
539#[cfg(not(feature = "toml-support"))]
540fn save_toml_config(_config: &Config, _path: &str) -> Result<()> {
541 Err(GraphRAGError::Config {
542 message: "TOML support not enabled. Enable 'toml-support' feature.".to_string(),
543 })
544}
545
/// Writes a fixed JSON template to `path`.
///
/// NOTE(review): `_config` is not consulted; the same hard-coded document is
/// written regardless of the configuration passed in. The template's keys
/// (`text`, `entities`, `graph`, `parallel`) are not fields of `RawConfig`,
/// so confirm whether a saved file is meant to round-trip through
/// `load_config`.
#[cfg(feature = "serde_json")]
fn save_json_config(_config: &Config, path: &str) -> Result<()> {
    const TEMPLATE: &str = r#"{
  "text": {
    "chunk_size": 1000,
    "chunk_overlap": 200
  },
  "entities": {
    "min_confidence": 0.7,
    "entity_types": ["PERSON", "ORG", "LOCATION"]
  },
  "graph": {
    "max_connections": 10,
    "similarity_threshold": 0.8
  },
  "parallel": {
    "enabled": true,
    "num_threads": 0
  }
}"#;

    fs::write(path, TEMPLATE)?;
    Ok(())
}
569
570#[cfg(not(feature = "serde_json"))]
571fn save_json_config(_config: &Config, _path: &str) -> Result<()> {
572 Err(GraphRAGError::Config {
573 message: "JSON support not enabled.".to_string(),
574 })
575}
576
/// Writes a fixed YAML template to `path`.
///
/// NOTE(review): `_config` is not consulted; the same hard-coded document is
/// written regardless of the configuration passed in. The template's keys
/// (`text`, `entities`, `graph`, `parallel`) are not fields of `RawConfig`,
/// so confirm whether a saved file is meant to round-trip through
/// `load_config`.
#[cfg(feature = "yaml-support")]
fn save_yaml_config(_config: &Config, path: &str) -> Result<()> {
    const TEMPLATE: &str = r#"text:
  chunk_size: 1000
  chunk_overlap: 200

entities:
  min_confidence: 0.7
  entity_types: ["PERSON", "ORG", "LOCATION"]

graph:
  max_connections: 10
  similarity_threshold: 0.8

parallel:
  enabled: true
  num_threads: 0
"#;

    fs::write(path, TEMPLATE)?;
    Ok(())
}
598
599#[cfg(not(feature = "yaml-support"))]
600fn save_yaml_config(_config: &Config, _path: &str) -> Result<()> {
601 Err(GraphRAGError::Config {
602 message: "YAML support not enabled.".to_string(),
603 })
604}
605
#[cfg(test)]
mod tests {
    use super::*;

    /// Each known extension maps to its format; a path without an extension
    /// falls back to TOML.
    #[test]
    fn test_config_format_detection() {
        let cases = [
            ("config.toml", ConfigFormat::Toml),
            ("config.json", ConfigFormat::Json),
            ("config.yaml", ConfigFormat::Yaml),
            ("config.yml", ConfigFormat::Yaml),
            ("config", ConfigFormat::Toml),
        ];

        for (path, expected) in cases {
            let detected = ConfigFormat::from_extension(path);
            // Compare variants by discriminant so the enum needs no `PartialEq`.
            assert!(
                std::mem::discriminant(&detected) == std::mem::discriminant(&expected),
                "unexpected format {detected:?} for {path}"
            );
        }
    }
}