1use crate::Result;
2use std::fs;
3
4pub mod enhancements;
6#[cfg(feature = "json5-support")]
8pub mod json5_loader;
9pub mod loader;
11#[cfg(feature = "json5-support")]
13pub mod schema_validator;
14pub mod setconfig;
16pub mod validation;
18
19pub use setconfig::{
20 AlgorithmicEmbeddingsConfig,
21 AlgorithmicEntityConfig,
22 AlgorithmicGraphConfig,
23 AlgorithmicPipelineConfig,
25 AlgorithmicRetrievalConfig,
26 HybridEmbeddingsConfig,
27 HybridEntityConfig,
28 HybridGraphConfig,
29 HybridPipelineConfig,
31 HybridRetrievalConfig,
32 HybridWeightsConfig,
33 ModeConfig,
35 SemanticEmbeddingsConfig,
36 SemanticEntityConfig,
37 SemanticGraphConfig,
38 SemanticPipelineConfig,
40 SemanticRetrievalConfig,
41 SetConfig,
42};
43pub use validation::{validate_config_file, Validatable, ValidationResult};
44
/// Top-level GraphRAG configuration, (de)serialized from TOML/JSON.
///
/// The flat `chunk_size` / `chunk_overlap` / `top_k_results` fields coexist
/// with the nested `text` / `retrieval` sections; `Config::default()` keeps
/// them in sync via the shared `default_*` helpers.
/// NOTE(review): fields without `#[serde(default…)]` are required when a
/// config file is deserialized — confirm that strictness is intended.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Config {
    /// Directory where generated artifacts are written.
    pub output_dir: String,

    /// Target chunk size (see also `text.chunk_size`).
    pub chunk_size: usize,

    /// Overlap between consecutive chunks (see also `text.chunk_overlap`).
    pub chunk_overlap: usize,

    /// Optional cap on entities extracted per chunk.
    pub max_entities_per_chunk: Option<usize>,

    /// Optional top-k override for retrieval.
    pub top_k_results: Option<usize>,

    /// Optional similarity-threshold override.
    pub similarity_threshold: Option<f32>,

    /// Processing approach; defaults to "semantic" when absent.
    #[serde(default = "default_approach")]
    pub approach: String,

    pub embeddings: EmbeddingConfig,

    pub graph: GraphConfig,

    pub text: TextConfig,

    pub entities: EntityConfig,

    pub retrieval: RetrievalConfig,

    pub parallel: ParallelConfig,

    pub ollama: crate::ollama::OllamaConfig,

    pub gliner: GlinerConfig,

    pub enhancements: enhancements::EnhancementsConfig,

    pub auto_save: AutoSaveConfig,

    pub summarization: crate::summarization::HierarchicalConfig,

    pub zero_cost_approach: ZeroCostApproachConfig,

    /// Experimental subsystems; section may be omitted entirely.
    #[serde(default)]
    pub advanced_features: AdvancedFeaturesConfig,

    /// Suppress progress bars (useful for non-interactive runs).
    #[serde(default)]
    pub suppress_progress_bars: bool,
}
116
/// GLiNER-based entity/relation extraction settings (off by default).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct GlinerConfig {
    /// Master switch for GLiNER extraction.
    pub enabled: bool,
    /// Path to the model files.
    pub model_path: String,
    /// Path to the tokenizer files.
    pub tokenizer_path: String,
    /// Extraction mode (default "span").
    pub mode: String,
    /// Entity labels the model is asked to find.
    pub entity_labels: Vec<String>,
    /// Relation labels the model is asked to find.
    pub relation_labels: Vec<String>,
    /// Minimum score for an entity to be kept.
    pub entity_threshold: f32,
    /// Minimum score for a relation to be kept.
    pub relation_threshold: f32,
    /// Run inference on GPU.
    pub use_gpu: bool,
}
139
140impl Default for GlinerConfig {
141 fn default() -> Self {
142 Self {
143 enabled: false,
144 model_path: String::new(),
145 tokenizer_path: String::new(),
146 mode: "span".to_string(),
147 entity_labels: vec![
148 "person".into(),
149 "organization".into(),
150 "location".into(),
151 "concept".into(),
152 ],
153 relation_labels: vec!["related to".into(), "part of".into(), "causes".into()],
154 entity_threshold: 0.4,
155 relation_threshold: 0.5,
156 use_gpu: false,
157 }
158 }
159}
160
/// Periodic auto-save of workspace state.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct AutoSaveConfig {
    /// Enable periodic saving (off by default).
    #[serde(default)]
    pub enabled: bool,

    /// Base directory for saved state; `None` leaves the choice to the caller.
    #[serde(default)]
    pub base_dir: Option<String>,

    /// Seconds between saves (default 300).
    #[serde(default = "default_auto_save_interval")]
    pub interval_seconds: u64,

    /// Optional workspace name used when saving.
    #[serde(default)]
    pub workspace_name: Option<String>,

    /// Number of saved versions to retain (default 5).
    #[serde(default = "default_max_versions")]
    pub max_versions: usize,
}
187
/// Selects and configures the zero-/low-cost GraphRAG strategies.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ZeroCostApproachConfig {
    /// Which strategy to use (default "pure_algorithmic").
    #[serde(default = "default_zero_cost_approach")]
    pub approach: String,

    /// LazyGraphRAG settings (LLM work deferred to query time).
    #[serde(default)]
    pub lazy_graphrag: LazyGraphRAGConfig,

    /// E2-GraphRAG settings (algorithmic NER + keywords).
    #[serde(default)]
    pub e2_graphrag: E2GraphRAGConfig,

    /// Fully algorithmic (no-LLM) pipeline settings.
    #[serde(default)]
    pub pure_algorithmic: PureAlgorithmicConfig,

    /// Settings for mixing the above strategies.
    #[serde(default)]
    pub hybrid_strategy: HybridStrategyConfig,
}
211
/// LazyGraphRAG: cheap concept/co-occurrence indexing, with optional
/// LLM-backed query expansion and relevance scoring at query time.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, Default)]
pub struct LazyGraphRAGConfig {
    /// Master switch (derived `Default` leaves this `false`).
    pub enabled: bool,
    pub concept_extraction: ConceptExtractionConfig,
    pub co_occurrence: CoOccurrenceConfig,
    pub indexing: LazyIndexingConfig,
    pub query_expansion: LazyQueryExpansionConfig,
    pub relevance_scoring: LazyRelevanceScoringConfig,
}
229
/// Heuristic concept extraction from chunk text.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ConceptExtractionConfig {
    /// Minimum length for a candidate concept.
    pub min_concept_length: usize,
    /// Maximum number of words per concept phrase.
    pub max_concept_words: usize,
    pub use_noun_phrases: bool,
    pub use_capitalization: bool,
    pub use_title_case: bool,
    pub use_tf_idf_scoring: bool,
    /// Terms below this frequency are ignored.
    pub min_term_frequency: usize,
    pub max_concepts_per_chunk: usize,
    /// Concepts scoring below this are dropped.
    pub min_concept_score: f32,
    pub exclude_stopwords: bool,
    /// Extra stopwords on top of the built-in list.
    pub custom_stopwords: Vec<String>,
}
257
/// Concept co-occurrence graph construction parameters.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CoOccurrenceConfig {
    /// Co-occurrence window size (units defined by the extractor).
    pub window_size: usize,
    /// Minimum co-occurrence count for an edge.
    pub min_co_occurrence: usize,
    /// Minimum Jaccard similarity for an edge.
    pub jaccard_threshold: f32,
    /// Cap on edges retained per node.
    pub max_edges_per_node: usize,
}
271
/// Index-structure options for LazyGraphRAG.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LazyIndexingConfig {
    pub use_bidirectional_index: bool,
    /// Enable an HNSW approximate-nearest-neighbor index.
    pub enable_hnsw_index: bool,
    /// Cache capacity (entries).
    pub cache_size: usize,
}
283
/// LLM-backed query expansion performed at query time.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LazyQueryExpansionConfig {
    pub enabled: bool,
    /// Maximum number of expanded queries to generate.
    pub max_expansions: usize,
    /// Model identifier used for expansion.
    pub expansion_model: String,
    pub expansion_temperature: f32,
    pub max_tokens_per_expansion: usize,
}
299
/// LLM-backed relevance scoring of candidate results.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LazyRelevanceScoringConfig {
    pub enabled: bool,
    /// Model identifier used for scoring.
    pub scoring_model: String,
    /// Candidates scored per LLM call batch.
    pub batch_size: usize,
    pub temperature: f32,
    pub max_tokens_per_score: usize,
}
315
/// E2-GraphRAG: algorithmic NER + keyword extraction + graph construction.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, Default)]
pub struct E2GraphRAGConfig {
    /// Master switch (derived `Default` leaves this `false`).
    pub enabled: bool,

    pub ner_extraction: NERExtractionConfig,

    pub keyword_extraction: KeywordExtractionConfig,

    pub graph_construction: E2GraphConstructionConfig,

    pub indexing: E2IndexingConfig,
}
335
/// Pattern-based (non-LLM) named-entity recognition settings.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct NERExtractionConfig {
    /// Entity types to emit (e.g. PERSON, ORG, LOCATION).
    pub entity_types: Vec<String>,

    pub use_capitalized_patterns: bool,

    pub use_title_case_patterns: bool,

    pub use_quoted_patterns: bool,

    pub use_abbreviations: bool,

    /// Use surrounding context to disambiguate candidates.
    pub use_contextual_disambiguation: bool,

    /// Minimum context words required for disambiguation.
    pub min_context_words: usize,

    /// Candidates below this confidence are dropped.
    pub min_confidence: f32,

    pub use_positional_boost: bool,

    pub use_frequency_boost: bool,
}
370
371impl Default for NERExtractionConfig {
372 fn default() -> Self {
373 Self {
374 entity_types: vec![
375 "PERSON".to_string(),
376 "ORG".to_string(),
377 "LOCATION".to_string(),
378 ],
379 use_capitalized_patterns: true,
380 use_title_case_patterns: true,
381 use_quoted_patterns: true,
382 use_abbreviations: true,
383 use_contextual_disambiguation: true,
384 min_context_words: 5,
385 min_confidence: 0.7,
386 use_positional_boost: true,
387 use_frequency_boost: true,
388 }
389 }
390}
391
/// Keyword extraction settings for E2-GraphRAG.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct KeywordExtractionConfig {
    /// Algorithms to run (e.g. "tfidf", "yake").
    pub algorithms: Vec<String>,

    pub max_keywords_per_chunk: usize,

    pub min_keyword_length: usize,

    /// Merge results when multiple algorithms are configured.
    pub combine_algorithms: bool,
}
408
409impl Default for KeywordExtractionConfig {
410 fn default() -> Self {
411 Self {
412 algorithms: vec!["tfidf".to_string(), "yake".to_string()],
413 max_keywords_per_chunk: 10,
414 min_keyword_length: 3,
415 combine_algorithms: true,
416 }
417 }
418}
419
/// Graph construction settings for E2-GraphRAG.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct E2GraphConstructionConfig {
    /// Relationship types to emit.
    pub relationship_types: Vec<String>,

    /// Relationships scoring below this are dropped.
    pub min_relationship_score: f32,

    pub max_relationships_per_entity: usize,

    /// Score relationships with mutual information.
    pub use_mutual_information: bool,
}
436
437impl Default for E2GraphConstructionConfig {
438 fn default() -> Self {
439 Self {
440 relationship_types: vec!["CO_OCCURS_WITH".to_string(), "RELATED_TO".to_string()],
441 min_relationship_score: 0.5,
442 max_relationships_per_entity: 20,
443 use_mutual_information: true,
444 }
445 }
446}
447
/// Indexing settings for E2-GraphRAG.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct E2IndexingConfig {
    /// Items processed per batch.
    pub batch_size: usize,

    pub enable_parallel_processing: bool,

    pub cache_concept_vectors: bool,

    /// Use hash-based embeddings instead of model embeddings.
    pub use_hash_embeddings: bool,
}
464
465impl Default for E2IndexingConfig {
466 fn default() -> Self {
467 Self {
468 batch_size: 32,
469 enable_parallel_processing: true,
470 cache_concept_vectors: true,
471 use_hash_embeddings: false,
472 }
473 }
474}
475
/// Fully algorithmic (no-LLM) pipeline configuration.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PureAlgorithmicConfig {
    pub enabled: bool,
    pub pattern_extraction: PatternExtractionConfig,
    pub keyword_extraction: PureKeywordExtractionConfig,
    pub relationship_discovery: RelationshipDiscoveryConfig,
    pub search_ranking: SearchRankingConfig,
}
494
/// Regex seed patterns used for algorithmic entity extraction.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PatternExtractionConfig {
    /// Patterns matching capitalized words/phrases.
    pub capitalized_patterns: Vec<String>,
    /// Patterns matching technical terms (e.g. hyphenated forms).
    pub technical_patterns: Vec<String>,
    /// Patterns keyed on surrounding context words.
    pub context_patterns: Vec<String>,
}
508
/// Keyword extraction settings for the pure-algorithmic pipeline.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PureKeywordExtractionConfig {
    /// Algorithm name (e.g. "tf_idf").
    pub algorithm: String,
    pub max_keywords: usize,
    pub min_word_length: usize,
    pub use_positional_boost: bool,
    pub use_frequency_filter: bool,
    /// Terms below this frequency are ignored.
    pub min_term_frequency: usize,
    /// Terms appearing in more than this fraction of documents are ignored.
    pub max_term_frequency_ratio: f32,
}
530
/// Algorithmic relationship discovery (co-occurrence based).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct RelationshipDiscoveryConfig {
    /// Co-occurrence window size.
    pub window_size: usize,
    pub min_co_occurrence: usize,
    pub use_mutual_information: bool,
    pub relationship_types: Vec<String>,
    /// Scoring method name (e.g. "jaccard_similarity").
    pub scoring_method: String,
    pub min_similarity_score: f32,
}
550
/// Ranking stack for algorithmic search: each signal is individually
/// toggleable and the fused score is controlled by `hybrid_fusion`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct SearchRankingConfig {
    pub vector_search: VectorSearchConfig,
    pub keyword_search: KeywordSearchConfig,
    pub graph_traversal: GraphTraversalConfig,
    pub hybrid_fusion: HybridFusionConfig,
}
566
/// Vector-search toggle (derived `Default` leaves it disabled).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, Default)]
pub struct VectorSearchConfig {
    pub enabled: bool,
}
576
/// Keyword-search settings (BM25 parameters when `algorithm` is "bm25").
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct KeywordSearchConfig {
    pub enabled: bool,
    pub algorithm: String,
    /// BM25 term-frequency saturation parameter.
    pub k1: f32,
    /// BM25 length-normalization parameter.
    pub b: f32,
}
592
/// Graph-traversal ranking settings (PageRank-style).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct GraphTraversalConfig {
    pub enabled: bool,
    pub algorithm: String,
    /// PageRank damping factor.
    pub damping_factor: f32,
    pub max_iterations: usize,
    /// Personalize scores toward the query's seed nodes.
    pub personalized: bool,
}
610
/// Fusion of the individual ranking signals into one score.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct HybridFusionConfig {
    pub enabled: bool,
    pub weights: FusionWeights,
}
622
/// Per-signal weights used by hybrid fusion.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct FusionWeights {
    pub keywords: f32,
    pub graph: f32,
    pub bm25: f32,
}
636
/// Strategies for combining the zero-cost approaches.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct HybridStrategyConfig {
    /// Mix cheap indexing with lazy querying.
    pub lazy_algorithmic: LazyAlgorithmicConfig,
    /// Escalate approach by query complexity level.
    pub progressive: ProgressiveConfig,
    /// Choose approach under a cost budget.
    pub budget_aware: BudgetAwareConfig,
}
650
/// Split strategy: one approach for indexing, another for querying.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LazyAlgorithmicConfig {
    pub indexing_approach: String,
    pub query_approach: String,
    /// Which phase to optimize cost for.
    pub cost_optimization: String,
}
664
/// Approach chosen per escalation level (level 0 = cheapest).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ProgressiveConfig {
    pub level_0: String,
    pub level_1: String,
    pub level_2: String,
    pub level_3: String,
    pub level_4_plus: String,
}
682
/// Budget constraints steering approach selection.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BudgetAwareConfig {
    pub daily_budget_usd: f64,
    /// Expected query volume used to amortize the budget.
    pub queries_per_day: usize,
    pub max_llm_cost_per_query: f64,
    /// Preferred approach while within budget.
    pub strategy: String,
    /// Drop to the algorithmic approach once the budget is exhausted.
    pub fallback_to_algorithmic: bool,
}
700
/// Serde default for `ZeroCostApproachConfig::approach`.
fn default_zero_cost_approach() -> String {
    String::from("pure_algorithmic")
}
705
706impl Default for ZeroCostApproachConfig {
707 fn default() -> Self {
708 Self {
709 approach: default_zero_cost_approach(),
710 lazy_graphrag: LazyGraphRAGConfig::default(),
711 e2_graphrag: E2GraphRAGConfig::default(),
712 pure_algorithmic: PureAlgorithmicConfig::default(),
713 hybrid_strategy: HybridStrategyConfig::default(),
714 }
715 }
716}
717
718impl Default for ConceptExtractionConfig {
720 fn default() -> Self {
721 Self {
722 min_concept_length: 3,
723 max_concept_words: 5,
724 use_noun_phrases: true,
725 use_capitalization: true,
726 use_title_case: true,
727 use_tf_idf_scoring: true,
728 min_term_frequency: 2,
729 max_concepts_per_chunk: 10,
730 min_concept_score: 0.1,
731 exclude_stopwords: true,
732 custom_stopwords: vec!["the".to_string(), "and".to_string(), "or".to_string()],
733 }
734 }
735}
736impl Default for CoOccurrenceConfig {
737 fn default() -> Self {
738 Self {
739 window_size: 50,
740 min_co_occurrence: 2,
741 jaccard_threshold: 0.2,
742 max_edges_per_node: 25,
743 }
744 }
745}
746impl Default for LazyIndexingConfig {
747 fn default() -> Self {
748 Self {
749 use_bidirectional_index: true,
750 enable_hnsw_index: false,
751 cache_size: 10000,
752 }
753 }
754}
755impl Default for LazyQueryExpansionConfig {
756 fn default() -> Self {
757 Self {
758 enabled: true,
759 max_expansions: 3,
760 expansion_model: "llama3.1:8b".to_string(),
761 expansion_temperature: 0.1,
762 max_tokens_per_expansion: 50,
763 }
764 }
765}
766impl Default for LazyRelevanceScoringConfig {
767 fn default() -> Self {
768 Self {
769 enabled: true,
770 scoring_model: "llama3.1:8b".to_string(),
771 batch_size: 10,
772 temperature: 0.2,
773 max_tokens_per_score: 30,
774 }
775 }
776}
777impl Default for PureAlgorithmicConfig {
778 fn default() -> Self {
779 Self {
780 enabled: true,
781 pattern_extraction: Default::default(),
782 keyword_extraction: Default::default(),
783 relationship_discovery: Default::default(),
784 search_ranking: Default::default(),
785 }
786 }
787}
788impl Default for PatternExtractionConfig {
789 fn default() -> Self {
790 Self {
791 capitalized_patterns: vec![r"[A-Z][a-z]+".to_string()],
792 technical_patterns: vec![r"[a-z]+-[a-z]+".to_string()],
793 context_patterns: vec![r"\b(the|this)\s+(\w+)".to_string()],
794 }
795 }
796}
797impl Default for PureKeywordExtractionConfig {
798 fn default() -> Self {
799 Self {
800 algorithm: "tf_idf".to_string(),
801 max_keywords: 20,
802 min_word_length: 4,
803 use_positional_boost: true,
804 use_frequency_filter: true,
805 min_term_frequency: 2,
806 max_term_frequency_ratio: 0.8,
807 }
808 }
809}
810impl Default for RelationshipDiscoveryConfig {
811 fn default() -> Self {
812 Self {
813 window_size: 30,
814 min_co_occurrence: 2,
815 use_mutual_information: true,
816 relationship_types: vec!["co_occurs_with".to_string()],
817 scoring_method: "jaccard_similarity".to_string(),
818 min_similarity_score: 0.1,
819 }
820 }
821}
822impl Default for SearchRankingConfig {
823 fn default() -> Self {
824 Self {
825 vector_search: VectorSearchConfig { enabled: false },
826 keyword_search: KeywordSearchConfig {
827 enabled: true,
828 algorithm: "bm25".to_string(),
829 k1: 1.2,
830 b: 0.75,
831 },
832 graph_traversal: GraphTraversalConfig {
833 enabled: true,
834 algorithm: "pagerank".to_string(),
835 damping_factor: 0.85,
836 max_iterations: 20,
837 personalized: true,
838 },
839 hybrid_fusion: HybridFusionConfig {
840 enabled: true,
841 weights: FusionWeights {
842 keywords: 0.4,
843 graph: 0.4,
844 bm25: 0.2,
845 },
846 },
847 }
848 }
849}
850impl Default for HybridStrategyConfig {
851 fn default() -> Self {
852 Self {
853 lazy_algorithmic: LazyAlgorithmicConfig {
854 indexing_approach: "e2_graphrag".to_string(),
855 query_approach: "lazy_graphrag".to_string(),
856 cost_optimization: "indexing".to_string(),
857 },
858 progressive: ProgressiveConfig {
859 level_0: "pure_algorithmic".to_string(),
860 level_1: "pure_algorithmic".to_string(),
861 level_2: "e2_graphrag".to_string(),
862 level_3: "lazy_graphrag".to_string(),
863 level_4_plus: "lazy_graphrag".to_string(),
864 },
865 budget_aware: BudgetAwareConfig {
866 daily_budget_usd: 1.0,
867 queries_per_day: 1000,
868 max_llm_cost_per_query: 0.002,
869 strategy: "lazy_graphrag".to_string(),
870 fallback_to_algorithmic: true,
871 },
872 }
873 }
874}
875impl Default for KeywordSearchConfig {
876 fn default() -> Self {
877 Self {
878 enabled: true,
879 algorithm: "bm25".to_string(),
880 k1: 1.2,
881 b: 0.75,
882 }
883 }
884}
885impl Default for GraphTraversalConfig {
886 fn default() -> Self {
887 Self {
888 enabled: true,
889 algorithm: "pagerank".to_string(),
890 damping_factor: 0.85,
891 max_iterations: 20,
892 personalized: true,
893 }
894 }
895}
896impl Default for HybridFusionConfig {
897 fn default() -> Self {
898 Self {
899 enabled: true,
900 weights: FusionWeights {
901 keywords: 0.4,
902 graph: 0.4,
903 bm25: 0.2,
904 },
905 }
906 }
907}
908impl Default for FusionWeights {
909 fn default() -> Self {
910 Self {
911 keywords: 0.4,
912 graph: 0.4,
913 bm25: 0.2,
914 }
915 }
916}
917impl Default for LazyAlgorithmicConfig {
918 fn default() -> Self {
919 Self {
920 indexing_approach: "e2_graphrag".to_string(),
921 query_approach: "lazy_graphrag".to_string(),
922 cost_optimization: "indexing".to_string(),
923 }
924 }
925}
926impl Default for ProgressiveConfig {
927 fn default() -> Self {
928 Self {
929 level_0: "pure_algorithmic".to_string(),
930 level_1: "pure_algorithmic".to_string(),
931 level_2: "e2_graphrag".to_string(),
932 level_3: "lazy_graphrag".to_string(),
933 level_4_plus: "lazy_graphrag".to_string(),
934 }
935 }
936}
937impl Default for BudgetAwareConfig {
938 fn default() -> Self {
939 Self {
940 daily_budget_usd: 1.0,
941 queries_per_day: 1000,
942 max_llm_cost_per_query: 0.002,
943 strategy: "lazy_graphrag".to_string(),
944 fallback_to_algorithmic: true,
945 }
946 }
947}
948
/// Embedding backend configuration.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct EmbeddingConfig {
    /// Embedding vector dimension.
    pub dimension: usize,

    /// Backend name (e.g. "hash"; see `default_embedding_backend`).
    pub backend: String,

    /// Optional model identifier for model-based backends.
    #[serde(default)]
    pub model: Option<String>,

    /// Fall back to hash embeddings when the backend is unavailable.
    pub fallback_to_hash: bool,

    /// Endpoint for API-based backends.
    pub api_endpoint: Option<String>,

    /// API key for API-based backends.
    pub api_key: Option<String>,

    /// Optional on-disk cache directory.
    #[serde(default)]
    pub cache_dir: Option<String>,

    /// Texts embedded per batch (default 32).
    #[serde(default = "default_batch_size")]
    pub batch_size: usize,
}
988
/// Serde default for `EmbeddingConfig::batch_size`.
fn default_batch_size() -> usize { 32 }
992
/// Knowledge-graph construction settings.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct GraphConfig {
    /// Cap on connections per node.
    pub max_connections: usize,

    /// Minimum similarity for creating an edge.
    pub similarity_threshold: f32,

    /// Extract typed relationships (default on).
    #[serde(default = "default_true")]
    pub extract_relationships: bool,

    /// Relationships below this confidence are dropped (default 0.5).
    #[serde(default = "default_relationship_confidence")]
    pub relationship_confidence_threshold: f32,

    /// Traversal parameters; section may be omitted.
    #[serde(default)]
    pub traversal: TraversalConfigParams,
}
1014
/// Graph traversal parameters used during retrieval.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct TraversalConfigParams {
    /// Maximum traversal depth (default 3).
    #[serde(default = "default_max_traversal_depth")]
    pub max_depth: usize,

    /// Maximum number of paths explored (default 10).
    #[serde(default = "default_max_paths")]
    pub max_paths: usize,

    /// Weight paths by edge strength (default on).
    #[serde(default = "default_true")]
    pub use_edge_weights: bool,

    /// Edges weaker than this are not traversed (default 0.3).
    #[serde(default = "default_min_relationship_strength")]
    pub min_relationship_strength: f32,
}
1034
1035impl Default for TraversalConfigParams {
1036 fn default() -> Self {
1037 Self {
1038 max_depth: default_max_traversal_depth(),
1039 max_paths: default_max_paths(),
1040 use_edge_weights: true,
1041 min_relationship_strength: default_min_relationship_strength(),
1042 }
1043 }
1044}
1045
/// Text chunking settings.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct TextConfig {
    /// Target chunk size.
    pub chunk_size: usize,

    /// Overlap between consecutive chunks.
    pub chunk_overlap: usize,

    /// Language codes to process (e.g. "en").
    pub languages: Vec<String>,
}
1058
/// Entity extraction settings.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct EntityConfig {
    /// Entities below this confidence are dropped.
    pub min_confidence: f32,

    /// Entity types to extract.
    pub entity_types: Vec<String>,

    /// Run extra "gleaning" passes to catch missed entities (default off).
    #[serde(default)]
    pub use_gleaning: bool,

    /// Maximum gleaning passes (default 3).
    #[serde(default = "default_max_gleaning_rounds")]
    pub max_gleaning_rounds: usize,

    /// Re-validate extracted triples (default off).
    #[serde(default)]
    pub enable_triple_reflection: bool,

    /// Confidence floor used during validation (default 0.7).
    #[serde(default = "default_validation_confidence")]
    pub validation_min_confidence: f32,

    /// Decompose text into atomic facts before extraction (default off).
    #[serde(default)]
    pub use_atomic_facts: bool,

    /// Token cap per atomic fact (default 400).
    #[serde(default = "default_max_fact_tokens")]
    pub max_fact_tokens: usize,
}
1096
/// Optional advanced-feature sections; each falls back to its default
/// when omitted from the config file.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct AdvancedFeaturesConfig {
    #[serde(default)]
    pub symbolic_anchoring: SymbolicAnchoringConfig,

    #[serde(default)]
    pub dynamic_weighting: DynamicWeightingConfig,

    #[serde(default)]
    pub causal_analysis: CausalAnalysisConfig,

    #[serde(default)]
    pub hierarchical_clustering: HierarchicalClusteringConfig,

    #[serde(default)]
    pub weight_optimization: WeightOptimizationConfig,
}
1125
/// Symbolic anchoring settings.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct SymbolicAnchoringConfig {
    /// Anchors below this relevance are dropped (default 0.3).
    #[serde(default = "default_anchor_min_relevance")]
    pub min_relevance: f32,

    /// Maximum number of anchors (default 5).
    #[serde(default = "default_max_anchors")]
    pub max_anchors: usize,

    /// Cap on entities attached to one anchor (default 10).
    #[serde(default = "default_max_entities_per_anchor")]
    pub max_entities_per_anchor: usize,
}
1141
/// Dynamic score-boost toggles; all enabled by default.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct DynamicWeightingConfig {
    #[serde(default = "default_true")]
    pub enable_semantic_boost: bool,

    #[serde(default = "default_true")]
    pub enable_temporal_boost: bool,

    #[serde(default = "default_true")]
    pub enable_concept_boost: bool,

    #[serde(default = "default_true")]
    pub enable_causal_boost: bool,
}
1161
/// Causal-chain analysis settings.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CausalAnalysisConfig {
    /// Minimum confidence for a causal link (default 0.3).
    #[serde(default = "default_causal_min_confidence")]
    pub min_confidence: f32,

    /// Minimum causal strength for a link (default 0.5).
    #[serde(default = "default_causal_min_strength")]
    pub min_causal_strength: f32,

    /// Maximum causal-chain length (default 5).
    #[serde(default = "default_max_chain_depth")]
    pub max_chain_depth: usize,

    /// Require events in a chain to be temporally ordered (default on).
    #[serde(default = "default_true")]
    pub require_temporal_consistency: bool,
}
1181
/// Hierarchical graph clustering settings.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct HierarchicalClusteringConfig {
    /// Number of hierarchy levels (default 3).
    #[serde(default = "default_num_levels")]
    pub num_levels: usize,

    /// Clustering resolution per level (default [1.0, 0.5, 0.2]).
    #[serde(default = "default_resolutions")]
    pub resolutions: Vec<f32>,

    /// Clusters smaller than this are discarded (default 2).
    #[serde(default = "default_min_cluster_size")]
    pub min_cluster_size: usize,

    /// Generate per-cluster summaries (default on).
    #[serde(default = "default_true")]
    pub generate_summaries: bool,
}
1202
/// Iterative retrieval-weight optimization settings.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct WeightOptimizationConfig {
    /// Step size of the optimizer (default 0.1).
    #[serde(default = "default_learning_rate")]
    pub learning_rate: f32,

    /// Maximum optimization iterations (default 20).
    #[serde(default = "default_max_iterations")]
    pub max_iterations: usize,

    /// Window used when estimating the improvement slope (default 3).
    #[serde(default = "default_slope_window")]
    pub slope_window: usize,

    /// Improvement below this counts as stagnation (default 0.01).
    #[serde(default = "default_stagnation_threshold")]
    pub stagnation_threshold: f32,

    /// Use an LLM to evaluate candidate weightings (default on).
    #[serde(default = "default_true")]
    pub use_llm_eval: bool,

    /// Objective weights; section may be omitted.
    #[serde(default)]
    pub objective_weights: ObjectiveWeightsConfig,
}
1230
/// Relative weights of the optimization objectives.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ObjectiveWeightsConfig {
    /// Weight of answer relevance (default 0.4).
    #[serde(default = "default_relevance_weight")]
    pub relevance: f32,

    /// Weight of faithfulness to sources (default 0.4).
    #[serde(default = "default_faithfulness_weight")]
    pub faithfulness: f32,

    /// Weight of conciseness (default 0.2).
    #[serde(default = "default_conciseness_weight")]
    pub conciseness: f32,
}
1246
/// Retrieval settings.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct RetrievalConfig {
    /// Number of results to return.
    pub top_k: usize,

    /// Search algorithm name (e.g. "cosine").
    pub search_algorithm: String,
}
1256
/// Parallel-processing settings.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ParallelConfig {
    /// Worker threads; 0 is presumably "auto" — confirm against the pool setup.
    pub num_threads: usize,

    /// Master switch for parallel execution.
    pub enabled: bool,

    /// Batches smaller than this run sequentially.
    pub min_batch_size: usize,

    /// Chunks processed per batch.
    pub chunk_batch_size: usize,

    pub parallel_embeddings: bool,

    pub parallel_graph_ops: bool,

    pub parallel_vector_ops: bool,
}
1281
// Serde default helpers. Each mirrors the value used by `Config::default()`
// so deserialization defaults and `Default` stay in sync.
// (This run previously had two functions fused onto a single line —
// reformatted, no values changed.)
fn default_embedding_dim() -> usize {
    384
}
fn default_embedding_backend() -> String {
    "hash".to_string()
}
fn default_max_connections() -> usize {
    10
}
fn default_similarity_threshold() -> f32 {
    0.8
}
fn default_chunk_size() -> usize {
    1000
}
fn default_chunk_overlap() -> usize {
    200
}
fn default_languages() -> Vec<String> {
    vec!["en".to_string()]
}
fn default_min_confidence() -> f32 {
    0.7
}
fn default_entity_types() -> Vec<String> {
    vec![
        "PERSON".to_string(),
        "ORG".to_string(),
        "LOCATION".to_string(),
    ]
}
fn default_top_k() -> usize {
    10
}
fn default_search_algorithm() -> String {
    "cosine".to_string()
}
fn default_num_threads() -> usize {
    // 0 presumably means "auto-detect" — confirm against the thread-pool setup.
    0
}
fn default_min_batch_size() -> usize {
    10
}
fn default_chunk_batch_size() -> usize {
    100
}
fn default_true() -> bool {
    true
}
fn default_relationship_confidence() -> f32 {
    0.5
}
fn default_max_gleaning_rounds() -> usize {
    3
}
1338
// Serde default helpers for validation, symbolic anchoring, causal analysis,
// clustering, weight optimization, and atomic-fact extraction.
fn default_validation_confidence() -> f32 { 0.7 }
fn default_anchor_min_relevance() -> f32 { 0.3 }
fn default_max_anchors() -> usize { 5 }
fn default_max_entities_per_anchor() -> usize { 10 }
fn default_causal_min_confidence() -> f32 { 0.3 }
fn default_causal_min_strength() -> f32 { 0.5 }
fn default_max_chain_depth() -> usize { 5 }
fn default_num_levels() -> usize { 3 }
fn default_resolutions() -> Vec<f32> { vec![1.0, 0.5, 0.2] }
fn default_min_cluster_size() -> usize { 2 }
fn default_learning_rate() -> f32 { 0.1 }
fn default_max_iterations() -> usize { 20 }
fn default_slope_window() -> usize { 3 }
fn default_stagnation_threshold() -> f32 { 0.01 }
fn default_relevance_weight() -> f32 { 0.4 }
fn default_faithfulness_weight() -> f32 { 0.4 }
fn default_conciseness_weight() -> f32 { 0.2 }
fn default_max_fact_tokens() -> usize { 400 }
1416
// Serde default helpers for the approach selector, traversal, and auto-save.
// (The last two previously had closing braces fused onto the value lines —
// reformatted, no values changed.)
fn default_approach() -> String {
    "semantic".to_string()
}
fn default_max_traversal_depth() -> usize {
    3
}
fn default_max_paths() -> usize {
    10
}
fn default_min_relationship_strength() -> f32 {
    0.3
}
/// Auto-save interval: 300 seconds (5 minutes).
fn default_auto_save_interval() -> u64 {
    300
}
fn default_max_versions() -> usize {
    5
}
1435
1436impl Default for Config {
1437 fn default() -> Self {
1438 Self {
1439 output_dir: "./output".to_string(),
1440 chunk_size: default_chunk_size(),
1441 chunk_overlap: default_chunk_overlap(),
1442 max_entities_per_chunk: Some(10),
1443 top_k_results: Some(default_top_k()),
1444 similarity_threshold: Some(default_similarity_threshold()),
1445 approach: default_approach(),
1446 embeddings: EmbeddingConfig {
1447 dimension: default_embedding_dim(),
1448 backend: default_embedding_backend(),
1449 model: Some("sentence-transformers/all-MiniLM-L6-v2".to_string()),
1450 fallback_to_hash: true,
1451 api_endpoint: None,
1452 api_key: None,
1453 cache_dir: None,
1454 batch_size: default_batch_size(),
1455 },
1456 graph: GraphConfig {
1457 max_connections: default_max_connections(),
1458 similarity_threshold: default_similarity_threshold(),
1459 extract_relationships: default_true(),
1460 relationship_confidence_threshold: default_relationship_confidence(),
1461 traversal: TraversalConfigParams::default(),
1462 },
1463 text: TextConfig {
1464 chunk_size: default_chunk_size(),
1465 chunk_overlap: default_chunk_overlap(),
1466 languages: default_languages(),
1467 },
1468 entities: EntityConfig {
1469 min_confidence: default_min_confidence(),
1470 entity_types: default_entity_types(),
1471 use_gleaning: false,
1472 max_gleaning_rounds: default_max_gleaning_rounds(),
1473 enable_triple_reflection: false,
1474 validation_min_confidence: default_validation_confidence(),
1475 use_atomic_facts: false,
1476 max_fact_tokens: default_max_fact_tokens(),
1477 },
1478 retrieval: RetrievalConfig {
1479 top_k: default_top_k(),
1480 search_algorithm: default_search_algorithm(),
1481 },
1482 parallel: ParallelConfig {
1483 num_threads: default_num_threads(),
1484 enabled: true,
1485 min_batch_size: default_min_batch_size(),
1486 chunk_batch_size: default_chunk_batch_size(),
1487 parallel_embeddings: true,
1488 parallel_graph_ops: true,
1489 parallel_vector_ops: true,
1490 },
1491 ollama: crate::ollama::OllamaConfig::default(),
1492 gliner: GlinerConfig::default(),
1493 enhancements: enhancements::EnhancementsConfig::default(),
1494 auto_save: AutoSaveConfig {
1495 enabled: false,
1496 base_dir: None,
1497 interval_seconds: default_auto_save_interval(),
1498 workspace_name: None,
1499 max_versions: default_max_versions(),
1500 },
1501 summarization: crate::summarization::HierarchicalConfig::default(),
1502 zero_cost_approach: ZeroCostApproachConfig::default(),
1503 advanced_features: AdvancedFeaturesConfig::default(),
1504 suppress_progress_bars: false,
1505 }
1506 }
1507}
1508
1509impl Default for AutoSaveConfig {
1510 fn default() -> Self {
1511 Self {
1512 enabled: false,
1513 base_dir: None,
1514 interval_seconds: default_auto_save_interval(),
1515 workspace_name: None,
1516 max_versions: default_max_versions(),
1517 }
1518 }
1519}
1520
1521impl Default for AdvancedFeaturesConfig {
1522 fn default() -> Self {
1523 Self {
1524 symbolic_anchoring: SymbolicAnchoringConfig::default(),
1525 dynamic_weighting: DynamicWeightingConfig::default(),
1526 causal_analysis: CausalAnalysisConfig::default(),
1527 hierarchical_clustering: HierarchicalClusteringConfig::default(),
1528 weight_optimization: WeightOptimizationConfig::default(),
1529 }
1530 }
1531}
1532
1533impl Default for SymbolicAnchoringConfig {
1534 fn default() -> Self {
1535 Self {
1536 min_relevance: default_anchor_min_relevance(),
1537 max_anchors: default_max_anchors(),
1538 max_entities_per_anchor: default_max_entities_per_anchor(),
1539 }
1540 }
1541}
1542
1543impl Default for DynamicWeightingConfig {
1544 fn default() -> Self {
1545 Self {
1546 enable_semantic_boost: default_true(),
1547 enable_temporal_boost: default_true(),
1548 enable_concept_boost: default_true(),
1549 enable_causal_boost: default_true(),
1550 }
1551 }
1552}
1553
1554impl Default for CausalAnalysisConfig {
1555 fn default() -> Self {
1556 Self {
1557 min_confidence: default_causal_min_confidence(),
1558 min_causal_strength: default_causal_min_strength(),
1559 max_chain_depth: default_max_chain_depth(),
1560 require_temporal_consistency: default_true(),
1561 }
1562 }
1563}
1564
1565impl Default for HierarchicalClusteringConfig {
1566 fn default() -> Self {
1567 Self {
1568 num_levels: default_num_levels(),
1569 resolutions: default_resolutions(),
1570 min_cluster_size: default_min_cluster_size(),
1571 generate_summaries: default_true(),
1572 }
1573 }
1574}
1575
1576impl Default for WeightOptimizationConfig {
1577 fn default() -> Self {
1578 Self {
1579 learning_rate: default_learning_rate(),
1580 max_iterations: default_max_iterations(),
1581 slope_window: default_slope_window(),
1582 stagnation_threshold: default_stagnation_threshold(),
1583 use_llm_eval: default_true(),
1584 objective_weights: ObjectiveWeightsConfig::default(),
1585 }
1586 }
1587}
1588
1589impl Default for ObjectiveWeightsConfig {
1590 fn default() -> Self {
1591 Self {
1592 relevance: default_relevance_weight(),
1593 faithfulness: default_faithfulness_weight(),
1594 conciseness: default_conciseness_weight(),
1595 }
1596 }
1597}
1598
1599impl Config {
    #[cfg(feature = "hierarchical-config")]
    /// Load configuration by layering sources, later layers overriding
    /// earlier ones: built-in defaults → `~/.graphrag/config.toml` →
    /// `./graphrag.toml` → `GRAPHRAG_*` environment variables.
    ///
    /// # Errors
    /// Returns a `Config` error when the merged layers cannot be extracted
    /// into a `Config`.
    pub fn load() -> Result<Self> {
        use figment::{
            providers::{Env, Format, Serialized, Toml},
            Figment,
        };

        // Layer 1: compiled-in defaults.
        let mut figment = Figment::new()
            .merge(Serialized::defaults(Config::default()));

        // Layer 2: per-user config, if present.
        if let Some(home) = dirs::home_dir() {
            let user_config = home.join(".graphrag").join("config.toml");
            if user_config.exists() {
                figment = figment.merge(Toml::file(user_config));
            }
        }

        // Layer 3: per-project config in the working directory.
        let project_config = std::path::Path::new("graphrag.toml");
        if project_config.exists() {
            figment = figment.merge(Toml::file(project_config));
        }

        // Layer 4: environment variables prefixed with GRAPHRAG_.
        // NOTE(review): `.split("_")` nests on *every* underscore, so
        // GRAPHRAG_CHUNK_SIZE becomes the nested key `chunk.size`, not the
        // flat `chunk_size` — snake_case fields may be unreachable from the
        // environment. The usual figment convention is a double-underscore
        // separator (`.split("__")`); confirm the intended env-var scheme.
        figment = figment.merge(Env::prefixed("GRAPHRAG_").split("_"));

        figment
            .extract()
            .map_err(|e| crate::core::GraphRAGError::Config {
                message: format!("Failed to load hierarchical configuration: {}", e),
            })
    }
1652
1653 #[cfg(not(feature = "hierarchical-config"))]
1657 pub fn load() -> Result<Self> {
1658 Ok(Config::default())
1659 }
1660
1661 pub fn from_toml_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
1674 let content = fs::read_to_string(path.as_ref())?;
1675 let config: Config =
1676 toml::from_str(&content).map_err(|e| crate::core::GraphRAGError::Config {
1677 message: format!("Failed to parse TOML config: {}", e),
1678 })?;
1679 Ok(config)
1680 }
1681
    /// Builds a `Config` from a JSON file, tolerating missing keys.
    ///
    /// Every lookup is defensive: a missing or mis-typed value falls back to
    /// the matching `default_*` helper, a hard-coded literal, or stays `None`
    /// for optional fields — so a partial (even empty) JSON object still
    /// yields a usable config. Feature-gated enhancement sections are only
    /// populated when the corresponding cargo feature is compiled in.
    ///
    /// # Errors
    /// Returns an error only when the file cannot be read or its contents
    /// are not valid JSON; individual missing keys never fail.
    pub fn from_file(path: &str) -> Result<Self> {
        let content = fs::read_to_string(path)?;
        let parsed = json::parse(&content)?;

        let config = Config {
            // --- top-level scalars ---
            output_dir: parsed["output_dir"]
                .as_str()
                .unwrap_or("./output")
                .to_string(),
            suppress_progress_bars: parsed["suppress_progress_bars"].as_bool().unwrap_or(false),
            chunk_size: parsed["chunk_size"]
                .as_usize()
                .unwrap_or(default_chunk_size()),
            chunk_overlap: parsed["chunk_overlap"]
                .as_usize()
                .unwrap_or(default_chunk_overlap()),
            // Optional fields: absent keys remain `None` rather than defaulting.
            max_entities_per_chunk: parsed["max_entities_per_chunk"].as_usize(),
            top_k_results: parsed["top_k_results"].as_usize(),
            similarity_threshold: parsed["similarity_threshold"].as_f32(),
            approach: parsed["approach"]
                .as_str()
                .unwrap_or(&default_approach())
                .to_string(),
            // --- embeddings ---
            embeddings: EmbeddingConfig {
                dimension: parsed["embeddings"]["dimension"]
                    .as_usize()
                    .unwrap_or(default_embedding_dim()),
                backend: parsed["embeddings"]["backend"]
                    .as_str()
                    .unwrap_or(&default_embedding_backend())
                    .to_string(),
                model: parsed["embeddings"]["model"]
                    .as_str()
                    .map(|s| s.to_string()),
                fallback_to_hash: parsed["embeddings"]["fallback_to_hash"]
                    .as_bool()
                    .unwrap_or(true),
                api_endpoint: parsed["embeddings"]["api_endpoint"]
                    .as_str()
                    .map(|s| s.to_string()),
                api_key: parsed["embeddings"]["api_key"]
                    .as_str()
                    .map(|s| s.to_string()),
                cache_dir: parsed["embeddings"]["cache_dir"]
                    .as_str()
                    .map(|s| s.to_string()),
                batch_size: parsed["embeddings"]["batch_size"]
                    .as_usize()
                    .unwrap_or(default_batch_size()),
            },
            // --- knowledge graph construction ---
            graph: GraphConfig {
                max_connections: parsed["graph"]["max_connections"]
                    .as_usize()
                    .unwrap_or(default_max_connections()),
                similarity_threshold: parsed["graph"]["similarity_threshold"]
                    .as_f32()
                    .unwrap_or(default_similarity_threshold()),
                extract_relationships: parsed["graph"]["extract_relationships"]
                    .as_bool()
                    .unwrap_or(default_true()),
                relationship_confidence_threshold: parsed["graph"]
                    ["relationship_confidence_threshold"]
                    .as_f32()
                    .unwrap_or(default_relationship_confidence()),
                traversal: TraversalConfigParams {
                    max_depth: parsed["graph"]["traversal"]["max_depth"]
                        .as_usize()
                        .unwrap_or(default_max_traversal_depth()),
                    max_paths: parsed["graph"]["traversal"]["max_paths"]
                        .as_usize()
                        .unwrap_or(default_max_paths()),
                    use_edge_weights: parsed["graph"]["traversal"]["use_edge_weights"]
                        .as_bool()
                        .unwrap_or(default_true()),
                    min_relationship_strength: parsed["graph"]["traversal"]
                        ["min_relationship_strength"]
                        .as_f32()
                        .unwrap_or(default_min_relationship_strength()),
                },
            },
            // --- text chunking ---
            text: TextConfig {
                chunk_size: parsed["text"]["chunk_size"]
                    .as_usize()
                    .unwrap_or(default_chunk_size()),
                chunk_overlap: parsed["text"]["chunk_overlap"]
                    .as_usize()
                    .unwrap_or(default_chunk_overlap()),
                // Non-string array members degrade to "en" rather than failing.
                languages: if parsed["text"]["languages"].is_array() {
                    parsed["text"]["languages"]
                        .members()
                        .map(|v| v.as_str().unwrap_or("en").to_string())
                        .collect()
                } else {
                    default_languages()
                },
            },
            // --- entity extraction ---
            entities: EntityConfig {
                min_confidence: parsed["entities"]["min_confidence"]
                    .as_f32()
                    .unwrap_or(default_min_confidence()),
                // Non-string array members degrade to "PERSON" rather than failing.
                entity_types: if parsed["entities"]["entity_types"].is_array() {
                    parsed["entities"]["entity_types"]
                        .members()
                        .map(|v| v.as_str().unwrap_or("PERSON").to_string())
                        .collect()
                } else {
                    default_entity_types()
                },
                use_gleaning: parsed["entities"]["use_gleaning"]
                    .as_bool()
                    .unwrap_or(false),
                max_gleaning_rounds: parsed["entities"]["max_gleaning_rounds"]
                    .as_usize()
                    .unwrap_or(default_max_gleaning_rounds()),
                enable_triple_reflection: parsed["entities"]["enable_triple_reflection"]
                    .as_bool()
                    .unwrap_or(false),
                validation_min_confidence: parsed["entities"]["validation_min_confidence"]
                    .as_f32()
                    .unwrap_or(default_validation_confidence()),
                use_atomic_facts: parsed["entities"]["use_atomic_facts"]
                    .as_bool()
                    .unwrap_or(false),
                max_fact_tokens: parsed["entities"]["max_fact_tokens"]
                    .as_usize()
                    .unwrap_or(default_max_fact_tokens()),
            },
            // --- retrieval ---
            retrieval: RetrievalConfig {
                top_k: parsed["retrieval"]["top_k"]
                    .as_usize()
                    .unwrap_or(default_top_k()),
                search_algorithm: parsed["retrieval"]["search_algorithm"]
                    .as_str()
                    .unwrap_or(&default_search_algorithm())
                    .to_string(),
            },
            // --- parallelism ---
            parallel: ParallelConfig {
                num_threads: parsed["parallel"]["num_threads"]
                    .as_usize()
                    .unwrap_or(default_num_threads()),
                enabled: parsed["parallel"]["enabled"].as_bool().unwrap_or(true),
                min_batch_size: parsed["parallel"]["min_batch_size"]
                    .as_usize()
                    .unwrap_or(default_min_batch_size()),
                chunk_batch_size: parsed["parallel"]["chunk_batch_size"]
                    .as_usize()
                    .unwrap_or(default_chunk_batch_size()),
                parallel_embeddings: parsed["parallel"]["parallel_embeddings"]
                    .as_bool()
                    .unwrap_or(true),
                parallel_graph_ops: parsed["parallel"]["parallel_graph_ops"]
                    .as_bool()
                    .unwrap_or(true),
                parallel_vector_ops: parsed["parallel"]["parallel_vector_ops"]
                    .as_bool()
                    .unwrap_or(true),
            },
            // --- Ollama backend (disabled unless explicitly enabled) ---
            ollama: crate::ollama::OllamaConfig {
                enabled: parsed["ollama"]["enabled"].as_bool().unwrap_or(false),
                host: parsed["ollama"]["host"]
                    .as_str()
                    .unwrap_or("http://localhost")
                    .to_string(),
                port: parsed["ollama"]["port"].as_u16().unwrap_or(11434),
                embedding_model: parsed["ollama"]["embedding_model"]
                    .as_str()
                    .unwrap_or("nomic-embed-text")
                    .to_string(),
                chat_model: parsed["ollama"]["chat_model"]
                    .as_str()
                    .unwrap_or("llama3.2:3b")
                    .to_string(),
                timeout_seconds: parsed["ollama"]["timeout_seconds"].as_u64().unwrap_or(30),
                max_retries: parsed["ollama"]["max_retries"].as_u32().unwrap_or(3),
                fallback_to_hash: parsed["ollama"]["fallback_to_hash"]
                    .as_bool()
                    .unwrap_or(true),
                max_tokens: parsed["ollama"]["max_tokens"].as_u32(),
                temperature: parsed["ollama"]["temperature"].as_f32(),
                enable_caching: parsed["ollama"]["enable_caching"].as_bool().unwrap_or(true),
                keep_alive: parsed["ollama"]["keep_alive"]
                    .as_str()
                    .map(|s| s.to_string()),
                num_ctx: parsed["ollama"]["num_ctx"].as_u32(),
            },
            // --- GLiNER NER backend (disabled unless explicitly enabled) ---
            gliner: GlinerConfig {
                enabled: parsed["gliner"]["enabled"].as_bool().unwrap_or(false),
                model_path: parsed["gliner"]["model_path"]
                    .as_str()
                    .unwrap_or("")
                    .to_string(),
                tokenizer_path: parsed["gliner"]["tokenizer_path"]
                    .as_str()
                    .unwrap_or("")
                    .to_string(),
                mode: parsed["gliner"]["mode"]
                    .as_str()
                    .unwrap_or("span")
                    .to_string(),
                // Unlike languages/entity_types above, non-string members are
                // silently dropped here (filter_map) instead of defaulted.
                entity_labels: if parsed["gliner"]["entity_labels"].is_array() {
                    parsed["gliner"]["entity_labels"]
                        .members()
                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
                        .collect()
                } else {
                    vec!["person".into(), "organization".into(), "location".into()]
                },
                relation_labels: if parsed["gliner"]["relation_labels"].is_array() {
                    parsed["gliner"]["relation_labels"]
                        .members()
                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
                        .collect()
                } else {
                    vec!["related to".into(), "part of".into()]
                },
                entity_threshold: parsed["gliner"]["entity_threshold"].as_f32().unwrap_or(0.4),
                relation_threshold: parsed["gliner"]["relation_threshold"]
                    .as_f32()
                    .unwrap_or(0.5),
                use_gpu: parsed["gliner"]["use_gpu"].as_bool().unwrap_or(false),
            },
            // --- enhancement subsystems ---
            enhancements: enhancements::EnhancementsConfig {
                enabled: parsed["enhancements"]["enabled"].as_bool().unwrap_or(true),
                query_analysis: enhancements::QueryAnalysisConfig {
                    enabled: parsed["enhancements"]["query_analysis"]["enabled"]
                        .as_bool()
                        .unwrap_or(true),
                    min_confidence: parsed["enhancements"]["query_analysis"]["min_confidence"]
                        .as_f32()
                        .unwrap_or(0.6),
                    enable_strategy_suggestion: parsed["enhancements"]["query_analysis"]
                        ["enable_strategy_suggestion"]
                        .as_bool()
                        .unwrap_or(true),
                    enable_keyword_analysis: parsed["enhancements"]["query_analysis"]
                        ["enable_keyword_analysis"]
                        .as_bool()
                        .unwrap_or(true),
                    enable_complexity_scoring: parsed["enhancements"]["query_analysis"]
                        ["enable_complexity_scoring"]
                        .as_bool()
                        .unwrap_or(true),
                },
                adaptive_retrieval: enhancements::AdaptiveRetrievalConfig {
                    enabled: parsed["enhancements"]["adaptive_retrieval"]["enabled"]
                        .as_bool()
                        .unwrap_or(true),
                    use_query_analysis: parsed["enhancements"]["adaptive_retrieval"]
                        ["use_query_analysis"]
                        .as_bool()
                        .unwrap_or(true),
                    enable_cross_strategy_fusion: parsed["enhancements"]["adaptive_retrieval"]
                        ["enable_cross_strategy_fusion"]
                        .as_bool()
                        .unwrap_or(true),
                    diversity_threshold: parsed["enhancements"]["adaptive_retrieval"]
                        ["diversity_threshold"]
                        .as_f32()
                        .unwrap_or(0.8),
                    enable_diversity_selection: parsed["enhancements"]["adaptive_retrieval"]
                        ["enable_diversity_selection"]
                        .as_bool()
                        .unwrap_or(true),
                    enable_confidence_weighting: parsed["enhancements"]["adaptive_retrieval"]
                        ["enable_confidence_weighting"]
                        .as_bool()
                        .unwrap_or(true),
                },
                performance_benchmarking: enhancements::BenchmarkingConfig {
                    enabled: parsed["enhancements"]["performance_benchmarking"]["enabled"]
                        .as_bool()
                        .unwrap_or(false),
                    auto_recommendations: parsed["enhancements"]["performance_benchmarking"]
                        ["auto_recommendations"]
                        .as_bool()
                        .unwrap_or(true),
                    comprehensive_testing: parsed["enhancements"]["performance_benchmarking"]
                        ["comprehensive_testing"]
                        .as_bool()
                        .unwrap_or(false),
                    iterations: parsed["enhancements"]["performance_benchmarking"]["iterations"]
                        .as_usize()
                        .unwrap_or(3),
                    include_parallel: parsed["enhancements"]["performance_benchmarking"]
                        ["include_parallel"]
                        .as_bool()
                        .unwrap_or(true),
                    enable_memory_profiling: parsed["enhancements"]["performance_benchmarking"]
                        ["enable_memory_profiling"]
                        .as_bool()
                        .unwrap_or(false),
                },
                enhanced_function_registry: enhancements::FunctionRegistryConfig {
                    enabled: parsed["enhancements"]["enhanced_function_registry"]["enabled"]
                        .as_bool()
                        .unwrap_or(true),
                    categorization: parsed["enhancements"]["enhanced_function_registry"]
                        ["categorization"]
                        .as_bool()
                        .unwrap_or(true),
                    usage_statistics: parsed["enhancements"]["enhanced_function_registry"]
                        ["usage_statistics"]
                        .as_bool()
                        .unwrap_or(true),
                    dynamic_registration: parsed["enhancements"]["enhanced_function_registry"]
                        ["dynamic_registration"]
                        .as_bool()
                        .unwrap_or(true),
                    performance_monitoring: parsed["enhancements"]["enhanced_function_registry"]
                        ["performance_monitoring"]
                        .as_bool()
                        .unwrap_or(false),
                    recommendation_system: parsed["enhancements"]["enhanced_function_registry"]
                        ["recommendation_system"]
                        .as_bool()
                        .unwrap_or(true),
                },
                // Feature-gated: only compiled when `lightrag` is enabled.
                #[cfg(feature = "lightrag")]
                lightrag: enhancements::LightRAGConfig {
                    enabled: parsed["enhancements"]["lightrag"]["enabled"]
                        .as_bool()
                        .unwrap_or(true),
                    max_keywords: parsed["enhancements"]["lightrag"]["max_keywords"]
                        .as_usize()
                        .unwrap_or(20),
                    high_level_weight: parsed["enhancements"]["lightrag"]["high_level_weight"]
                        .as_f32()
                        .unwrap_or(0.6),
                    low_level_weight: parsed["enhancements"]["lightrag"]["low_level_weight"]
                        .as_f32()
                        .unwrap_or(0.4),
                    merge_strategy: parsed["enhancements"]["lightrag"]["merge_strategy"]
                        .as_str()
                        .unwrap_or("weighted")
                        .to_string(),
                    language: parsed["enhancements"]["lightrag"]["language"]
                        .as_str()
                        .unwrap_or("English")
                        .to_string(),
                    enable_cache: parsed["enhancements"]["lightrag"]["enable_cache"]
                        .as_bool()
                        .unwrap_or(true),
                },
                // Feature-gated: only compiled when `leiden` is enabled.
                #[cfg(feature = "leiden")]
                leiden: enhancements::LeidenCommunitiesConfig {
                    enabled: parsed["enhancements"]["leiden"]["enabled"]
                        .as_bool()
                        .unwrap_or(true),
                    max_cluster_size: parsed["enhancements"]["leiden"]["max_cluster_size"]
                        .as_usize()
                        .unwrap_or(10),
                    use_lcc: parsed["enhancements"]["leiden"]["use_lcc"]
                        .as_bool()
                        .unwrap_or(true),
                    // Optional seed: `None` when absent (non-deterministic run).
                    seed: parsed["enhancements"]["leiden"]["seed"].as_u64(),
                    resolution: parsed["enhancements"]["leiden"]["resolution"]
                        .as_f32()
                        .unwrap_or(1.0),
                    max_levels: parsed["enhancements"]["leiden"]["max_levels"]
                        .as_usize()
                        .unwrap_or(5),
                    min_improvement: parsed["enhancements"]["leiden"]["min_improvement"]
                        .as_f32()
                        .unwrap_or(0.001),
                    enable_hierarchical: parsed["enhancements"]["leiden"]["enable_hierarchical"]
                        .as_bool()
                        .unwrap_or(true),
                    generate_summaries: parsed["enhancements"]["leiden"]["generate_summaries"]
                        .as_bool()
                        .unwrap_or(true),
                    max_summary_length: parsed["enhancements"]["leiden"]["max_summary_length"]
                        .as_usize()
                        .unwrap_or(5),
                    use_extractive_summary: parsed["enhancements"]["leiden"]
                        ["use_extractive_summary"]
                        .as_bool()
                        .unwrap_or(true),
                    adaptive_routing: enhancements::AdaptiveRoutingConfig {
                        enabled: parsed["enhancements"]["leiden"]["adaptive_routing"]["enabled"]
                            .as_bool()
                            .unwrap_or(true),
                        default_level: parsed["enhancements"]["leiden"]["adaptive_routing"]
                            ["default_level"]
                            .as_usize()
                            .unwrap_or(1),
                        keyword_weight: parsed["enhancements"]["leiden"]["adaptive_routing"]
                            ["keyword_weight"]
                            .as_f32()
                            .unwrap_or(0.5),
                        length_weight: parsed["enhancements"]["leiden"]["adaptive_routing"]
                            ["length_weight"]
                            .as_f32()
                            .unwrap_or(0.3),
                        entity_weight: parsed["enhancements"]["leiden"]["adaptive_routing"]
                            ["entity_weight"]
                            .as_f32()
                            .unwrap_or(0.2),
                    },
                },
                // Feature-gated: only compiled when `cross-encoder` is enabled.
                #[cfg(feature = "cross-encoder")]
                cross_encoder: enhancements::CrossEncoderConfig {
                    enabled: parsed["enhancements"]["cross_encoder"]["enabled"]
                        .as_bool()
                        .unwrap_or(true),
                    model_name: parsed["enhancements"]["cross_encoder"]["model_name"]
                        .as_str()
                        .unwrap_or("cross-encoder/ms-marco-MiniLM-L-6-v2")
                        .to_string(),
                    max_length: parsed["enhancements"]["cross_encoder"]["max_length"]
                        .as_usize()
                        .unwrap_or(512),
                    batch_size: parsed["enhancements"]["cross_encoder"]["batch_size"]
                        .as_usize()
                        .unwrap_or(32),
                    top_k: parsed["enhancements"]["cross_encoder"]["top_k"]
                        .as_usize()
                        .unwrap_or(10),
                    min_confidence: parsed["enhancements"]["cross_encoder"]["min_confidence"]
                        .as_f32()
                        .unwrap_or(0.0),
                    normalize_scores: parsed["enhancements"]["cross_encoder"]["normalize_scores"]
                        .as_bool()
                        .unwrap_or(true),
                },
                // Feature-gated: only compiled when `lazygraphrag` is enabled.
                #[cfg(feature = "lazygraphrag")]
                concept_selection: enhancements::ConceptSelectionConfig {
                    enabled: parsed["enhancements"]["concept_selection"]["enabled"]
                        .as_bool()
                        .unwrap_or(true),
                    top_k: parsed["enhancements"]["concept_selection"]["top_k"]
                        .as_usize()
                        .unwrap_or(20),
                    min_score: parsed["enhancements"]["concept_selection"]["min_score"]
                        .as_f32()
                        .unwrap_or(0.1),
                    degree_weight: parsed["enhancements"]["concept_selection"]["degree_weight"]
                        .as_f32()
                        .unwrap_or(0.4),
                    pagerank_weight: parsed["enhancements"]["concept_selection"]["pagerank_weight"]
                        .as_f32()
                        .unwrap_or(0.4),
                    idf_weight: parsed["enhancements"]["concept_selection"]["idf_weight"]
                        .as_f32()
                        .unwrap_or(0.2),
                    use_semantic_matching: parsed["enhancements"]["concept_selection"]
                        ["use_semantic_matching"]
                        .as_bool()
                        .unwrap_or(true),
                    max_query_concepts: parsed["enhancements"]["concept_selection"]
                        ["max_query_concepts"]
                        .as_usize()
                        .unwrap_or(10),
                },
            },
            // --- auto-save ---
            auto_save: AutoSaveConfig {
                enabled: parsed["auto_save"]["enabled"].as_bool().unwrap_or(false),
                base_dir: parsed["auto_save"]["base_dir"]
                    .as_str()
                    .map(|s| s.to_string()),
                interval_seconds: parsed["auto_save"]["interval_seconds"]
                    .as_u64()
                    .unwrap_or(default_auto_save_interval()),
                workspace_name: parsed["auto_save"]["workspace_name"]
                    .as_str()
                    .map(|s| s.to_string()),
                max_versions: parsed["auto_save"]["max_versions"]
                    .as_usize()
                    .unwrap_or(default_max_versions()),
            },
            // --- hierarchical summarization (only parsed when the section exists) ---
            summarization: if parsed["summarization"].is_object() {
                crate::summarization::HierarchicalConfig {
                    merge_size: parsed["summarization"]["merge_size"]
                        .as_usize()
                        .unwrap_or(3),
                    max_summary_length: parsed["summarization"]["max_summary_length"]
                        .as_usize()
                        .unwrap_or(250),
                    min_node_size: parsed["summarization"]["min_node_size"]
                        .as_usize()
                        .unwrap_or(50),
                    overlap_sentences: parsed["summarization"]["overlap_sentences"]
                        .as_usize()
                        .unwrap_or(2),
                    llm_config: if parsed["summarization"]["llm_config"].is_object() {
                        crate::summarization::LLMConfig {
                            enabled: parsed["summarization"]["llm_config"]["enabled"]
                                .as_bool()
                                .unwrap_or(false),
                            model_name: parsed["summarization"]["llm_config"]["model_name"]
                                .as_str()
                                .unwrap_or("llama3.1:8b")
                                .to_string(),
                            temperature: parsed["summarization"]["llm_config"]["temperature"]
                                .as_f32()
                                .unwrap_or(0.3),
                            max_tokens: parsed["summarization"]["llm_config"]["max_tokens"]
                                .as_usize()
                                .unwrap_or(180),
                            // Unknown strategy names fall back to Progressive.
                            strategy: match parsed["summarization"]["llm_config"]["strategy"]
                                .as_str()
                                .unwrap_or("progressive")
                            {
                                "uniform" => crate::summarization::LLMStrategy::Uniform,
                                "adaptive" => crate::summarization::LLMStrategy::Adaptive,
                                "progressive" => crate::summarization::LLMStrategy::Progressive,
                                _ => crate::summarization::LLMStrategy::Progressive,
                            },
                            // Per-level overrides are never read from JSON here.
                            level_configs: std::collections::HashMap::new(),
                        }
                    } else {
                        crate::summarization::LLMConfig::default()
                    },
                }
            } else {
                crate::summarization::HierarchicalConfig::default()
            },
            // --- zero-cost approach (only `approach` and lazy `enabled` are read) ---
            zero_cost_approach: if parsed["zero_cost_approach"].is_object() {
                ZeroCostApproachConfig {
                    approach: parsed["zero_cost_approach"]["approach"]
                        .as_str()
                        .unwrap_or("pure_algorithmic")
                        .to_string(),
                    lazy_graphrag: if parsed["zero_cost_approach"]["lazy_graphrag"].is_object() {
                        LazyGraphRAGConfig {
                            enabled: parsed["zero_cost_approach"]["lazy_graphrag"]["enabled"]
                                .as_bool()
                                .unwrap_or(false),
                            // Sub-sections are not read from JSON; defaults only.
                            concept_extraction: ConceptExtractionConfig::default(),
                            co_occurrence: CoOccurrenceConfig::default(),
                            indexing: LazyIndexingConfig::default(),
                            query_expansion: LazyQueryExpansionConfig::default(),
                            relevance_scoring: LazyRelevanceScoringConfig::default(),
                        }
                    } else {
                        LazyGraphRAGConfig::default()
                    },
                    e2_graphrag: E2GraphRAGConfig::default(),
                    pure_algorithmic: PureAlgorithmicConfig::default(),
                    hybrid_strategy: HybridStrategyConfig::default(),
                }
            } else {
                ZeroCostApproachConfig::default()
            },
            // Advanced features are never read from JSON in this loader.
            advanced_features: AdvancedFeaturesConfig::default(),
        };

        Ok(config)
    }
2231
    /// Serializes the configuration to a pretty-printed JSON file at `path`.
    ///
    /// NOTE(review): only a subset of the config is written — embeddings,
    /// graph, text, entities, retrieval, parallel, enhancements and
    /// summarization. Top-level scalars (output_dir, chunk_size, …) and the
    /// ollama/gliner/auto_save/zero_cost_approach sections are omitted, so a
    /// `to_file` → `from_file` round trip resets those to defaults — confirm
    /// this is intentional.
    ///
    /// # Errors
    /// Returns an error if writing the file fails.
    pub fn to_file(&self, path: &str) -> Result<()> {
        let mut config_json = json::JsonValue::new_object();

        // --- embeddings (optional endpoint/key written only when present) ---
        let mut embeddings = json::JsonValue::new_object();
        embeddings["dimension"] = json::JsonValue::from(self.embeddings.dimension);
        if let Some(endpoint) = &self.embeddings.api_endpoint {
            embeddings["api_endpoint"] = json::JsonValue::from(endpoint.as_str());
        }
        if let Some(key) = &self.embeddings.api_key {
            embeddings["api_key"] = json::JsonValue::from(key.as_str());
        }
        config_json["embeddings"] = embeddings;

        // --- graph + nested traversal ---
        let mut graph = json::JsonValue::new_object();
        graph["max_connections"] = json::JsonValue::from(self.graph.max_connections);
        graph["similarity_threshold"] = json::JsonValue::from(self.graph.similarity_threshold);
        graph["extract_relationships"] = json::JsonValue::from(self.graph.extract_relationships);
        graph["relationship_confidence_threshold"] =
            json::JsonValue::from(self.graph.relationship_confidence_threshold);

        let mut traversal = json::JsonValue::new_object();
        traversal["max_depth"] = json::JsonValue::from(self.graph.traversal.max_depth);
        traversal["max_paths"] = json::JsonValue::from(self.graph.traversal.max_paths);
        traversal["use_edge_weights"] =
            json::JsonValue::from(self.graph.traversal.use_edge_weights);
        traversal["min_relationship_strength"] =
            json::JsonValue::from(self.graph.traversal.min_relationship_strength);
        graph["traversal"] = traversal;

        config_json["graph"] = graph;

        // --- text ---
        let mut text = json::JsonValue::new_object();
        text["chunk_size"] = json::JsonValue::from(self.text.chunk_size);
        text["chunk_overlap"] = json::JsonValue::from(self.text.chunk_overlap);
        let languages_array: Vec<json::JsonValue> = self
            .text
            .languages
            .iter()
            .map(|s| json::JsonValue::from(s.as_str()))
            .collect();
        text["languages"] = json::JsonValue::from(languages_array);
        config_json["text"] = text;

        // --- entities (gleaning-reflection/atomic-fact fields are not written) ---
        let mut entities = json::JsonValue::new_object();
        entities["min_confidence"] = json::JsonValue::from(self.entities.min_confidence);
        let entity_types_array: Vec<json::JsonValue> = self
            .entities
            .entity_types
            .iter()
            .map(|s| json::JsonValue::from(s.as_str()))
            .collect();
        entities["entity_types"] = json::JsonValue::from(entity_types_array);
        entities["use_gleaning"] = json::JsonValue::from(self.entities.use_gleaning);
        entities["max_gleaning_rounds"] = json::JsonValue::from(self.entities.max_gleaning_rounds);
        config_json["entities"] = entities;

        // --- retrieval ---
        let mut retrieval = json::JsonValue::new_object();
        retrieval["top_k"] = json::JsonValue::from(self.retrieval.top_k);
        retrieval["search_algorithm"] =
            json::JsonValue::from(self.retrieval.search_algorithm.as_str());
        config_json["retrieval"] = retrieval;

        // --- parallel ---
        let mut parallel = json::JsonValue::new_object();
        parallel["num_threads"] = json::JsonValue::from(self.parallel.num_threads);
        parallel["enabled"] = json::JsonValue::from(self.parallel.enabled);
        parallel["min_batch_size"] = json::JsonValue::from(self.parallel.min_batch_size);
        parallel["chunk_batch_size"] = json::JsonValue::from(self.parallel.chunk_batch_size);
        parallel["parallel_embeddings"] = json::JsonValue::from(self.parallel.parallel_embeddings);
        parallel["parallel_graph_ops"] = json::JsonValue::from(self.parallel.parallel_graph_ops);
        parallel["parallel_vector_ops"] = json::JsonValue::from(self.parallel.parallel_vector_ops);
        config_json["parallel"] = parallel;

        // --- enhancements (non-feature-gated sub-sections only) ---
        let mut enhancements = json::JsonValue::new_object();
        enhancements["enabled"] = json::JsonValue::from(self.enhancements.enabled);

        let mut query_analysis = json::JsonValue::new_object();
        query_analysis["enabled"] = json::JsonValue::from(self.enhancements.query_analysis.enabled);
        query_analysis["min_confidence"] =
            json::JsonValue::from(self.enhancements.query_analysis.min_confidence);
        query_analysis["enable_strategy_suggestion"] =
            json::JsonValue::from(self.enhancements.query_analysis.enable_strategy_suggestion);
        query_analysis["enable_keyword_analysis"] =
            json::JsonValue::from(self.enhancements.query_analysis.enable_keyword_analysis);
        query_analysis["enable_complexity_scoring"] =
            json::JsonValue::from(self.enhancements.query_analysis.enable_complexity_scoring);
        enhancements["query_analysis"] = query_analysis;

        let mut adaptive_retrieval = json::JsonValue::new_object();
        adaptive_retrieval["enabled"] =
            json::JsonValue::from(self.enhancements.adaptive_retrieval.enabled);
        adaptive_retrieval["use_query_analysis"] =
            json::JsonValue::from(self.enhancements.adaptive_retrieval.use_query_analysis);
        adaptive_retrieval["enable_cross_strategy_fusion"] = json::JsonValue::from(
            self.enhancements
                .adaptive_retrieval
                .enable_cross_strategy_fusion,
        );
        adaptive_retrieval["diversity_threshold"] =
            json::JsonValue::from(self.enhancements.adaptive_retrieval.diversity_threshold);
        adaptive_retrieval["enable_diversity_selection"] = json::JsonValue::from(
            self.enhancements
                .adaptive_retrieval
                .enable_diversity_selection,
        );
        adaptive_retrieval["enable_confidence_weighting"] = json::JsonValue::from(
            self.enhancements
                .adaptive_retrieval
                .enable_confidence_weighting,
        );
        enhancements["adaptive_retrieval"] = adaptive_retrieval;

        let mut performance_benchmarking = json::JsonValue::new_object();
        performance_benchmarking["enabled"] =
            json::JsonValue::from(self.enhancements.performance_benchmarking.enabled);
        performance_benchmarking["auto_recommendations"] = json::JsonValue::from(
            self.enhancements
                .performance_benchmarking
                .auto_recommendations,
        );
        performance_benchmarking["comprehensive_testing"] = json::JsonValue::from(
            self.enhancements
                .performance_benchmarking
                .comprehensive_testing,
        );
        performance_benchmarking["iterations"] =
            json::JsonValue::from(self.enhancements.performance_benchmarking.iterations);
        performance_benchmarking["include_parallel"] =
            json::JsonValue::from(self.enhancements.performance_benchmarking.include_parallel);
        performance_benchmarking["enable_memory_profiling"] = json::JsonValue::from(
            self.enhancements
                .performance_benchmarking
                .enable_memory_profiling,
        );
        enhancements["performance_benchmarking"] = performance_benchmarking;

        let mut enhanced_function_registry = json::JsonValue::new_object();
        enhanced_function_registry["enabled"] =
            json::JsonValue::from(self.enhancements.enhanced_function_registry.enabled);
        enhanced_function_registry["categorization"] =
            json::JsonValue::from(self.enhancements.enhanced_function_registry.categorization);
        enhanced_function_registry["usage_statistics"] = json::JsonValue::from(
            self.enhancements
                .enhanced_function_registry
                .usage_statistics,
        );
        enhanced_function_registry["dynamic_registration"] = json::JsonValue::from(
            self.enhancements
                .enhanced_function_registry
                .dynamic_registration,
        );
        enhanced_function_registry["performance_monitoring"] = json::JsonValue::from(
            self.enhancements
                .enhanced_function_registry
                .performance_monitoring,
        );
        enhanced_function_registry["recommendation_system"] = json::JsonValue::from(
            self.enhancements
                .enhanced_function_registry
                .recommendation_system,
        );
        enhancements["enhanced_function_registry"] = enhanced_function_registry;

        config_json["enhancements"] = enhancements;

        // --- summarization (level_configs map is not serialized) ---
        let mut summarization = json::JsonValue::new_object();
        summarization["merge_size"] = json::JsonValue::from(self.summarization.merge_size);
        summarization["max_summary_length"] =
            json::JsonValue::from(self.summarization.max_summary_length);
        summarization["min_node_size"] = json::JsonValue::from(self.summarization.min_node_size);
        summarization["overlap_sentences"] =
            json::JsonValue::from(self.summarization.overlap_sentences);

        let mut llm_config = json::JsonValue::new_object();
        llm_config["enabled"] = json::JsonValue::from(self.summarization.llm_config.enabled);
        llm_config["model_name"] =
            json::JsonValue::from(self.summarization.llm_config.model_name.as_str());
        llm_config["temperature"] =
            json::JsonValue::from(self.summarization.llm_config.temperature);
        llm_config["max_tokens"] = json::JsonValue::from(self.summarization.llm_config.max_tokens);
        // Enum → string mapping mirrors the match in `from_file`.
        let strategy_str = match self.summarization.llm_config.strategy {
            crate::summarization::LLMStrategy::Uniform => "uniform",
            crate::summarization::LLMStrategy::Adaptive => "adaptive",
            crate::summarization::LLMStrategy::Progressive => "progressive",
        };
        llm_config["strategy"] = json::JsonValue::from(strategy_str);

        summarization["llm_config"] = llm_config;
        config_json["summarization"] = summarization;

        // Pretty-print with 2-space indentation and write atomically enough
        // for a config file (single fs::write call).
        let content = json::stringify_pretty(config_json, 2);
        fs::write(path, content)?;
        Ok(())
    }
2434}