1use crate::Result;
2use std::fs;
3
4pub mod enhancements;
6pub mod loader;
8pub mod setconfig;
10pub mod validation;
12#[cfg(feature = "json5-support")]
14pub mod json5_loader;
15#[cfg(feature = "json5-support")]
17pub mod schema_validator;
18
19pub use setconfig::{
20 SetConfig,
21 ModeConfig,
23 SemanticPipelineConfig,
25 SemanticEmbeddingsConfig,
26 SemanticEntityConfig,
27 SemanticRetrievalConfig,
28 SemanticGraphConfig,
29 AlgorithmicPipelineConfig,
31 AlgorithmicEmbeddingsConfig,
32 AlgorithmicEntityConfig,
33 AlgorithmicRetrievalConfig,
34 AlgorithmicGraphConfig,
35 HybridPipelineConfig,
37 HybridWeightsConfig,
38 HybridEmbeddingsConfig,
39 HybridEntityConfig,
40 HybridRetrievalConfig,
41 HybridGraphConfig,
42};
43pub use validation::{Validatable, ValidationResult, validate_config_file};
44
45#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
47pub struct Config {
48 pub output_dir: String,
50
51 pub chunk_size: usize,
53
54 pub chunk_overlap: usize,
56
57 pub max_entities_per_chunk: Option<usize>,
59
60 pub top_k_results: Option<usize>,
62
63 pub similarity_threshold: Option<f32>,
65
66 #[serde(default = "default_approach")]
69 pub approach: String,
70
71 pub embeddings: EmbeddingConfig,
73
74 pub graph: GraphConfig,
76
77 pub text: TextConfig,
79
80 pub entities: EntityConfig,
82
83 pub retrieval: RetrievalConfig,
85
86 pub parallel: ParallelConfig,
88
89 pub ollama: crate::ollama::OllamaConfig,
91
92 pub enhancements: enhancements::EnhancementsConfig,
94
95 pub auto_save: AutoSaveConfig,
97
98 pub summarization: crate::summarization::HierarchicalConfig,
100
101 pub zero_cost_approach: ZeroCostApproachConfig,
103}
104
105#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
107pub struct AutoSaveConfig {
108 #[serde(default)]
110 pub enabled: bool,
111
112 #[serde(default = "default_auto_save_interval")]
114 pub interval_seconds: u64,
115
116 #[serde(default)]
118 pub workspace_name: Option<String>,
119
120 #[serde(default = "default_max_versions")]
122 pub max_versions: usize,
123}
124
125#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
127pub struct ZeroCostApproachConfig {
128 #[serde(default = "default_zero_cost_approach")]
130 pub approach: String,
131
132 #[serde(default)]
134 pub lazy_graphrag: LazyGraphRAGConfig,
135
136 #[serde(default)]
138 pub e2_graphrag: E2GraphRAGConfig,
139
140 #[serde(default)]
142 pub pure_algorithmic: PureAlgorithmicConfig,
143
144 #[serde(default)]
146 pub hybrid_strategy: HybridStrategyConfig,
147}
148
149#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
152pub struct LazyGraphRAGConfig {
153 pub enabled: bool,
155 pub concept_extraction: ConceptExtractionConfig,
157 pub co_occurrence: CoOccurrenceConfig,
159 pub indexing: LazyIndexingConfig,
161 pub query_expansion: LazyQueryExpansionConfig,
163 pub relevance_scoring: LazyRelevanceScoringConfig,
165}
166
167#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
170pub struct ConceptExtractionConfig {
171 pub min_concept_length: usize,
173 pub max_concept_words: usize,
175 pub use_noun_phrases: bool,
177 pub use_capitalization: bool,
179 pub use_title_case: bool,
181 pub use_tf_idf_scoring: bool,
183 pub min_term_frequency: usize,
185 pub max_concepts_per_chunk: usize,
187 pub min_concept_score: f32,
189 pub exclude_stopwords: bool,
191 pub custom_stopwords: Vec<String>,
193}
194
195#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
198pub struct CoOccurrenceConfig {
199 pub window_size: usize,
201 pub min_co_occurrence: usize,
203 pub jaccard_threshold: f32,
205 pub max_edges_per_node: usize,
207}
208
209#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
212pub struct LazyIndexingConfig {
213 pub use_bidirectional_index: bool,
215 pub enable_hnsw_index: bool,
217 pub cache_size: usize,
219}
220
221#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
224pub struct LazyQueryExpansionConfig {
225 pub enabled: bool,
227 pub max_expansions: usize,
229 pub expansion_model: String,
231 pub expansion_temperature: f32,
233 pub max_tokens_per_expansion: usize,
235}
236
237#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
240pub struct LazyRelevanceScoringConfig {
241 pub enabled: bool,
243 pub scoring_model: String,
245 pub batch_size: usize,
247 pub temperature: f32,
249 pub max_tokens_per_score: usize,
251}
252
253#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
256pub struct E2GraphRAGConfig {
257 pub enabled: bool,
259
260 pub ner_extraction: NERExtractionConfig,
262
263 pub keyword_extraction: KeywordExtractionConfig,
265
266 pub graph_construction: E2GraphConstructionConfig,
268
269 pub indexing: E2IndexingConfig,
271}
272
273#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
276pub struct NERExtractionConfig {
277 pub entity_types: Vec<String>,
279
280 pub use_capitalized_patterns: bool,
282
283 pub use_title_case_patterns: bool,
285
286 pub use_quoted_patterns: bool,
288
289 pub use_abbreviations: bool,
291
292 pub use_contextual_disambiguation: bool,
294
295 pub min_context_words: usize,
297
298 pub min_confidence: f32,
300
301 pub use_positional_boost: bool,
303
304 pub use_frequency_boost: bool,
306}
307
308impl Default for NERExtractionConfig {
309 fn default() -> Self {
310 Self {
311 entity_types: vec!["PERSON".to_string(), "ORG".to_string(), "LOCATION".to_string()],
312 use_capitalized_patterns: true,
313 use_title_case_patterns: true,
314 use_quoted_patterns: true,
315 use_abbreviations: true,
316 use_contextual_disambiguation: true,
317 min_context_words: 5,
318 min_confidence: 0.7,
319 use_positional_boost: true,
320 use_frequency_boost: true,
321 }
322 }
323}
324
325#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
328pub struct KeywordExtractionConfig {
329 pub algorithms: Vec<String>,
331
332 pub max_keywords_per_chunk: usize,
334
335 pub min_keyword_length: usize,
337
338 pub combine_algorithms: bool,
340}
341
342impl Default for KeywordExtractionConfig {
343 fn default() -> Self {
344 Self {
345 algorithms: vec!["tfidf".to_string(), "yake".to_string()],
346 max_keywords_per_chunk: 10,
347 min_keyword_length: 3,
348 combine_algorithms: true,
349 }
350 }
351}
352
353#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
356pub struct E2GraphConstructionConfig {
357 pub relationship_types: Vec<String>,
359
360 pub min_relationship_score: f32,
362
363 pub max_relationships_per_entity: usize,
365
366 pub use_mutual_information: bool,
368}
369
370impl Default for E2GraphConstructionConfig {
371 fn default() -> Self {
372 Self {
373 relationship_types: vec!["CO_OCCURS_WITH".to_string(), "RELATED_TO".to_string()],
374 min_relationship_score: 0.5,
375 max_relationships_per_entity: 20,
376 use_mutual_information: true,
377 }
378 }
379}
380
381#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
384pub struct E2IndexingConfig {
385 pub batch_size: usize,
387
388 pub enable_parallel_processing: bool,
390
391 pub cache_concept_vectors: bool,
393
394 pub use_hash_embeddings: bool,
396}
397
398impl Default for E2IndexingConfig {
399 fn default() -> Self {
400 Self {
401 batch_size: 32,
402 enable_parallel_processing: true,
403 cache_concept_vectors: true,
404 use_hash_embeddings: false,
405 }
406 }
407}
408
409#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
415pub struct PureAlgorithmicConfig {
416 pub enabled: bool,
418 pub pattern_extraction: PatternExtractionConfig,
420 pub keyword_extraction: PureKeywordExtractionConfig,
422 pub relationship_discovery: RelationshipDiscoveryConfig,
424 pub search_ranking: SearchRankingConfig,
426}
427
428#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
433pub struct PatternExtractionConfig {
434 pub capitalized_patterns: Vec<String>,
436 pub technical_patterns: Vec<String>,
438 pub context_patterns: Vec<String>,
440}
441
442#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
447pub struct PureKeywordExtractionConfig {
448 pub algorithm: String,
450 pub max_keywords: usize,
452 pub min_word_length: usize,
454 pub use_positional_boost: bool,
456 pub use_frequency_filter: bool,
458 pub min_term_frequency: usize,
460 pub max_term_frequency_ratio: f32,
462}
463
464#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
469pub struct RelationshipDiscoveryConfig {
470 pub window_size: usize,
472 pub min_co_occurrence: usize,
474 pub use_mutual_information: bool,
476 pub relationship_types: Vec<String>,
478 pub scoring_method: String,
480 pub min_similarity_score: f32,
482}
483
484#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
489pub struct SearchRankingConfig {
490 pub vector_search: VectorSearchConfig,
492 pub keyword_search: KeywordSearchConfig,
494 pub graph_traversal: GraphTraversalConfig,
496 pub hybrid_fusion: HybridFusionConfig,
498}
499
500#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
505pub struct VectorSearchConfig {
506 pub enabled: bool,
508}
509
510#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
515pub struct KeywordSearchConfig {
516 pub enabled: bool,
518 pub algorithm: String,
520 pub k1: f32,
522 pub b: f32,
524}
525
526#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
531pub struct GraphTraversalConfig {
532 pub enabled: bool,
534 pub algorithm: String,
536 pub damping_factor: f32,
538 pub max_iterations: usize,
540 pub personalized: bool,
542}
543
544#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
549pub struct HybridFusionConfig {
550 pub enabled: bool,
552 pub weights: FusionWeights,
554}
555
556#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
561pub struct FusionWeights {
562 pub keywords: f32,
564 pub graph: f32,
566 pub bm25: f32,
568}
569
570#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
575pub struct HybridStrategyConfig {
576 pub lazy_algorithmic: LazyAlgorithmicConfig,
578 pub progressive: ProgressiveConfig,
580 pub budget_aware: BudgetAwareConfig,
582}
583
584#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
589pub struct LazyAlgorithmicConfig {
590 pub indexing_approach: String,
592 pub query_approach: String,
594 pub cost_optimization: String,
596}
597
598#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
603pub struct ProgressiveConfig {
604 pub level_0: String,
606 pub level_1: String,
608 pub level_2: String,
610 pub level_3: String,
612 pub level_4_plus: String,
614}
615
616#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
621pub struct BudgetAwareConfig {
622 pub daily_budget_usd: f64,
624 pub queries_per_day: usize,
626 pub max_llm_cost_per_query: f64,
628 pub strategy: String,
630 pub fallback_to_algorithmic: bool,
632}
633
/// Serde default for `ZeroCostApproachConfig::approach`; returns
/// `"pure_algorithmic"`.
fn default_zero_cost_approach() -> String {
    String::from("pure_algorithmic")
}
638
639impl Default for ZeroCostApproachConfig {
640 fn default() -> Self {
641 Self {
642 approach: default_zero_cost_approach(),
643 lazy_graphrag: LazyGraphRAGConfig::default(),
644 e2_graphrag: E2GraphRAGConfig::default(),
645 pure_algorithmic: PureAlgorithmicConfig::default(),
646 hybrid_strategy: HybridStrategyConfig::default(),
647 }
648 }
649}
650
651impl Default for LazyGraphRAGConfig { fn default() -> Self { Self { enabled: false, concept_extraction: Default::default(), co_occurrence: Default::default(), indexing: Default::default(), query_expansion: Default::default(), relevance_scoring: Default::default() } } }
653impl Default for ConceptExtractionConfig { fn default() -> Self { Self { min_concept_length: 3, max_concept_words: 5, use_noun_phrases: true, use_capitalization: true, use_title_case: true, use_tf_idf_scoring: true, min_term_frequency: 2, max_concepts_per_chunk: 10, min_concept_score: 0.1, exclude_stopwords: true, custom_stopwords: vec!["the".to_string(), "and".to_string(), "or".to_string()] } } }
654impl Default for CoOccurrenceConfig { fn default() -> Self { Self { window_size: 50, min_co_occurrence: 2, jaccard_threshold: 0.2, max_edges_per_node: 25 } } }
655impl Default for LazyIndexingConfig { fn default() -> Self { Self { use_bidirectional_index: true, enable_hnsw_index: false, cache_size: 10000 } } }
656impl Default for LazyQueryExpansionConfig { fn default() -> Self { Self { enabled: true, max_expansions: 3, expansion_model: "llama3.1:8b".to_string(), expansion_temperature: 0.1, max_tokens_per_expansion: 50 } } }
657impl Default for LazyRelevanceScoringConfig { fn default() -> Self { Self { enabled: true, scoring_model: "llama3.1:8b".to_string(), batch_size: 10, temperature: 0.2, max_tokens_per_score: 30 } } }
658impl Default for E2GraphRAGConfig { fn default() -> Self { Self { enabled: false, ner_extraction: Default::default(), keyword_extraction: Default::default(), graph_construction: Default::default(), indexing: Default::default() } } }
659impl Default for PureAlgorithmicConfig { fn default() -> Self { Self { enabled: true, pattern_extraction: Default::default(), keyword_extraction: Default::default(), relationship_discovery: Default::default(), search_ranking: Default::default() } } }
660impl Default for PatternExtractionConfig { fn default() -> Self { Self { capitalized_patterns: vec![r"[A-Z][a-z]+".to_string()], technical_patterns: vec![r"[a-z]+-[a-z]+".to_string()], context_patterns: vec![r"\b(the|this)\s+(\w+)".to_string()] } } }
661impl Default for PureKeywordExtractionConfig { fn default() -> Self { Self { algorithm: "tf_idf".to_string(), max_keywords: 20, min_word_length: 4, use_positional_boost: true, use_frequency_filter: true, min_term_frequency: 2, max_term_frequency_ratio: 0.8 } } }
662impl Default for RelationshipDiscoveryConfig { fn default() -> Self { Self { window_size: 30, min_co_occurrence: 2, use_mutual_information: true, relationship_types: vec!["co_occurs_with".to_string()], scoring_method: "jaccard_similarity".to_string(), min_similarity_score: 0.1 } } }
663impl Default for SearchRankingConfig { fn default() -> Self { Self { vector_search: VectorSearchConfig { enabled: false }, keyword_search: KeywordSearchConfig { enabled: true, algorithm: "bm25".to_string(), k1: 1.2, b: 0.75 }, graph_traversal: GraphTraversalConfig { enabled: true, algorithm: "pagerank".to_string(), damping_factor: 0.85, max_iterations: 20, personalized: true }, hybrid_fusion: HybridFusionConfig { enabled: true, weights: FusionWeights { keywords: 0.4, graph: 0.4, bm25: 0.2 } } } } }
664impl Default for HybridStrategyConfig { fn default() -> Self { Self { lazy_algorithmic: LazyAlgorithmicConfig { indexing_approach: "e2_graphrag".to_string(), query_approach: "lazy_graphrag".to_string(), cost_optimization: "indexing".to_string() }, progressive: ProgressiveConfig { level_0: "pure_algorithmic".to_string(), level_1: "pure_algorithmic".to_string(), level_2: "e2_graphrag".to_string(), level_3: "lazy_graphrag".to_string(), level_4_plus: "lazy_graphrag".to_string() }, budget_aware: BudgetAwareConfig { daily_budget_usd: 1.0, queries_per_day: 1000, max_llm_cost_per_query: 0.002, strategy: "lazy_graphrag".to_string(), fallback_to_algorithmic: true } } } }
665impl Default for VectorSearchConfig { fn default() -> Self { Self { enabled: false } } }
666impl Default for KeywordSearchConfig { fn default() -> Self { Self { enabled: true, algorithm: "bm25".to_string(), k1: 1.2, b: 0.75 } } }
667impl Default for GraphTraversalConfig { fn default() -> Self { Self { enabled: true, algorithm: "pagerank".to_string(), damping_factor: 0.85, max_iterations: 20, personalized: true } } }
668impl Default for HybridFusionConfig { fn default() -> Self { Self { enabled: true, weights: FusionWeights { keywords: 0.4, graph: 0.4, bm25: 0.2 } } } }
669impl Default for FusionWeights { fn default() -> Self { Self { keywords: 0.4, graph: 0.4, bm25: 0.2 } } }
670impl Default for LazyAlgorithmicConfig { fn default() -> Self { Self { indexing_approach: "e2_graphrag".to_string(), query_approach: "lazy_graphrag".to_string(), cost_optimization: "indexing".to_string() } } }
671impl Default for ProgressiveConfig { fn default() -> Self { Self { level_0: "pure_algorithmic".to_string(), level_1: "pure_algorithmic".to_string(), level_2: "e2_graphrag".to_string(), level_3: "lazy_graphrag".to_string(), level_4_plus: "lazy_graphrag".to_string() } } }
672impl Default for BudgetAwareConfig { fn default() -> Self { Self { daily_budget_usd: 1.0, queries_per_day: 1000, max_llm_cost_per_query: 0.002, strategy: "lazy_graphrag".to_string(), fallback_to_algorithmic: true } } }
673
674#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
676pub struct EmbeddingConfig {
677 pub dimension: usize,
679
680 pub backend: String,
682
683 #[serde(default)]
693 pub model: Option<String>,
694
695 pub fallback_to_hash: bool,
697
698 pub api_endpoint: Option<String>,
700
701 pub api_key: Option<String>,
704
705 #[serde(default)]
707 pub cache_dir: Option<String>,
708
709 #[serde(default = "default_batch_size")]
711 pub batch_size: usize,
712}
713
/// Serde default for `EmbeddingConfig::batch_size`; returns `32`.
fn default_batch_size() -> usize {
    32_usize
}
717
718#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
720pub struct GraphConfig {
721 pub max_connections: usize,
723
724 pub similarity_threshold: f32,
726
727 #[serde(default = "default_true")]
729 pub extract_relationships: bool,
730
731 #[serde(default = "default_relationship_confidence")]
733 pub relationship_confidence_threshold: f32,
734
735 #[serde(default)]
737 pub traversal: TraversalConfigParams,
738}
739
740#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
742pub struct TraversalConfigParams {
743 #[serde(default = "default_max_traversal_depth")]
745 pub max_depth: usize,
746
747 #[serde(default = "default_max_paths")]
749 pub max_paths: usize,
750
751 #[serde(default = "default_true")]
753 pub use_edge_weights: bool,
754
755 #[serde(default = "default_min_relationship_strength")]
757 pub min_relationship_strength: f32,
758}
759
760impl Default for TraversalConfigParams {
761 fn default() -> Self {
762 Self {
763 max_depth: default_max_traversal_depth(),
764 max_paths: default_max_paths(),
765 use_edge_weights: true,
766 min_relationship_strength: default_min_relationship_strength(),
767 }
768 }
769}
770
771#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
773pub struct TextConfig {
774 pub chunk_size: usize,
776
777 pub chunk_overlap: usize,
779
780 pub languages: Vec<String>,
782}
783
784#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
786pub struct EntityConfig {
787 pub min_confidence: f32,
789
790 pub entity_types: Vec<String>,
792
793 #[serde(default)]
795 pub use_gleaning: bool,
796
797 #[serde(default = "default_max_gleaning_rounds")]
799 pub max_gleaning_rounds: usize,
800}
801
802#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
804pub struct RetrievalConfig {
805 pub top_k: usize,
807
808 pub search_algorithm: String,
810}
811
812#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
814pub struct ParallelConfig {
815 pub num_threads: usize,
817
818 pub enabled: bool,
820
821 pub min_batch_size: usize,
823
824 pub chunk_batch_size: usize,
826
827 pub parallel_embeddings: bool,
829
830 pub parallel_graph_ops: bool,
832
833 pub parallel_vector_ops: bool,
835}
836
/// Fallback embedding dimension: `384`.
fn default_embedding_dim() -> usize {
    384_usize
}

/// Fallback embedding backend name: `"hash"`.
fn default_embedding_backend() -> String {
    String::from("hash")
}

/// Fallback for `GraphConfig::max_connections`: `10`.
fn default_max_connections() -> usize {
    10_usize
}

/// Fallback similarity threshold: `0.8`.
fn default_similarity_threshold() -> f32 {
    0.8_f32
}

/// Fallback chunk size: `1000`.
fn default_chunk_size() -> usize {
    1_000
}

/// Fallback chunk overlap: `200`.
fn default_chunk_overlap() -> usize {
    200_usize
}

/// Fallback language list: `["en"]`.
fn default_languages() -> Vec<String> {
    vec![String::from("en")]
}

/// Fallback for `EntityConfig::min_confidence`: `0.7`.
fn default_min_confidence() -> f32 {
    0.7_f32
}

/// Fallback entity type list: `PERSON`, `ORG`, `LOCATION`.
fn default_entity_types() -> Vec<String> {
    ["PERSON", "ORG", "LOCATION"]
        .iter()
        .map(|label| String::from(*label))
        .collect()
}

/// Fallback for `top_k` style settings: `10`.
fn default_top_k() -> usize {
    10_usize
}

/// Fallback search algorithm name: `"cosine"`.
fn default_search_algorithm() -> String {
    String::from("cosine")
}

/// Fallback thread count: `0`.
/// NOTE(review): `0` presumably means "choose automatically" — confirm.
fn default_num_threads() -> usize {
    0_usize
}

/// Fallback for `ParallelConfig::min_batch_size`: `10`.
fn default_min_batch_size() -> usize {
    10_usize
}

/// Fallback for `ParallelConfig::chunk_batch_size`: `100`.
fn default_chunk_batch_size() -> usize {
    100_usize
}

/// Serde default helper for boolean fields that should default to `true`.
fn default_true() -> bool {
    true
}

/// Serde default for `GraphConfig::relationship_confidence_threshold`: `0.5`.
fn default_relationship_confidence() -> f32 {
    0.5_f32
}

/// Serde default for `EntityConfig::max_gleaning_rounds`: `3`.
fn default_max_gleaning_rounds() -> usize {
    3_usize
}

/// Serde default for `Config::approach`: `"semantic"`.
fn default_approach() -> String {
    String::from("semantic")
}

/// Serde default for `TraversalConfigParams::max_depth`: `3`.
fn default_max_traversal_depth() -> usize {
    3_usize
}

/// Serde default for `TraversalConfigParams::max_paths`: `10`.
fn default_max_paths() -> usize {
    10_usize
}

/// Serde default for `TraversalConfigParams::min_relationship_strength`: `0.3`.
fn default_min_relationship_strength() -> f32 {
    0.3_f32
}

/// Serde default for `AutoSaveConfig::interval_seconds`: `300`.
fn default_auto_save_interval() -> u64 {
    300_u64
}

/// Serde default for `AutoSaveConfig::max_versions`: `5`.
fn default_max_versions() -> usize {
    5_usize
}
911
912impl Default for Config {
913 fn default() -> Self {
914 Self {
915 output_dir: "./output".to_string(),
916 chunk_size: default_chunk_size(),
917 chunk_overlap: default_chunk_overlap(),
918 max_entities_per_chunk: Some(10),
919 top_k_results: Some(default_top_k()),
920 similarity_threshold: Some(default_similarity_threshold()),
921 approach: default_approach(),
922 embeddings: EmbeddingConfig {
923 dimension: default_embedding_dim(),
924 backend: default_embedding_backend(),
925 model: Some("sentence-transformers/all-MiniLM-L6-v2".to_string()),
926 fallback_to_hash: true,
927 api_endpoint: None,
928 api_key: None,
929 cache_dir: None,
930 batch_size: default_batch_size(),
931 },
932 graph: GraphConfig {
933 max_connections: default_max_connections(),
934 similarity_threshold: default_similarity_threshold(),
935 extract_relationships: default_true(),
936 relationship_confidence_threshold: default_relationship_confidence(),
937 traversal: TraversalConfigParams::default(),
938 },
939 text: TextConfig {
940 chunk_size: default_chunk_size(),
941 chunk_overlap: default_chunk_overlap(),
942 languages: default_languages(),
943 },
944 entities: EntityConfig {
945 min_confidence: default_min_confidence(),
946 entity_types: default_entity_types(),
947 use_gleaning: false,
948 max_gleaning_rounds: default_max_gleaning_rounds(),
949 },
950 retrieval: RetrievalConfig {
951 top_k: default_top_k(),
952 search_algorithm: default_search_algorithm(),
953 },
954 parallel: ParallelConfig {
955 num_threads: default_num_threads(),
956 enabled: true,
957 min_batch_size: default_min_batch_size(),
958 chunk_batch_size: default_chunk_batch_size(),
959 parallel_embeddings: true,
960 parallel_graph_ops: true,
961 parallel_vector_ops: true,
962 },
963 ollama: crate::ollama::OllamaConfig::default(),
964 enhancements: enhancements::EnhancementsConfig::default(),
965 auto_save: AutoSaveConfig {
966 enabled: false,
967 interval_seconds: default_auto_save_interval(),
968 workspace_name: None,
969 max_versions: default_max_versions(),
970 },
971 summarization: crate::summarization::HierarchicalConfig::default(),
972 zero_cost_approach: ZeroCostApproachConfig::default(),
973 }
974 }
975}
976
977impl Default for AutoSaveConfig {
978 fn default() -> Self {
979 Self {
980 enabled: false,
981 interval_seconds: default_auto_save_interval(),
982 workspace_name: None,
983 max_versions: default_max_versions(),
984 }
985 }
986}
987
988impl Config {
989 pub fn from_file(path: &str) -> Result<Self> {
991 let content = fs::read_to_string(path)?;
992 let parsed = json::parse(&content)?;
993
994 let config = Config {
995 output_dir: parsed["output_dir"]
996 .as_str()
997 .unwrap_or("./output")
998 .to_string(),
999 chunk_size: parsed["chunk_size"]
1000 .as_usize()
1001 .unwrap_or(default_chunk_size()),
1002 chunk_overlap: parsed["chunk_overlap"]
1003 .as_usize()
1004 .unwrap_or(default_chunk_overlap()),
1005 max_entities_per_chunk: parsed["max_entities_per_chunk"]
1006 .as_usize(),
1007 top_k_results: parsed["top_k_results"]
1008 .as_usize(),
1009 similarity_threshold: parsed["similarity_threshold"]
1010 .as_f32(),
1011 approach: parsed["approach"]
1012 .as_str()
1013 .unwrap_or(&default_approach())
1014 .to_string(),
1015 embeddings: EmbeddingConfig {
1016 dimension: parsed["embeddings"]["dimension"]
1017 .as_usize()
1018 .unwrap_or(default_embedding_dim()),
1019 backend: parsed["embeddings"]["backend"]
1020 .as_str()
1021 .unwrap_or(&default_embedding_backend())
1022 .to_string(),
1023 model: parsed["embeddings"]["model"]
1024 .as_str()
1025 .map(|s| s.to_string()),
1026 fallback_to_hash: parsed["embeddings"]["fallback_to_hash"]
1027 .as_bool()
1028 .unwrap_or(true),
1029 api_endpoint: parsed["embeddings"]["api_endpoint"]
1030 .as_str()
1031 .map(|s| s.to_string()),
1032 api_key: parsed["embeddings"]["api_key"]
1033 .as_str()
1034 .map(|s| s.to_string()),
1035 cache_dir: parsed["embeddings"]["cache_dir"]
1036 .as_str()
1037 .map(|s| s.to_string()),
1038 batch_size: parsed["embeddings"]["batch_size"]
1039 .as_usize()
1040 .unwrap_or(default_batch_size()),
1041 },
1042 graph: GraphConfig {
1043 max_connections: parsed["graph"]["max_connections"]
1044 .as_usize()
1045 .unwrap_or(default_max_connections()),
1046 similarity_threshold: parsed["graph"]["similarity_threshold"]
1047 .as_f32()
1048 .unwrap_or(default_similarity_threshold()),
1049 extract_relationships: parsed["graph"]["extract_relationships"]
1050 .as_bool()
1051 .unwrap_or(default_true()),
1052 relationship_confidence_threshold: parsed["graph"]["relationship_confidence_threshold"]
1053 .as_f32()
1054 .unwrap_or(default_relationship_confidence()),
1055 traversal: TraversalConfigParams {
1056 max_depth: parsed["graph"]["traversal"]["max_depth"]
1057 .as_usize()
1058 .unwrap_or(default_max_traversal_depth()),
1059 max_paths: parsed["graph"]["traversal"]["max_paths"]
1060 .as_usize()
1061 .unwrap_or(default_max_paths()),
1062 use_edge_weights: parsed["graph"]["traversal"]["use_edge_weights"]
1063 .as_bool()
1064 .unwrap_or(default_true()),
1065 min_relationship_strength: parsed["graph"]["traversal"]["min_relationship_strength"]
1066 .as_f32()
1067 .unwrap_or(default_min_relationship_strength()),
1068 },
1069 },
1070 text: TextConfig {
1071 chunk_size: parsed["text"]["chunk_size"]
1072 .as_usize()
1073 .unwrap_or(default_chunk_size()),
1074 chunk_overlap: parsed["text"]["chunk_overlap"]
1075 .as_usize()
1076 .unwrap_or(default_chunk_overlap()),
1077 languages: if parsed["text"]["languages"].is_array() {
1078 parsed["text"]["languages"]
1079 .members()
1080 .map(|v| v.as_str().unwrap_or("en").to_string())
1081 .collect()
1082 } else {
1083 default_languages()
1084 },
1085 },
1086 entities: EntityConfig {
1087 min_confidence: parsed["entities"]["min_confidence"]
1088 .as_f32()
1089 .unwrap_or(default_min_confidence()),
1090 entity_types: if parsed["entities"]["entity_types"].is_array() {
1091 parsed["entities"]["entity_types"]
1092 .members()
1093 .map(|v| v.as_str().unwrap_or("PERSON").to_string())
1094 .collect()
1095 } else {
1096 default_entity_types()
1097 },
1098 use_gleaning: parsed["entities"]["use_gleaning"]
1099 .as_bool()
1100 .unwrap_or(false),
1101 max_gleaning_rounds: parsed["entities"]["max_gleaning_rounds"]
1102 .as_usize()
1103 .unwrap_or(default_max_gleaning_rounds()),
1104 },
1105 retrieval: RetrievalConfig {
1106 top_k: parsed["retrieval"]["top_k"]
1107 .as_usize()
1108 .unwrap_or(default_top_k()),
1109 search_algorithm: parsed["retrieval"]["search_algorithm"]
1110 .as_str()
1111 .unwrap_or(&default_search_algorithm())
1112 .to_string(),
1113 },
1114 parallel: ParallelConfig {
1115 num_threads: parsed["parallel"]["num_threads"]
1116 .as_usize()
1117 .unwrap_or(default_num_threads()),
1118 enabled: parsed["parallel"]["enabled"].as_bool().unwrap_or(true),
1119 min_batch_size: parsed["parallel"]["min_batch_size"]
1120 .as_usize()
1121 .unwrap_or(default_min_batch_size()),
1122 chunk_batch_size: parsed["parallel"]["chunk_batch_size"]
1123 .as_usize()
1124 .unwrap_or(default_chunk_batch_size()),
1125 parallel_embeddings: parsed["parallel"]["parallel_embeddings"]
1126 .as_bool()
1127 .unwrap_or(true),
1128 parallel_graph_ops: parsed["parallel"]["parallel_graph_ops"]
1129 .as_bool()
1130 .unwrap_or(true),
1131 parallel_vector_ops: parsed["parallel"]["parallel_vector_ops"]
1132 .as_bool()
1133 .unwrap_or(true),
1134 },
1135 ollama: crate::ollama::OllamaConfig {
1136 enabled: parsed["ollama"]["enabled"].as_bool().unwrap_or(false),
1137 host: parsed["ollama"]["host"]
1138 .as_str()
1139 .unwrap_or("http://localhost")
1140 .to_string(),
1141 port: parsed["ollama"]["port"].as_u16().unwrap_or(11434),
1142 embedding_model: parsed["ollama"]["embedding_model"]
1143 .as_str()
1144 .unwrap_or("nomic-embed-text")
1145 .to_string(),
1146 chat_model: parsed["ollama"]["chat_model"]
1147 .as_str()
1148 .unwrap_or("llama3.2:3b")
1149 .to_string(),
1150 timeout_seconds: parsed["ollama"]["timeout_seconds"].as_u64().unwrap_or(30),
1151 max_retries: parsed["ollama"]["max_retries"].as_u32().unwrap_or(3),
1152 fallback_to_hash: parsed["ollama"]["fallback_to_hash"]
1153 .as_bool()
1154 .unwrap_or(true),
1155 max_tokens: parsed["ollama"]["max_tokens"].as_u32(),
1156 temperature: parsed["ollama"]["temperature"].as_f32(),
1157 },
1158 enhancements: enhancements::EnhancementsConfig {
1159 enabled: parsed["enhancements"]["enabled"].as_bool().unwrap_or(true),
1160 query_analysis: enhancements::QueryAnalysisConfig {
1161 enabled: parsed["enhancements"]["query_analysis"]["enabled"]
1162 .as_bool()
1163 .unwrap_or(true),
1164 min_confidence: parsed["enhancements"]["query_analysis"]["min_confidence"]
1165 .as_f32()
1166 .unwrap_or(0.6),
1167 enable_strategy_suggestion: parsed["enhancements"]["query_analysis"]
1168 ["enable_strategy_suggestion"]
1169 .as_bool()
1170 .unwrap_or(true),
1171 enable_keyword_analysis: parsed["enhancements"]["query_analysis"]
1172 ["enable_keyword_analysis"]
1173 .as_bool()
1174 .unwrap_or(true),
1175 enable_complexity_scoring: parsed["enhancements"]["query_analysis"]
1176 ["enable_complexity_scoring"]
1177 .as_bool()
1178 .unwrap_or(true),
1179 },
1180 adaptive_retrieval: enhancements::AdaptiveRetrievalConfig {
1181 enabled: parsed["enhancements"]["adaptive_retrieval"]["enabled"]
1182 .as_bool()
1183 .unwrap_or(true),
1184 use_query_analysis: parsed["enhancements"]["adaptive_retrieval"]
1185 ["use_query_analysis"]
1186 .as_bool()
1187 .unwrap_or(true),
1188 enable_cross_strategy_fusion: parsed["enhancements"]["adaptive_retrieval"]
1189 ["enable_cross_strategy_fusion"]
1190 .as_bool()
1191 .unwrap_or(true),
1192 diversity_threshold: parsed["enhancements"]["adaptive_retrieval"]
1193 ["diversity_threshold"]
1194 .as_f32()
1195 .unwrap_or(0.8),
1196 enable_diversity_selection: parsed["enhancements"]["adaptive_retrieval"]
1197 ["enable_diversity_selection"]
1198 .as_bool()
1199 .unwrap_or(true),
1200 enable_confidence_weighting: parsed["enhancements"]["adaptive_retrieval"]
1201 ["enable_confidence_weighting"]
1202 .as_bool()
1203 .unwrap_or(true),
1204 },
1205 performance_benchmarking: enhancements::BenchmarkingConfig {
1206 enabled: parsed["enhancements"]["performance_benchmarking"]["enabled"]
1207 .as_bool()
1208 .unwrap_or(false),
1209 auto_recommendations: parsed["enhancements"]["performance_benchmarking"]
1210 ["auto_recommendations"]
1211 .as_bool()
1212 .unwrap_or(true),
1213 comprehensive_testing: parsed["enhancements"]["performance_benchmarking"]
1214 ["comprehensive_testing"]
1215 .as_bool()
1216 .unwrap_or(false),
1217 iterations: parsed["enhancements"]["performance_benchmarking"]["iterations"]
1218 .as_usize()
1219 .unwrap_or(3),
1220 include_parallel: parsed["enhancements"]["performance_benchmarking"]
1221 ["include_parallel"]
1222 .as_bool()
1223 .unwrap_or(true),
1224 enable_memory_profiling: parsed["enhancements"]["performance_benchmarking"]
1225 ["enable_memory_profiling"]
1226 .as_bool()
1227 .unwrap_or(false),
1228 },
1229 enhanced_function_registry: enhancements::FunctionRegistryConfig {
1230 enabled: parsed["enhancements"]["enhanced_function_registry"]["enabled"]
1231 .as_bool()
1232 .unwrap_or(true),
1233 categorization: parsed["enhancements"]["enhanced_function_registry"]
1234 ["categorization"]
1235 .as_bool()
1236 .unwrap_or(true),
1237 usage_statistics: parsed["enhancements"]["enhanced_function_registry"]
1238 ["usage_statistics"]
1239 .as_bool()
1240 .unwrap_or(true),
1241 dynamic_registration: parsed["enhancements"]["enhanced_function_registry"]
1242 ["dynamic_registration"]
1243 .as_bool()
1244 .unwrap_or(true),
1245 performance_monitoring: parsed["enhancements"]["enhanced_function_registry"]
1246 ["performance_monitoring"]
1247 .as_bool()
1248 .unwrap_or(false),
1249 recommendation_system: parsed["enhancements"]["enhanced_function_registry"]
1250 ["recommendation_system"]
1251 .as_bool()
1252 .unwrap_or(true),
1253 },
1254 #[cfg(feature = "lightrag")]
1255 lightrag: enhancements::LightRAGConfig {
1256 enabled: parsed["enhancements"]["lightrag"]["enabled"]
1257 .as_bool()
1258 .unwrap_or(true),
1259 max_keywords: parsed["enhancements"]["lightrag"]["max_keywords"]
1260 .as_usize()
1261 .unwrap_or(20),
1262 high_level_weight: parsed["enhancements"]["lightrag"]["high_level_weight"]
1263 .as_f32()
1264 .unwrap_or(0.6),
1265 low_level_weight: parsed["enhancements"]["lightrag"]["low_level_weight"]
1266 .as_f32()
1267 .unwrap_or(0.4),
1268 merge_strategy: parsed["enhancements"]["lightrag"]["merge_strategy"]
1269 .as_str()
1270 .unwrap_or("weighted")
1271 .to_string(),
1272 language: parsed["enhancements"]["lightrag"]["language"]
1273 .as_str()
1274 .unwrap_or("English")
1275 .to_string(),
1276 enable_cache: parsed["enhancements"]["lightrag"]["enable_cache"]
1277 .as_bool()
1278 .unwrap_or(true),
1279 },
1280 #[cfg(feature = "leiden")]
1281 leiden: enhancements::LeidenCommunitiesConfig {
1282 enabled: parsed["enhancements"]["leiden"]["enabled"]
1283 .as_bool()
1284 .unwrap_or(true),
1285 max_cluster_size: parsed["enhancements"]["leiden"]["max_cluster_size"]
1286 .as_usize()
1287 .unwrap_or(10),
1288 use_lcc: parsed["enhancements"]["leiden"]["use_lcc"]
1289 .as_bool()
1290 .unwrap_or(true),
1291 seed: parsed["enhancements"]["leiden"]["seed"]
1292 .as_u64(),
1293 resolution: parsed["enhancements"]["leiden"]["resolution"]
1294 .as_f32()
1295 .unwrap_or(1.0),
1296 max_levels: parsed["enhancements"]["leiden"]["max_levels"]
1297 .as_usize()
1298 .unwrap_or(5),
1299 min_improvement: parsed["enhancements"]["leiden"]["min_improvement"]
1300 .as_f32()
1301 .unwrap_or(0.001),
1302 enable_hierarchical: parsed["enhancements"]["leiden"]["enable_hierarchical"]
1303 .as_bool()
1304 .unwrap_or(true),
1305 generate_summaries: parsed["enhancements"]["leiden"]["generate_summaries"]
1306 .as_bool()
1307 .unwrap_or(true),
1308 max_summary_length: parsed["enhancements"]["leiden"]["max_summary_length"]
1309 .as_usize()
1310 .unwrap_or(5),
1311 use_extractive_summary: parsed["enhancements"]["leiden"]["use_extractive_summary"]
1312 .as_bool()
1313 .unwrap_or(true),
1314 adaptive_routing: enhancements::AdaptiveRoutingConfig {
1315 enabled: parsed["enhancements"]["leiden"]["adaptive_routing"]["enabled"]
1316 .as_bool()
1317 .unwrap_or(true),
1318 default_level: parsed["enhancements"]["leiden"]["adaptive_routing"]["default_level"]
1319 .as_usize()
1320 .unwrap_or(1),
1321 keyword_weight: parsed["enhancements"]["leiden"]["adaptive_routing"]["keyword_weight"]
1322 .as_f32()
1323 .unwrap_or(0.5),
1324 length_weight: parsed["enhancements"]["leiden"]["adaptive_routing"]["length_weight"]
1325 .as_f32()
1326 .unwrap_or(0.3),
1327 entity_weight: parsed["enhancements"]["leiden"]["adaptive_routing"]["entity_weight"]
1328 .as_f32()
1329 .unwrap_or(0.2),
1330 },
1331 },
1332 #[cfg(feature = "cross-encoder")]
1333 cross_encoder: enhancements::CrossEncoderConfig {
1334 enabled: parsed["enhancements"]["cross_encoder"]["enabled"]
1335 .as_bool()
1336 .unwrap_or(true),
1337 model_name: parsed["enhancements"]["cross_encoder"]["model_name"]
1338 .as_str()
1339 .unwrap_or("cross-encoder/ms-marco-MiniLM-L-6-v2")
1340 .to_string(),
1341 max_length: parsed["enhancements"]["cross_encoder"]["max_length"]
1342 .as_usize()
1343 .unwrap_or(512),
1344 batch_size: parsed["enhancements"]["cross_encoder"]["batch_size"]
1345 .as_usize()
1346 .unwrap_or(32),
1347 top_k: parsed["enhancements"]["cross_encoder"]["top_k"]
1348 .as_usize()
1349 .unwrap_or(10),
1350 min_confidence: parsed["enhancements"]["cross_encoder"]["min_confidence"]
1351 .as_f32()
1352 .unwrap_or(0.0),
1353 normalize_scores: parsed["enhancements"]["cross_encoder"]["normalize_scores"]
1354 .as_bool()
1355 .unwrap_or(true),
1356 },
1357 },
1358 auto_save: AutoSaveConfig {
1359 enabled: parsed["auto_save"]["enabled"].as_bool().unwrap_or(false),
1360 interval_seconds: parsed["auto_save"]["interval_seconds"]
1361 .as_u64()
1362 .unwrap_or(default_auto_save_interval()),
1363 workspace_name: parsed["auto_save"]["workspace_name"]
1364 .as_str()
1365 .map(|s| s.to_string()),
1366 max_versions: parsed["auto_save"]["max_versions"]
1367 .as_usize()
1368 .unwrap_or(default_max_versions()),
1369 },
1370 summarization: if parsed["summarization"].is_object() {
1371 crate::summarization::HierarchicalConfig {
1372 merge_size: parsed["summarization"]["merge_size"]
1373 .as_usize()
1374 .unwrap_or(3),
1375 max_summary_length: parsed["summarization"]["max_summary_length"]
1376 .as_usize()
1377 .unwrap_or(250),
1378 min_node_size: parsed["summarization"]["min_node_size"]
1379 .as_usize()
1380 .unwrap_or(50),
1381 overlap_sentences: parsed["summarization"]["overlap_sentences"]
1382 .as_usize()
1383 .unwrap_or(2),
1384 llm_config: if parsed["summarization"]["llm_config"].is_object() {
1385 crate::summarization::LLMConfig {
1386 enabled: parsed["summarization"]["llm_config"]["enabled"]
1387 .as_bool()
1388 .unwrap_or(false),
1389 model_name: parsed["summarization"]["llm_config"]["model_name"]
1390 .as_str()
1391 .unwrap_or("llama3.1:8b")
1392 .to_string(),
1393 temperature: parsed["summarization"]["llm_config"]["temperature"]
1394 .as_f32()
1395 .unwrap_or(0.3),
1396 max_tokens: parsed["summarization"]["llm_config"]["max_tokens"]
1397 .as_usize()
1398 .unwrap_or(180),
1399 strategy: match parsed["summarization"]["llm_config"]["strategy"]
1400 .as_str()
1401 .unwrap_or("progressive") {
1402 "uniform" => crate::summarization::LLMStrategy::Uniform,
1403 "adaptive" => crate::summarization::LLMStrategy::Adaptive,
1404 "progressive" => crate::summarization::LLMStrategy::Progressive,
1405 _ => crate::summarization::LLMStrategy::Progressive,
1406 },
1407 level_configs: std::collections::HashMap::new(), }
1409 } else {
1410 crate::summarization::LLMConfig::default()
1411 },
1412 }
1413 } else {
1414 crate::summarization::HierarchicalConfig::default()
1415 },
1416 zero_cost_approach: if parsed["zero_cost_approach"].is_object() {
1417 ZeroCostApproachConfig {
1418 approach: parsed["zero_cost_approach"]["approach"]
1419 .as_str()
1420 .unwrap_or("pure_algorithmic")
1421 .to_string(),
1422 lazy_graphrag: if parsed["zero_cost_approach"]["lazy_graphrag"].is_object() {
1423 LazyGraphRAGConfig {
1424 enabled: parsed["zero_cost_approach"]["lazy_graphrag"]["enabled"]
1425 .as_bool()
1426 .unwrap_or(false),
1427 concept_extraction: ConceptExtractionConfig::default(),
1428 co_occurrence: CoOccurrenceConfig::default(),
1429 indexing: LazyIndexingConfig::default(),
1430 query_expansion: LazyQueryExpansionConfig::default(),
1431 relevance_scoring: LazyRelevanceScoringConfig::default(),
1432 }
1433 } else {
1434 LazyGraphRAGConfig::default()
1435 },
1436 e2_graphrag: E2GraphRAGConfig::default(),
1437 pure_algorithmic: PureAlgorithmicConfig::default(),
1438 hybrid_strategy: HybridStrategyConfig::default(),
1439 }
1440 } else {
1441 ZeroCostApproachConfig::default()
1442 },
1443 };
1444
1445 Ok(config)
1446 }
1447
1448 pub fn to_file(&self, path: &str) -> Result<()> {
1450 let mut config_json = json::JsonValue::new_object();
1451
1452 let mut embeddings = json::JsonValue::new_object();
1454 embeddings["dimension"] = json::JsonValue::from(self.embeddings.dimension);
1455 if let Some(endpoint) = &self.embeddings.api_endpoint {
1456 embeddings["api_endpoint"] = json::JsonValue::from(endpoint.as_str());
1457 }
1458 if let Some(key) = &self.embeddings.api_key {
1459 embeddings["api_key"] = json::JsonValue::from(key.as_str());
1460 }
1461 config_json["embeddings"] = embeddings;
1462
1463 let mut graph = json::JsonValue::new_object();
1465 graph["max_connections"] = json::JsonValue::from(self.graph.max_connections);
1466 graph["similarity_threshold"] = json::JsonValue::from(self.graph.similarity_threshold);
1467 graph["extract_relationships"] = json::JsonValue::from(self.graph.extract_relationships);
1468 graph["relationship_confidence_threshold"] = json::JsonValue::from(self.graph.relationship_confidence_threshold);
1469
1470 let mut traversal = json::JsonValue::new_object();
1471 traversal["max_depth"] = json::JsonValue::from(self.graph.traversal.max_depth);
1472 traversal["max_paths"] = json::JsonValue::from(self.graph.traversal.max_paths);
1473 traversal["use_edge_weights"] = json::JsonValue::from(self.graph.traversal.use_edge_weights);
1474 traversal["min_relationship_strength"] = json::JsonValue::from(self.graph.traversal.min_relationship_strength);
1475 graph["traversal"] = traversal;
1476
1477 config_json["graph"] = graph;
1478
1479 let mut text = json::JsonValue::new_object();
1481 text["chunk_size"] = json::JsonValue::from(self.text.chunk_size);
1482 text["chunk_overlap"] = json::JsonValue::from(self.text.chunk_overlap);
1483 let languages_array: Vec<json::JsonValue> = self
1484 .text
1485 .languages
1486 .iter()
1487 .map(|s| json::JsonValue::from(s.as_str()))
1488 .collect();
1489 text["languages"] = json::JsonValue::from(languages_array);
1490 config_json["text"] = text;
1491
1492 let mut entities = json::JsonValue::new_object();
1494 entities["min_confidence"] = json::JsonValue::from(self.entities.min_confidence);
1495 let entity_types_array: Vec<json::JsonValue> = self
1496 .entities
1497 .entity_types
1498 .iter()
1499 .map(|s| json::JsonValue::from(s.as_str()))
1500 .collect();
1501 entities["entity_types"] = json::JsonValue::from(entity_types_array);
1502 entities["use_gleaning"] = json::JsonValue::from(self.entities.use_gleaning);
1503 entities["max_gleaning_rounds"] = json::JsonValue::from(self.entities.max_gleaning_rounds);
1504 config_json["entities"] = entities;
1505
1506 let mut retrieval = json::JsonValue::new_object();
1508 retrieval["top_k"] = json::JsonValue::from(self.retrieval.top_k);
1509 retrieval["search_algorithm"] =
1510 json::JsonValue::from(self.retrieval.search_algorithm.as_str());
1511 config_json["retrieval"] = retrieval;
1512
1513 let mut parallel = json::JsonValue::new_object();
1515 parallel["num_threads"] = json::JsonValue::from(self.parallel.num_threads);
1516 parallel["enabled"] = json::JsonValue::from(self.parallel.enabled);
1517 parallel["min_batch_size"] = json::JsonValue::from(self.parallel.min_batch_size);
1518 parallel["chunk_batch_size"] = json::JsonValue::from(self.parallel.chunk_batch_size);
1519 parallel["parallel_embeddings"] = json::JsonValue::from(self.parallel.parallel_embeddings);
1520 parallel["parallel_graph_ops"] = json::JsonValue::from(self.parallel.parallel_graph_ops);
1521 parallel["parallel_vector_ops"] = json::JsonValue::from(self.parallel.parallel_vector_ops);
1522 config_json["parallel"] = parallel;
1523
1524 let mut enhancements = json::JsonValue::new_object();
1526 enhancements["enabled"] = json::JsonValue::from(self.enhancements.enabled);
1527
1528 let mut query_analysis = json::JsonValue::new_object();
1529 query_analysis["enabled"] = json::JsonValue::from(self.enhancements.query_analysis.enabled);
1530 query_analysis["min_confidence"] =
1531 json::JsonValue::from(self.enhancements.query_analysis.min_confidence);
1532 query_analysis["enable_strategy_suggestion"] =
1533 json::JsonValue::from(self.enhancements.query_analysis.enable_strategy_suggestion);
1534 query_analysis["enable_keyword_analysis"] =
1535 json::JsonValue::from(self.enhancements.query_analysis.enable_keyword_analysis);
1536 query_analysis["enable_complexity_scoring"] =
1537 json::JsonValue::from(self.enhancements.query_analysis.enable_complexity_scoring);
1538 enhancements["query_analysis"] = query_analysis;
1539
1540 let mut adaptive_retrieval = json::JsonValue::new_object();
1541 adaptive_retrieval["enabled"] =
1542 json::JsonValue::from(self.enhancements.adaptive_retrieval.enabled);
1543 adaptive_retrieval["use_query_analysis"] =
1544 json::JsonValue::from(self.enhancements.adaptive_retrieval.use_query_analysis);
1545 adaptive_retrieval["enable_cross_strategy_fusion"] = json::JsonValue::from(
1546 self.enhancements
1547 .adaptive_retrieval
1548 .enable_cross_strategy_fusion,
1549 );
1550 adaptive_retrieval["diversity_threshold"] =
1551 json::JsonValue::from(self.enhancements.adaptive_retrieval.diversity_threshold);
1552 adaptive_retrieval["enable_diversity_selection"] = json::JsonValue::from(
1553 self.enhancements
1554 .adaptive_retrieval
1555 .enable_diversity_selection,
1556 );
1557 adaptive_retrieval["enable_confidence_weighting"] = json::JsonValue::from(
1558 self.enhancements
1559 .adaptive_retrieval
1560 .enable_confidence_weighting,
1561 );
1562 enhancements["adaptive_retrieval"] = adaptive_retrieval;
1563
1564 let mut performance_benchmarking = json::JsonValue::new_object();
1565 performance_benchmarking["enabled"] =
1566 json::JsonValue::from(self.enhancements.performance_benchmarking.enabled);
1567 performance_benchmarking["auto_recommendations"] = json::JsonValue::from(
1568 self.enhancements
1569 .performance_benchmarking
1570 .auto_recommendations,
1571 );
1572 performance_benchmarking["comprehensive_testing"] = json::JsonValue::from(
1573 self.enhancements
1574 .performance_benchmarking
1575 .comprehensive_testing,
1576 );
1577 performance_benchmarking["iterations"] =
1578 json::JsonValue::from(self.enhancements.performance_benchmarking.iterations);
1579 performance_benchmarking["include_parallel"] =
1580 json::JsonValue::from(self.enhancements.performance_benchmarking.include_parallel);
1581 performance_benchmarking["enable_memory_profiling"] = json::JsonValue::from(
1582 self.enhancements
1583 .performance_benchmarking
1584 .enable_memory_profiling,
1585 );
1586 enhancements["performance_benchmarking"] = performance_benchmarking;
1587
1588 let mut enhanced_function_registry = json::JsonValue::new_object();
1589 enhanced_function_registry["enabled"] =
1590 json::JsonValue::from(self.enhancements.enhanced_function_registry.enabled);
1591 enhanced_function_registry["categorization"] =
1592 json::JsonValue::from(self.enhancements.enhanced_function_registry.categorization);
1593 enhanced_function_registry["usage_statistics"] = json::JsonValue::from(
1594 self.enhancements
1595 .enhanced_function_registry
1596 .usage_statistics,
1597 );
1598 enhanced_function_registry["dynamic_registration"] = json::JsonValue::from(
1599 self.enhancements
1600 .enhanced_function_registry
1601 .dynamic_registration,
1602 );
1603 enhanced_function_registry["performance_monitoring"] = json::JsonValue::from(
1604 self.enhancements
1605 .enhanced_function_registry
1606 .performance_monitoring,
1607 );
1608 enhanced_function_registry["recommendation_system"] = json::JsonValue::from(
1609 self.enhancements
1610 .enhanced_function_registry
1611 .recommendation_system,
1612 );
1613 enhancements["enhanced_function_registry"] = enhanced_function_registry;
1614
1615 config_json["enhancements"] = enhancements;
1616
1617 let mut summarization = json::JsonValue::new_object();
1619 summarization["merge_size"] = json::JsonValue::from(self.summarization.merge_size);
1620 summarization["max_summary_length"] = json::JsonValue::from(self.summarization.max_summary_length);
1621 summarization["min_node_size"] = json::JsonValue::from(self.summarization.min_node_size);
1622 summarization["overlap_sentences"] = json::JsonValue::from(self.summarization.overlap_sentences);
1623
1624 let mut llm_config = json::JsonValue::new_object();
1625 llm_config["enabled"] = json::JsonValue::from(self.summarization.llm_config.enabled);
1626 llm_config["model_name"] = json::JsonValue::from(self.summarization.llm_config.model_name.as_str());
1627 llm_config["temperature"] = json::JsonValue::from(self.summarization.llm_config.temperature);
1628 llm_config["max_tokens"] = json::JsonValue::from(self.summarization.llm_config.max_tokens);
1629 let strategy_str = match self.summarization.llm_config.strategy {
1630 crate::summarization::LLMStrategy::Uniform => "uniform",
1631 crate::summarization::LLMStrategy::Adaptive => "adaptive",
1632 crate::summarization::LLMStrategy::Progressive => "progressive",
1633 };
1634 llm_config["strategy"] = json::JsonValue::from(strategy_str);
1635
1636 summarization["llm_config"] = llm_config;
1637 config_json["summarization"] = summarization;
1638
1639 let content = json::stringify_pretty(config_json, 2);
1640 fs::write(path, content)?;
1641 Ok(())
1642 }
1643}