1use crate::Result;
5use serde::{Deserialize, Serialize};
6use std::fs;
7use std::path::Path;
8
/// Root configuration object grouping every settings section.
///
/// Every field carries `#[serde(default)]`, so a section that is absent from
/// the deserialized input is filled from that field type's `Default`
/// implementation (`None` for the optional pipeline sections).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SetConfig {
    /// Approach selection; see [`ModeConfig`].
    #[serde(default)]
    pub mode: ModeConfig,

    /// Semantic pipeline settings; `None` when the section is absent.
    #[serde(default)]
    pub semantic: Option<SemanticPipelineConfig>,

    /// Algorithmic pipeline settings; `None` when the section is absent.
    #[serde(default)]
    pub algorithmic: Option<AlgorithmicPipelineConfig>,

    /// Hybrid pipeline settings; `None` when the section is absent.
    #[serde(default)]
    pub hybrid: Option<HybridPipelineConfig>,

    /// General settings (logging, output directory, threads, profiling).
    #[serde(default)]
    pub general: GeneralConfig,

    /// Pipeline workflow list and per-stage settings.
    #[serde(default)]
    pub pipeline: PipelineConfig,

    /// Storage backend settings.
    #[serde(default)]
    pub storage: StorageConfig,

    /// Model selection settings.
    #[serde(default)]
    pub models: ModelsConfig,

    /// Batching, threading and memory settings.
    #[serde(default)]
    pub performance: PerformanceConfig,

    /// Ollama connection and model settings.
    #[serde(default)]
    pub ollama: OllamaSetConfig,

    /// GLiNER settings.
    #[serde(default)]
    pub gliner: GlinerSetConfig,

    /// Experimental feature flags.
    #[serde(default)]
    pub experimental: ExperimentalConfig,

    /// Top-level entity-extraction settings. This is a separate type from
    /// `pipeline.entity_extraction` ([`EntityExtractionConfig`]).
    #[serde(default)]
    pub entity_extraction: EntityExtractionTopLevelConfig,

    /// Auto-save settings.
    #[serde(default)]
    pub auto_save: AutoSaveSetConfig,
}
68
/// Auto-save settings.
///
/// Keys missing from the input fall back to the per-field defaults noted
/// below; the hand-written `Default` implementation produces the same values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutoSaveSetConfig {
    /// Whether auto-save is turned on; `false` when omitted.
    #[serde(default)]
    pub enabled: bool,

    /// Optional base directory; `None` when omitted.
    #[serde(default)]
    pub base_dir: Option<String>,

    /// Auto-save interval in seconds (per the field name); defaults to 300.
    #[serde(default = "default_auto_save_interval")]
    pub interval_seconds: u64,

    /// Optional workspace name; `None` when omitted.
    #[serde(default)]
    pub workspace_name: Option<String>,

    /// Maximum number of auto-save versions; defaults to 5.
    /// NOTE(review): presumably older versions are pruned beyond this count —
    /// confirm against the code that consumes this value.
    #[serde(default = "default_max_auto_save_versions")]
    pub max_versions: usize,
}
96
97impl Default for AutoSaveSetConfig {
98 fn default() -> Self {
99 Self {
100 enabled: false,
101 base_dir: None,
102 interval_seconds: default_auto_save_interval(),
103 workspace_name: None,
104 max_versions: default_max_auto_save_versions(),
105 }
106 }
107}
108
/// General settings: logging, output location, input document, threading and
/// profiling.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneralConfig {
    /// Log level name; defaults to `"info"`.
    #[serde(default = "default_log_level")]
    pub log_level: String,

    /// Output directory; defaults to `"./output"`.
    #[serde(default = "default_output_dir")]
    pub output_dir: String,

    /// Optional path of the input document; `None` when omitted.
    #[serde(default)]
    pub input_document_path: Option<String>,

    /// Optional thread cap; `None` when omitted.
    /// NOTE(review): how `None` is interpreted is decided by the consumer.
    #[serde(default)]
    pub max_threads: Option<usize>,

    /// Whether profiling is enabled; `false` when omitted.
    #[serde(default)]
    pub enable_profiling: bool,
}
132
/// Pipeline settings: the ordered workflow names plus one nested section per
/// processing stage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineConfig {
    /// Workflow step names; defaults to `extract_text`, `extract_entities`,
    /// `build_graph`, `detect_communities`.
    #[serde(default = "default_workflows")]
    pub workflows: Vec<String>,

    /// Whether parallel execution is enabled; defaults to `true`.
    #[serde(default = "default_true")]
    pub parallel_execution: bool,

    /// Text extraction / chunking settings.
    #[serde(default)]
    pub text_extraction: TextExtractionConfig,

    /// Entity extraction settings for this pipeline.
    #[serde(default)]
    pub entity_extraction: EntityExtractionConfig,

    /// Graph building settings.
    #[serde(default)]
    pub graph_building: GraphBuildingConfig,

    /// Community detection settings.
    #[serde(default)]
    pub community_detection: CommunityDetectionConfig,
}
160
/// Text extraction and chunking settings.
///
/// NOTE(review): the unit of the size fields (characters vs. tokens) is not
/// established in this file — confirm against the chunker.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextExtractionConfig {
    /// Chunk size; defaults to 512.
    #[serde(default = "default_chunk_size")]
    pub chunk_size: usize,

    /// Overlap between chunks; defaults to 64.
    #[serde(default = "default_chunk_overlap")]
    pub chunk_overlap: usize,

    /// Whether control characters are cleaned; defaults to `true`.
    #[serde(default = "default_true")]
    pub clean_control_chars: bool,

    /// Minimum chunk size; defaults to 50.
    #[serde(default = "default_min_chunk_size")]
    pub min_chunk_size: usize,

    /// Optional extra cleaning options; `None` when omitted.
    #[serde(default)]
    pub cleaning: Option<CleaningConfig>,
}
184
/// Optional text-cleaning toggles.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CleaningConfig {
    /// Remove URLs; `false` when omitted.
    #[serde(default)]
    pub remove_urls: bool,

    /// Remove e-mail addresses; `false` when omitted.
    #[serde(default)]
    pub remove_emails: bool,

    /// Normalize whitespace; defaults to `true`.
    #[serde(default = "default_true")]
    pub normalize_whitespace: bool,

    /// Remove special characters; `false` when omitted.
    #[serde(default)]
    pub remove_special_chars: bool,
}
204
/// Entity extraction settings used inside [`PipelineConfig`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityExtractionConfig {
    /// Model name; defaults to `"microsoft/DialoGPT-medium"`.
    #[serde(default = "default_ner_model")]
    pub model_name: String,

    /// Sampling temperature; when the key is omitted serde fills it from
    /// `default_extraction_temperature` (0.0).
    #[serde(default = "default_extraction_temperature")]
    pub temperature: f32,

    /// Maximum tokens; defaults to 2048.
    #[serde(default = "default_max_tokens")]
    pub max_tokens: usize,

    /// Optional list of entity type names; `None` when omitted.
    pub entity_types: Option<Vec<String>>,

    /// Confidence threshold; defaults to 0.8.
    #[serde(default = "default_confidence_threshold")]
    pub confidence_threshold: f32,

    /// Optional custom prompt; `None` when omitted.
    pub custom_prompt: Option<String>,

    /// Optional entity filters; `None` when omitted.
    #[serde(default)]
    pub filters: Option<EntityFiltersConfig>,
}
234
/// Filters applied to extracted entities.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityFiltersConfig {
    /// Minimum entity length; defaults to 3.
    #[serde(default = "default_min_entity_length")]
    pub min_entity_length: usize,

    /// Maximum entity length; defaults to 100.
    #[serde(default = "default_max_entity_length")]
    pub max_entity_length: usize,

    /// Optional allow-list of entity type names; `None` when omitted.
    pub allowed_entity_types: Option<Vec<String>>,

    /// Confidence threshold; defaults to 0.8.
    #[serde(default = "default_confidence_threshold")]
    pub confidence_threshold: f32,

    /// Optional allowed patterns; `None` when omitted.
    /// NOTE(review): pattern syntax (regex vs. glob) is not shown here.
    pub allowed_patterns: Option<Vec<String>>,

    /// Optional excluded patterns; `None` when omitted.
    pub excluded_patterns: Option<Vec<String>>,

    /// Whether fuzzy matching is enabled; `false` when omitted.
    #[serde(default)]
    pub enable_fuzzy_matching: bool,
}
263
/// Graph building settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphBuildingConfig {
    /// Relation scorer name; defaults to `"cosine_similarity"`.
    #[serde(default = "default_relation_scorer")]
    pub relation_scorer: String,

    /// Minimum relation score; defaults to 0.7.
    #[serde(default = "default_min_relation_score")]
    pub min_relation_score: f32,

    /// Maximum connections per node; defaults to 10.
    #[serde(default = "default_max_connections")]
    pub max_connections_per_node: usize,

    /// Whether relations are bidirectional; defaults to `true`.
    #[serde(default = "default_true")]
    pub bidirectional_relations: bool,
}
283
/// Community detection settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommunityDetectionConfig {
    /// Algorithm name; defaults to `"leiden"`.
    #[serde(default = "default_community_algorithm")]
    pub algorithm: String,

    /// Resolution parameter; defaults to 1.0.
    #[serde(default = "default_resolution")]
    pub resolution: f32,

    /// Minimum community size; defaults to 3.
    #[serde(default = "default_min_community_size")]
    pub min_community_size: usize,

    /// Maximum community size; 0 when omitted.
    /// NOTE(review): 0 presumably means "no limit" — confirm with the consumer.
    #[serde(default)]
    pub max_community_size: usize,
}
303
/// Storage backend settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageConfig {
    /// Database type name; defaults to `"sqlite"`.
    #[serde(default = "default_database_type")]
    pub database_type: String,

    /// Database path; defaults to `"./graphrag.db"`.
    #[serde(default = "default_database_path")]
    pub database_path: String,

    /// WAL toggle; defaults to `true`.
    /// NOTE(review): presumably write-ahead logging — confirm.
    #[serde(default = "default_true")]
    pub enable_wal: bool,

    /// Optional PostgreSQL connection settings; `None` when omitted.
    pub postgresql: Option<PostgreSQLConfig>,

    /// Optional Neo4j connection settings; `None` when omitted.
    pub neo4j: Option<Neo4jConfig>,
}
325
/// PostgreSQL connection settings.
///
/// All fields except `pool_size` have no serde default, so they must be
/// present whenever this section is supplied.
///
/// NOTE(review): the derived `Debug` prints every field, including
/// `password`; avoid logging this struct with `{:?}` or consider redaction.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PostgreSQLConfig {
    /// Server host.
    pub host: String,
    /// Server port.
    pub port: u16,
    /// Database name.
    pub database: String,
    /// User name.
    pub username: String,
    /// Password, stored as plain text in this struct.
    pub password: String,
    /// Connection pool size; defaults to 10.
    #[serde(default = "default_pool_size")]
    pub pool_size: usize,
}
343
/// Neo4j connection settings.
///
/// `uri`, `username` and `password` have no serde default and are therefore
/// required whenever this section is supplied.
///
/// NOTE(review): the derived `Debug` prints `password` as well; avoid logging
/// this struct with `{:?}` or consider redaction.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Neo4jConfig {
    /// Connection URI.
    pub uri: String,
    /// User name.
    pub username: String,
    /// Password, stored as plain text in this struct.
    pub password: String,
    /// Whether the connection is encrypted; `false` when omitted.
    #[serde(default)]
    pub encrypted: bool,
}
357
/// Model selection settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelsConfig {
    /// Primary LLM name; defaults to `"gpt-4"`.
    #[serde(default = "default_primary_llm")]
    pub primary_llm: String,

    /// Embedding model name; defaults to `"text-embedding-ada-002"`.
    #[serde(default = "default_embedding_model")]
    pub embedding_model: String,

    /// Maximum context length; defaults to 4096.
    #[serde(default = "default_max_context")]
    pub max_context_length: usize,

    /// Optional LLM sampling parameters; `None` when omitted.
    #[serde(default)]
    pub llm_params: Option<LLMParamsConfig>,

    /// Optional local-model settings; `None` when omitted.
    #[serde(default)]
    pub local: Option<LocalModelsConfig>,
}
381
/// LLM sampling parameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMParamsConfig {
    /// Sampling temperature; defaults to 0.1.
    #[serde(default = "default_temperature")]
    pub temperature: f32,

    /// Top-p value; defaults to 0.9.
    #[serde(default = "default_top_p")]
    pub top_p: f32,

    /// Frequency penalty; 0.0 when omitted.
    #[serde(default)]
    pub frequency_penalty: f32,

    /// Presence penalty; 0.0 when omitted.
    #[serde(default)]
    pub presence_penalty: f32,

    /// Optional stop sequences; `None` when omitted.
    pub stop_sequences: Option<Vec<String>>,
}
404
/// Settings for locally hosted models served through Ollama.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LocalModelsConfig {
    /// Ollama base URL; defaults to `"http://localhost:11434"`.
    #[serde(default = "default_ollama_url")]
    pub ollama_base_url: String,

    /// Model name; defaults to `"llama2:7b"`.
    #[serde(default = "default_ollama_model")]
    pub model_name: String,

    /// Embedding model name; defaults to `"nomic-embed-text"`.
    #[serde(default = "default_ollama_embedding")]
    pub embedding_model: String,
}
420
/// Batching, threading and memory settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceConfig {
    /// Whether batch processing is enabled; defaults to `true`.
    #[serde(default = "default_true")]
    pub batch_processing: bool,

    /// Batch size; defaults to 100.
    #[serde(default = "default_batch_size")]
    pub batch_size: usize,

    /// Number of worker threads; defaults to 4.
    #[serde(default = "default_worker_threads")]
    pub worker_threads: usize,

    /// Memory limit in megabytes (per the field name); defaults to 1024.
    #[serde(default = "default_memory_limit")]
    pub memory_limit_mb: usize,
}
440
/// Ollama connection and generation settings.
///
/// The four trailing `Option` fields have no serde attribute and deserialize
/// to `None` when their key is missing. The hand-written `Default`
/// implementation instead sets `max_tokens` to `Some(800)` and `temperature`
/// to `Some(0.3)`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OllamaSetConfig {
    /// Whether Ollama is enabled; defaults to `true`.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Host, including scheme; defaults to `"http://localhost"`.
    #[serde(default = "default_ollama_host")]
    pub host: String,

    /// Port; defaults to 11434.
    #[serde(default = "default_ollama_port")]
    pub port: u16,

    /// Chat model name; defaults to `"llama3.1:8b"`.
    #[serde(default = "default_chat_model")]
    pub chat_model: String,

    /// Embedding model name; defaults to `"nomic-embed-text"`.
    #[serde(default = "default_embedding_model_ollama")]
    pub embedding_model: String,

    /// Request timeout in seconds (per the field name); defaults to 60.
    #[serde(default = "default_timeout")]
    pub timeout_seconds: u64,

    /// Maximum retries; defaults to 3.
    #[serde(default = "default_max_retries")]
    pub max_retries: u32,

    /// Whether to fall back to hash-based behavior; `false` when omitted.
    /// NOTE(review): presumably hash embeddings when Ollama is unavailable —
    /// confirm against the caller.
    #[serde(default)]
    pub fallback_to_hash: bool,

    /// Optional maximum tokens.
    pub max_tokens: Option<u32>,

    /// Optional sampling temperature.
    pub temperature: Option<f32>,

    /// Optional keep-alive value, passed as a string.
    pub keep_alive: Option<String>,

    /// Optional `num_ctx` value.
    /// NOTE(review): presumably the context window size — confirm.
    pub num_ctx: Option<u32>,
}
493
/// GLiNER settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlinerSetConfig {
    /// Whether GLiNER is enabled; `false` when omitted.
    #[serde(default)]
    pub enabled: bool,
    /// Model path; empty string when omitted.
    #[serde(default)]
    pub model_path: String,
    /// Tokenizer path; empty string when omitted.
    #[serde(default)]
    pub tokenizer_path: String,
    /// Mode name; defaults to `"span"`.
    #[serde(default = "default_gliner_mode")]
    pub mode: String,
    /// Entity labels; defaults to person, organization, location, concept.
    #[serde(default = "default_gliner_entity_labels")]
    pub entity_labels: Vec<String>,
    /// Relation labels; defaults to "related to", "part of", "causes".
    #[serde(default = "default_gliner_relation_labels")]
    pub relation_labels: Vec<String>,
    /// Entity score threshold; defaults to 0.4.
    #[serde(default = "default_entity_threshold")]
    pub entity_threshold: f32,
    /// Relation score threshold; defaults to 0.5.
    #[serde(default = "default_relation_threshold")]
    pub relation_threshold: f32,
    /// Whether to use the GPU; `false` when omitted.
    #[serde(default)]
    pub use_gpu: bool,
    /// Optional cap on concurrently processed chunks; `None` when omitted.
    #[serde(default)]
    pub max_concurrent_chunks: Option<usize>,
}
528
/// Default GLiNER `mode`: `"span"`.
fn default_gliner_mode() -> String {
    String::from("span")
}

/// Default GLiNER entity labels.
fn default_gliner_entity_labels() -> Vec<String> {
    ["person", "organization", "location", "concept"]
        .iter()
        .map(|label| String::from(*label))
        .collect()
}

/// Default GLiNER relation labels.
fn default_gliner_relation_labels() -> Vec<String> {
    ["related to", "part of", "causes"]
        .iter()
        .map(|label| String::from(*label))
        .collect()
}

/// Default GLiNER entity threshold: 0.4.
fn default_entity_threshold() -> f32 {
    0.4_f32
}

/// Default GLiNER relation threshold: 0.5.
fn default_relation_threshold() -> f32 {
    0.5_f32
}
551
552impl Default for GlinerSetConfig {
553 fn default() -> Self {
554 Self {
555 enabled: false,
556 model_path: String::new(),
557 tokenizer_path: String::new(),
558 mode: default_gliner_mode(),
559 entity_labels: default_gliner_entity_labels(),
560 relation_labels: default_gliner_relation_labels(),
561 entity_threshold: default_entity_threshold(),
562 relation_threshold: default_relation_threshold(),
563 use_gpu: false,
564 max_concurrent_chunks: None,
565 }
566 }
567}
568
/// Experimental feature flags.
///
/// `Default` is derived, so every flag is `false` and both optional
/// sub-configurations are `None` unless set explicitly.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ExperimentalConfig {
    /// Neural reranking flag.
    #[serde(default)]
    pub neural_reranking: bool,

    /// Federated learning flag.
    #[serde(default)]
    pub federated_learning: bool,

    /// Real-time updates flag.
    #[serde(default)]
    pub real_time_updates: bool,

    /// Distributed processing flag.
    #[serde(default)]
    pub distributed_processing: bool,

    /// LazyGraphRAG flag.
    #[serde(default)]
    pub lazy_graphrag: bool,

    /// E2GraphRAG flag.
    #[serde(default)]
    pub e2_graphrag: bool,

    /// Optional LazyGraphRAG settings; `None` when omitted.
    #[serde(default)]
    pub lazy_graphrag_config: Option<LazyGraphRAGConfig>,

    /// Optional E2GraphRAG settings; `None` when omitted.
    #[serde(default)]
    pub e2_graphrag_config: Option<E2GraphRAGConfig>,
}
604
/// LazyGraphRAG settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LazyGraphRAGConfig {
    /// Whether concept extraction is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_concept_extraction: bool,

    /// Minimum concept length; defaults to 3.
    #[serde(default = "default_min_concept_length")]
    pub min_concept_length: usize,

    /// Maximum words per concept; defaults to 5.
    #[serde(default = "default_max_concept_words")]
    pub max_concept_words: usize,

    /// Co-occurrence threshold; defaults to 1.
    #[serde(default = "default_co_occurrence_threshold")]
    pub co_occurrence_threshold: usize,

    /// Whether query refinement is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_query_refinement: bool,

    /// Maximum refinement iterations; defaults to 3.
    #[serde(default = "default_max_refinement_iterations")]
    pub max_refinement_iterations: usize,

    /// Whether the bidirectional index is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_bidirectional_index: bool,
}
638
639impl Default for LazyGraphRAGConfig {
640 fn default() -> Self {
641 Self {
642 use_concept_extraction: true,
643 min_concept_length: 3,
644 max_concept_words: 5,
645 co_occurrence_threshold: 1,
646 use_query_refinement: true,
647 max_refinement_iterations: 3,
648 use_bidirectional_index: true,
649 }
650 }
651}
652
/// E2GraphRAG settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct E2GraphRAGConfig {
    /// Whether lightweight NER is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_lightweight_ner: bool,

    /// Entity type names; defaults to PERSON, ORGANIZATION, LOCATION, CONCEPT.
    #[serde(default = "default_e2_entity_types")]
    pub entity_types: Vec<String>,

    /// Minimum confidence; defaults to 0.6.
    #[serde(default = "default_e2_min_confidence")]
    pub min_confidence: f32,

    /// Whether capitalization detection is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_capitalization_detection: bool,

    /// Whether noun-phrase extraction is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_noun_phrase_extraction: bool,

    /// Minimum entity frequency; defaults to 1.
    #[serde(default = "default_min_entity_frequency")]
    pub min_entity_frequency: usize,

    /// Whether fast co-occurrence is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_fast_cooccurrence: bool,

    /// Whether the bidirectional index is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_bidirectional_index: bool,
}
690
691impl Default for E2GraphRAGConfig {
692 fn default() -> Self {
693 Self {
694 use_lightweight_ner: true,
695 entity_types: default_e2_entity_types(),
696 min_confidence: 0.6,
697 use_capitalization_detection: true,
698 use_noun_phrase_extraction: true,
699 min_entity_frequency: 1,
700 use_fast_cooccurrence: true,
701 use_bidirectional_index: true,
702 }
703 }
704}
705
/// Approach selection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModeConfig {
    /// Approach name; defaults to `"semantic"`.
    /// NOTE(review): the accepted values are not listed in this file;
    /// presumably they match the `semantic` / `algorithmic` / `hybrid`
    /// sections of `SetConfig` — confirm.
    #[serde(default = "default_approach")]
    pub approach: String,
}
721
722impl Default for ModeConfig {
723 fn default() -> Self {
724 Self {
725 approach: default_approach(),
726 }
727 }
728}
729
/// Semantic pipeline settings.
///
/// The four sub-sections have no serde default, so each must be present
/// whenever this section is supplied. `enabled` deserializes to `false` when
/// omitted, whereas the hand-written `Default` implementation sets it to
/// `true`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticPipelineConfig {
    /// Whether this pipeline is enabled; `false` when the key is omitted.
    #[serde(default)]
    pub enabled: bool,

    /// Embedding settings (required).
    pub embeddings: SemanticEmbeddingsConfig,

    /// Entity extraction settings (required).
    pub entity_extraction: SemanticEntityConfig,

    /// Retrieval settings (required).
    pub retrieval: SemanticRetrievalConfig,

    /// Graph construction settings (required).
    pub graph_construction: SemanticGraphConfig,
}
750
/// Embedding settings for the semantic pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticEmbeddingsConfig {
    /// Backend name; defaults to `"huggingface"`.
    #[serde(default = "default_semantic_embedding_backend")]
    pub backend: String,

    /// Model name; defaults to `"sentence-transformers/all-MiniLM-L6-v2"`.
    #[serde(default = "default_semantic_embedding_model")]
    pub model: String,

    /// Embedding dimension; defaults to 384.
    #[serde(default = "default_semantic_embedding_dim")]
    pub dimension: usize,

    /// Whether to use the GPU; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_gpu: bool,

    /// Similarity metric name; defaults to `"cosine"`.
    #[serde(default = "default_similarity_metric")]
    pub similarity_metric: String,

    /// Batch size; defaults to 100.
    #[serde(default = "default_batch_size")]
    pub batch_size: usize,
}
778
/// Entity extraction settings for the semantic pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticEntityConfig {
    /// Extraction method name; defaults to `"llm"`.
    #[serde(default = "default_semantic_entity_method")]
    pub method: String,

    /// Whether gleaning is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_gleaning: bool,

    /// Maximum gleaning rounds; defaults to 3.
    #[serde(default = "default_max_gleaning_rounds")]
    pub max_gleaning_rounds: usize,

    /// Model name; defaults to `"llama3.1:8b"`, the shared chat-model default.
    #[serde(default = "default_chat_model")]
    pub model: String,

    /// Sampling temperature; defaults to 0.1.
    #[serde(default = "default_semantic_temperature")]
    pub temperature: f32,

    /// Confidence threshold; defaults to 0.7.
    #[serde(default = "default_semantic_confidence")]
    pub confidence_threshold: f32,
}
806
/// Retrieval settings for the semantic pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticRetrievalConfig {
    /// Retrieval strategy name; defaults to `"vector"`.
    #[serde(default = "default_semantic_retrieval_strategy")]
    pub strategy: String,

    /// Whether HNSW is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_hnsw: bool,

    /// HNSW `ef_construction` value; defaults to 200.
    #[serde(default = "default_hnsw_ef_construction")]
    pub hnsw_ef_construction: usize,

    /// HNSW `m` value; defaults to 16.
    #[serde(default = "default_hnsw_m")]
    pub hnsw_m: usize,

    /// Number of results to return; defaults to 10.
    #[serde(default = "default_top_k")]
    pub top_k: usize,

    /// Similarity threshold; defaults to 0.7.
    #[serde(default = "default_semantic_similarity_threshold")]
    pub similarity_threshold: f32,
}
834
/// Graph construction settings for the semantic pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticGraphConfig {
    /// Relation scorer name; defaults to `"embedding_similarity"`.
    #[serde(default = "default_semantic_relation_scorer")]
    pub relation_scorer: String,

    /// Whether transformer embeddings are used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_transformer_embeddings: bool,

    /// Minimum relation score; defaults to 0.7, the same helper
    /// [`GraphBuildingConfig`] uses.
    #[serde(default = "default_min_relation_score")]
    pub min_relation_score: f32,
}
850
/// Algorithmic pipeline settings.
///
/// The four sub-sections have no serde default, so each must be present
/// whenever this section is supplied.
///
/// NOTE(review): unlike `SemanticPipelineConfig` and `HybridPipelineConfig`,
/// this struct derives `Default`. The derive requires every field type to
/// implement `Default`, yields `enabled = false`, and would conflict with any
/// hand-written `impl Default` for this type elsewhere in the file — confirm
/// the derive is intended.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct AlgorithmicPipelineConfig {
    /// Whether this pipeline is enabled; `false` when the key is omitted.
    #[serde(default)]
    pub enabled: bool,

    /// Embedding settings (required).
    pub embeddings: AlgorithmicEmbeddingsConfig,

    /// Entity extraction settings (required).
    pub entity_extraction: AlgorithmicEntityConfig,

    /// Retrieval settings (required).
    pub retrieval: AlgorithmicRetrievalConfig,

    /// Graph construction settings (required).
    pub graph_construction: AlgorithmicGraphConfig,
}
871
/// Embedding settings for the algorithmic pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlgorithmicEmbeddingsConfig {
    /// Backend name; defaults to `"hash"`.
    #[serde(default = "default_algorithmic_embedding_backend")]
    pub backend: String,

    /// Embedding dimension; defaults to 128.
    #[serde(default = "default_algorithmic_embedding_dim")]
    pub dimension: usize,

    /// Whether TF-IDF is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_tfidf: bool,

    /// Vocabulary size; defaults to 10000.
    #[serde(default = "default_vocabulary_size")]
    pub vocabulary_size: usize,

    /// Minimum term frequency; defaults to 2.
    #[serde(default = "default_min_term_frequency")]
    pub min_term_frequency: usize,

    /// Maximum document frequency; defaults to 0.8.
    /// NOTE(review): presumably a fraction of documents — confirm.
    #[serde(default = "default_max_document_frequency")]
    pub max_document_frequency: f32,
}
899
/// Entity extraction settings for the algorithmic pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlgorithmicEntityConfig {
    /// Extraction method name; defaults to `"pattern"`.
    #[serde(default = "default_algorithmic_entity_method")]
    pub method: String,

    /// Whether NER rules are used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_ner_rules: bool,

    /// Whether POS tagging is used; `false` when omitted.
    #[serde(default)]
    pub use_pos_tagging: bool,

    /// Minimum entity length; defaults to 3.
    #[serde(default = "default_min_entity_length")]
    pub min_entity_length: usize,

    /// Confidence threshold; defaults to 0.75.
    #[serde(default = "default_algorithmic_confidence")]
    pub confidence_threshold: f32,

    /// Optional extraction patterns; `None` when omitted.
    /// NOTE(review): pattern syntax is not shown in this file.
    pub patterns: Option<Vec<String>>,
}
926
/// Retrieval settings for the algorithmic pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlgorithmicRetrievalConfig {
    /// Retrieval strategy name; defaults to `"bm25"`.
    #[serde(default = "default_algorithmic_retrieval_strategy")]
    pub strategy: String,

    /// `k1` parameter; filled from `default_bm25_k1` (1.5) when omitted.
    #[serde(default = "default_bm25_k1")]
    pub k1: f32,

    /// `b` parameter; filled from `default_bm25_b` (0.75) when omitted.
    #[serde(default = "default_bm25_b")]
    pub b: f32,

    /// Whether stemming is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_stemming: bool,

    /// Language name; defaults to `"english"`.
    #[serde(default = "default_language")]
    pub language: String,

    /// Number of results to return; defaults to 10.
    #[serde(default = "default_top_k")]
    pub top_k: usize,
}
954
/// Graph construction settings for the algorithmic pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlgorithmicGraphConfig {
    /// Relation scorer name; defaults to `"jaccard"`.
    #[serde(default = "default_algorithmic_relation_scorer")]
    pub relation_scorer: String,

    /// Whether co-occurrence is used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_cooccurrence: bool,

    /// Co-occurrence window size; defaults to 10.
    #[serde(default = "default_cooccurrence_window")]
    pub window_size: usize,

    /// Minimum relation score; defaults to 0.6.
    #[serde(default = "default_algorithmic_min_relation_score")]
    pub min_relation_score: f32,
}
974
/// Hybrid pipeline settings.
///
/// The five sub-sections have no serde default, so each must be present
/// whenever this section is supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HybridPipelineConfig {
    /// Whether this pipeline is enabled; `false` when the key is omitted.
    #[serde(default)]
    pub enabled: bool,

    /// Semantic / algorithmic weights (required).
    pub weights: HybridWeightsConfig,

    /// Embedding settings (required).
    pub embeddings: HybridEmbeddingsConfig,

    /// Entity extraction settings (required).
    pub entity_extraction: HybridEntityConfig,

    /// Retrieval settings (required).
    pub retrieval: HybridRetrievalConfig,

    /// Graph construction settings (required).
    pub graph_construction: HybridGraphConfig,

    /// Fallback strategy name; defaults to `"semantic_first"`.
    #[serde(default = "default_hybrid_fallback_strategy")]
    pub fallback_strategy: String,

    /// Whether cross-validation is enabled; defaults to `true`.
    #[serde(default = "default_true")]
    pub cross_validation: bool,
}
1006
/// Relative weights of the semantic and algorithmic sides in hybrid mode.
///
/// The defaults (0.6 and 0.4) sum to 1.0; nothing in this file enforces that
/// for user-supplied values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HybridWeightsConfig {
    /// Semantic weight; defaults to 0.6.
    #[serde(default = "default_hybrid_semantic_weight")]
    pub semantic_weight: f32,

    /// Algorithmic weight; defaults to 0.4.
    #[serde(default = "default_hybrid_algorithmic_weight")]
    pub algorithmic_weight: f32,
}
1018
/// Embedding settings for the hybrid pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HybridEmbeddingsConfig {
    /// Primary backend name; defaults to `"huggingface"`, the semantic
    /// backend default.
    #[serde(default = "default_semantic_embedding_backend")]
    pub primary: String,

    /// Fallback backend name; defaults to `"hash"`, the algorithmic backend
    /// default.
    #[serde(default = "default_algorithmic_embedding_backend")]
    pub fallback: String,

    /// Whether scores are combined; defaults to `true`.
    #[serde(default = "default_true")]
    pub combine_scores: bool,

    /// Whether automatic fallback is enabled; defaults to `true`.
    #[serde(default = "default_true")]
    pub auto_fallback: bool,
}
1038
/// Entity extraction settings for the hybrid pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HybridEntityConfig {
    /// Whether both extraction methods are used; defaults to `true`.
    #[serde(default = "default_true")]
    pub use_both: bool,

    /// LLM weight; defaults to 0.7.
    #[serde(default = "default_hybrid_llm_weight")]
    pub llm_weight: f32,

    /// Pattern weight; defaults to 0.3.
    #[serde(default = "default_hybrid_pattern_weight")]
    pub pattern_weight: f32,

    /// Whether results are cross-validated; defaults to `true`.
    #[serde(default = "default_true")]
    pub cross_validate: bool,

    /// Confidence boost; defaults to 0.15.
    /// NOTE(review): presumably applied when both methods agree — confirm.
    #[serde(default = "default_hybrid_confidence_boost")]
    pub confidence_boost: f32,
}
1062
/// Retrieval settings for the hybrid pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HybridRetrievalConfig {
    /// Retrieval strategy name; defaults to `"fusion"`.
    #[serde(default = "default_hybrid_retrieval_strategy")]
    pub strategy: String,

    /// Whether vector and BM25 results are combined; defaults to `true`.
    #[serde(default = "default_true")]
    pub combine_vector_bm25: bool,

    /// Vector weight; defaults to 0.6.
    #[serde(default = "default_hybrid_vector_weight")]
    pub vector_weight: f32,

    /// BM25 weight; defaults to 0.4.
    #[serde(default = "default_hybrid_bm25_weight")]
    pub bm25_weight: f32,

    /// RRF constant; defaults to 60.
    /// NOTE(review): presumably the reciprocal-rank-fusion `k` — confirm.
    #[serde(default = "default_rrf_constant")]
    pub rrf_constant: usize,
}
1086
/// Graph construction settings for the hybrid pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HybridGraphConfig {
    /// Primary scorer name; defaults to `"embedding_similarity"`, the
    /// semantic scorer default.
    #[serde(default = "default_semantic_relation_scorer")]
    pub primary_scorer: String,

    /// Fallback scorer name; defaults to `"jaccard"`, the algorithmic scorer
    /// default.
    #[serde(default = "default_algorithmic_relation_scorer")]
    pub fallback_scorer: String,

    /// Whether scores are combined; defaults to `true`.
    #[serde(default = "default_true")]
    pub combine_scores: bool,
}
1102
/// Top-level entity-extraction settings (the `entity_extraction` field of
/// `SetConfig`), covering gleaning, merging and linking.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityExtractionTopLevelConfig {
    /// Whether entity extraction is enabled; defaults to `true`.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Minimum confidence; defaults to 0.8.
    #[serde(default = "default_confidence_threshold")]
    pub min_confidence: f32,

    /// Whether gleaning is used; `false` when omitted.
    #[serde(default)]
    pub use_gleaning: bool,

    /// Maximum gleaning rounds; defaults to 3.
    #[serde(default = "default_gleaning_rounds")]
    pub max_gleaning_rounds: usize,

    /// Gleaning improvement threshold; defaults to 0.1.
    #[serde(default = "default_gleaning_improvement")]
    pub gleaning_improvement_threshold: f32,

    /// Whether semantic merging is enabled; `false` when omitted.
    #[serde(default)]
    pub semantic_merging: bool,

    /// Similarity threshold for merging; defaults to 0.85.
    #[serde(default = "default_merge_threshold")]
    pub merge_similarity_threshold: f32,

    /// Whether automatic linking is enabled; `false` when omitted.
    #[serde(default)]
    pub automatic_linking: bool,

    /// Confidence threshold for linking; defaults to 0.8.
    #[serde(default = "default_confidence_threshold")]
    pub linking_confidence_threshold: f32,
}
1142
1143impl Default for EntityExtractionTopLevelConfig {
1144 fn default() -> Self {
1145 Self {
1146 enabled: true,
1147 min_confidence: default_confidence_threshold(),
1148 use_gleaning: false,
1149 max_gleaning_rounds: default_gleaning_rounds(),
1150 gleaning_improvement_threshold: default_gleaning_improvement(),
1151 semantic_merging: false,
1152 merge_similarity_threshold: default_merge_threshold(),
1153 automatic_linking: false,
1154 linking_confidence_threshold: default_confidence_threshold(),
1155 }
1156 }
1157}
1158
/// Default log level: `"info"`.
fn default_log_level() -> String {
    String::from("info")
}

/// Default output directory: `"./output"`.
fn default_output_dir() -> String {
    String::from("./output")
}

/// Shared helper for boolean fields whose serde default is `true`.
fn default_true() -> bool {
    true
}

/// Default workflow step names, in order.
fn default_workflows() -> Vec<String> {
    [
        "extract_text",
        "extract_entities",
        "build_graph",
        "detect_communities",
    ]
    .iter()
    .map(|step| String::from(*step))
    .collect()
}

/// Default chunk size: 512.
fn default_chunk_size() -> usize {
    512_usize
}

/// Default chunk overlap: 64.
fn default_chunk_overlap() -> usize {
    64_usize
}

/// Default minimum chunk size: 50.
fn default_min_chunk_size() -> usize {
    50_usize
}
/// Default NER model name.
fn default_ner_model() -> String {
    String::from("microsoft/DialoGPT-medium")
}

/// Default LLM sampling temperature: 0.1.
fn default_temperature() -> f32 {
    0.1_f32
}

/// Default temperature for entity extraction: 0.0.
fn default_extraction_temperature() -> f32 {
    0.0_f32
}

/// Default maximum tokens: 2048.
fn default_max_tokens() -> usize {
    2048_usize
}

/// Default minimum entity length: 3.
fn default_min_entity_length() -> usize {
    3_usize
}

/// Default maximum entity length: 100.
fn default_max_entity_length() -> usize {
    100_usize
}

/// Default confidence threshold: 0.8.
fn default_confidence_threshold() -> f32 {
    0.8_f32
}

/// Default relation scorer name.
fn default_relation_scorer() -> String {
    String::from("cosine_similarity")
}

/// Default minimum relation score: 0.7.
fn default_min_relation_score() -> f32 {
    0.7_f32
}

/// Default maximum connections per node: 10.
fn default_max_connections() -> usize {
    10_usize
}

/// Default community detection algorithm name.
fn default_community_algorithm() -> String {
    String::from("leiden")
}

/// Default resolution: 1.0.
fn default_resolution() -> f32 {
    1.0_f32
}

/// Default minimum community size: 3.
fn default_min_community_size() -> usize {
    3_usize
}
/// Default database type name.
fn default_database_type() -> String {
    String::from("sqlite")
}

/// Default database path.
fn default_database_path() -> String {
    String::from("./graphrag.db")
}

/// Default connection pool size: 10.
fn default_pool_size() -> usize {
    10_usize
}

/// Default primary LLM name.
fn default_primary_llm() -> String {
    String::from("gpt-4")
}

/// Default embedding model name.
fn default_embedding_model() -> String {
    String::from("text-embedding-ada-002")
}

/// Default maximum context length: 4096.
fn default_max_context() -> usize {
    4096_usize
}

/// Default top-p value: 0.9.
fn default_top_p() -> f32 {
    0.9_f32
}

/// Default Ollama base URL, including the port.
fn default_ollama_url() -> String {
    String::from("http://localhost:11434")
}

/// Default local Ollama model name.
fn default_ollama_model() -> String {
    String::from("llama2:7b")
}

/// Default local Ollama embedding model name.
fn default_ollama_embedding() -> String {
    String::from("nomic-embed-text")
}

/// Default batch size: 100.
fn default_batch_size() -> usize {
    100_usize
}

/// Default worker thread count: 4.
fn default_worker_threads() -> usize {
    4_usize
}

/// Default memory limit: 1024 (megabytes, per the field it backs).
fn default_memory_limit() -> usize {
    1024_usize
}
/// Default Ollama host, scheme included and port excluded.
fn default_ollama_host() -> String {
    String::from("http://localhost")
}

/// Default Ollama port: 11434.
fn default_ollama_port() -> u16 {
    11434_u16
}

/// Default chat model name.
fn default_chat_model() -> String {
    String::from("llama3.1:8b")
}

/// Default Ollama embedding model name.
fn default_embedding_model_ollama() -> String {
    String::from("nomic-embed-text")
}

/// Default timeout: 60 (seconds, per the field it backs).
fn default_timeout() -> u64 {
    60_u64
}

/// Default maximum retries: 3.
fn default_max_retries() -> u32 {
    3_u32
}

/// Default gleaning rounds: 3.
fn default_gleaning_rounds() -> usize {
    3_usize
}

/// Default gleaning improvement threshold: 0.1.
fn default_gleaning_improvement() -> f32 {
    0.1_f32
}

/// Default merge similarity threshold: 0.85.
fn default_merge_threshold() -> f32 {
    0.85_f32
}
1291
/// Default approach name: `"semantic"`.
fn default_approach() -> String {
    String::from("semantic")
}
1300
/// Default semantic embedding backend name.
fn default_semantic_embedding_backend() -> String {
    String::from("huggingface")
}

/// Default semantic embedding model name.
fn default_semantic_embedding_model() -> String {
    String::from("sentence-transformers/all-MiniLM-L6-v2")
}

/// Default semantic embedding dimension: 384.
fn default_semantic_embedding_dim() -> usize {
    384_usize
}

/// Default similarity metric name.
fn default_similarity_metric() -> String {
    String::from("cosine")
}

/// Default semantic entity extraction method name.
fn default_semantic_entity_method() -> String {
    String::from("llm")
}

/// Default maximum gleaning rounds for the semantic pipeline: 3.
fn default_max_gleaning_rounds() -> usize {
    3_usize
}

/// Default semantic extraction temperature: 0.1.
fn default_semantic_temperature() -> f32 {
    0.1_f32
}

/// Default semantic confidence threshold: 0.7.
fn default_semantic_confidence() -> f32 {
    0.7_f32
}

/// Default semantic retrieval strategy name.
fn default_semantic_retrieval_strategy() -> String {
    String::from("vector")
}

/// Default HNSW `ef_construction`: 200.
fn default_hnsw_ef_construction() -> usize {
    200_usize
}

/// Default HNSW `m`: 16.
fn default_hnsw_m() -> usize {
    16_usize
}

/// Default `top_k`: 10.
fn default_top_k() -> usize {
    10_usize
}

/// Default semantic similarity threshold: 0.7.
fn default_semantic_similarity_threshold() -> f32 {
    0.7_f32
}

/// Default semantic relation scorer name.
fn default_semantic_relation_scorer() -> String {
    String::from("embedding_similarity")
}
1344
/// Default algorithmic embedding backend name.
fn default_algorithmic_embedding_backend() -> String {
    String::from("hash")
}

/// Default algorithmic embedding dimension: 128.
fn default_algorithmic_embedding_dim() -> usize {
    128_usize
}

/// Default vocabulary size: 10000.
fn default_vocabulary_size() -> usize {
    10_000_usize
}

/// Default minimum term frequency: 2.
fn default_min_term_frequency() -> usize {
    2_usize
}

/// Default maximum document frequency: 0.8.
fn default_max_document_frequency() -> f32 {
    0.8_f32
}

/// Default algorithmic entity extraction method name.
fn default_algorithmic_entity_method() -> String {
    String::from("pattern")
}

/// Default algorithmic confidence threshold: 0.75.
fn default_algorithmic_confidence() -> f32 {
    0.75_f32
}

/// Default algorithmic retrieval strategy name.
fn default_algorithmic_retrieval_strategy() -> String {
    String::from("bm25")
}

/// Default `k1` parameter: 1.5.
fn default_bm25_k1() -> f32 {
    1.5_f32
}

/// Default `b` parameter: 0.75.
fn default_bm25_b() -> f32 {
    0.75_f32
}

/// Default language name.
fn default_language() -> String {
    String::from("english")
}

/// Default algorithmic relation scorer name.
fn default_algorithmic_relation_scorer() -> String {
    String::from("jaccard")
}

/// Default co-occurrence window size: 10.
fn default_cooccurrence_window() -> usize {
    10_usize
}

/// Default algorithmic minimum relation score: 0.6.
fn default_algorithmic_min_relation_score() -> f32 {
    0.6_f32
}
1388
/// Default hybrid semantic weight: 0.6.
fn default_hybrid_semantic_weight() -> f32 {
    0.6_f32
}

/// Default hybrid algorithmic weight: 0.4.
fn default_hybrid_algorithmic_weight() -> f32 {
    0.4_f32
}

/// Default hybrid LLM weight: 0.7.
fn default_hybrid_llm_weight() -> f32 {
    0.7_f32
}

/// Default hybrid pattern weight: 0.3.
fn default_hybrid_pattern_weight() -> f32 {
    0.3_f32
}

/// Default hybrid confidence boost: 0.15.
fn default_hybrid_confidence_boost() -> f32 {
    0.15_f32
}

/// Default hybrid retrieval strategy name.
fn default_hybrid_retrieval_strategy() -> String {
    String::from("fusion")
}

/// Default hybrid vector weight: 0.6.
fn default_hybrid_vector_weight() -> f32 {
    0.6_f32
}

/// Default hybrid BM25 weight: 0.4.
fn default_hybrid_bm25_weight() -> f32 {
    0.4_f32
}

/// Default RRF constant: 60.
fn default_rrf_constant() -> usize {
    60_usize
}

/// Default hybrid fallback strategy name.
fn default_hybrid_fallback_strategy() -> String {
    String::from("semantic_first")
}
/// Default auto-save interval: 300 (seconds, per the field it backs).
fn default_auto_save_interval() -> u64 {
    300_u64
}

/// Default maximum number of auto-save versions: 5.
fn default_max_auto_save_versions() -> usize {
    5_usize
}
1426
/// Default minimum concept length: 3.
fn default_min_concept_length() -> usize {
    3_usize
}

/// Default maximum words per concept: 5.
fn default_max_concept_words() -> usize {
    5_usize
}

/// Default co-occurrence threshold: 1.
fn default_co_occurrence_threshold() -> usize {
    1_usize
}

/// Default maximum refinement iterations: 3.
fn default_max_refinement_iterations() -> usize {
    3_usize
}
1440
/// Default E2GraphRAG entity type names (upper-case).
fn default_e2_entity_types() -> Vec<String> {
    ["PERSON", "ORGANIZATION", "LOCATION", "CONCEPT"]
        .iter()
        .map(|kind| String::from(*kind))
        .collect()
}

/// Default E2GraphRAG minimum confidence: 0.6.
fn default_e2_min_confidence() -> f32 {
    0.6_f32
}

/// Default minimum entity frequency: 1.
fn default_min_entity_frequency() -> usize {
    1_usize
}
1456
1457impl Default for GeneralConfig {
1458 fn default() -> Self {
1459 Self {
1460 log_level: default_log_level(),
1461 output_dir: default_output_dir(),
1462 input_document_path: None,
1463 max_threads: None,
1464 enable_profiling: false,
1465 }
1466 }
1467}
1468
1469impl Default for PipelineConfig {
1470 fn default() -> Self {
1471 Self {
1472 workflows: default_workflows(),
1473 parallel_execution: default_true(),
1474 text_extraction: TextExtractionConfig::default(),
1475 entity_extraction: EntityExtractionConfig::default(),
1476 graph_building: GraphBuildingConfig::default(),
1477 community_detection: CommunityDetectionConfig::default(),
1478 }
1479 }
1480}
1481
1482impl Default for TextExtractionConfig {
1483 fn default() -> Self {
1484 Self {
1485 chunk_size: default_chunk_size(),
1486 chunk_overlap: default_chunk_overlap(),
1487 clean_control_chars: default_true(),
1488 min_chunk_size: default_min_chunk_size(),
1489 cleaning: None,
1490 }
1491 }
1492}
1493
1494impl Default for EntityExtractionConfig {
1495 fn default() -> Self {
1496 Self {
1497 model_name: default_ner_model(),
1498 temperature: default_temperature(),
1499 max_tokens: default_max_tokens(),
1500 entity_types: None,
1501 confidence_threshold: default_confidence_threshold(),
1502 custom_prompt: None,
1503 filters: None,
1504 }
1505 }
1506}
1507
1508impl Default for GraphBuildingConfig {
1509 fn default() -> Self {
1510 Self {
1511 relation_scorer: default_relation_scorer(),
1512 min_relation_score: default_min_relation_score(),
1513 max_connections_per_node: default_max_connections(),
1514 bidirectional_relations: default_true(),
1515 }
1516 }
1517}
1518
1519impl Default for CommunityDetectionConfig {
1520 fn default() -> Self {
1521 Self {
1522 algorithm: default_community_algorithm(),
1523 resolution: default_resolution(),
1524 min_community_size: default_min_community_size(),
1525 max_community_size: 0,
1526 }
1527 }
1528}
1529
1530impl Default for StorageConfig {
1531 fn default() -> Self {
1532 Self {
1533 database_type: default_database_type(),
1534 database_path: default_database_path(),
1535 enable_wal: default_true(),
1536 postgresql: None,
1537 neo4j: None,
1538 }
1539 }
1540}
1541
1542impl Default for ModelsConfig {
1543 fn default() -> Self {
1544 Self {
1545 primary_llm: default_primary_llm(),
1546 embedding_model: default_embedding_model(),
1547 max_context_length: default_max_context(),
1548 llm_params: None,
1549 local: None,
1550 }
1551 }
1552}
1553
1554impl Default for PerformanceConfig {
1555 fn default() -> Self {
1556 Self {
1557 batch_processing: default_true(),
1558 batch_size: default_batch_size(),
1559 worker_threads: default_worker_threads(),
1560 memory_limit_mb: default_memory_limit(),
1561 }
1562 }
1563}
1564
1565impl Default for OllamaSetConfig {
1566 fn default() -> Self {
1567 Self {
1568 enabled: default_true(),
1569 host: default_ollama_host(),
1570 port: default_ollama_port(),
1571 chat_model: default_chat_model(),
1572 embedding_model: default_embedding_model_ollama(),
1573 timeout_seconds: default_timeout(),
1574 max_retries: default_max_retries(),
1575 fallback_to_hash: false,
1576 max_tokens: Some(800),
1577 temperature: Some(0.3),
1578 keep_alive: None,
1579 num_ctx: None,
1580 }
1581 }
1582}
1583
1584impl Default for SemanticPipelineConfig {
1589 fn default() -> Self {
1590 Self {
1591 enabled: true,
1592 embeddings: SemanticEmbeddingsConfig::default(),
1593 entity_extraction: SemanticEntityConfig::default(),
1594 retrieval: SemanticRetrievalConfig::default(),
1595 graph_construction: SemanticGraphConfig::default(),
1596 }
1597 }
1598}
1599
1600impl Default for SemanticEmbeddingsConfig {
1601 fn default() -> Self {
1602 Self {
1603 backend: default_semantic_embedding_backend(),
1604 model: default_semantic_embedding_model(),
1605 dimension: default_semantic_embedding_dim(),
1606 use_gpu: default_true(),
1607 similarity_metric: default_similarity_metric(),
1608 batch_size: default_batch_size(),
1609 }
1610 }
1611}
1612
1613impl Default for SemanticEntityConfig {
1614 fn default() -> Self {
1615 Self {
1616 method: default_semantic_entity_method(),
1617 use_gleaning: default_true(),
1618 max_gleaning_rounds: default_max_gleaning_rounds(),
1619 model: default_chat_model(),
1620 temperature: default_semantic_temperature(),
1621 confidence_threshold: default_semantic_confidence(),
1622 }
1623 }
1624}
1625
1626impl Default for SemanticRetrievalConfig {
1627 fn default() -> Self {
1628 Self {
1629 strategy: default_semantic_retrieval_strategy(),
1630 use_hnsw: default_true(),
1631 hnsw_ef_construction: default_hnsw_ef_construction(),
1632 hnsw_m: default_hnsw_m(),
1633 top_k: default_top_k(),
1634 similarity_threshold: default_semantic_similarity_threshold(),
1635 }
1636 }
1637}
1638
1639impl Default for SemanticGraphConfig {
1640 fn default() -> Self {
1641 Self {
1642 relation_scorer: default_semantic_relation_scorer(),
1643 use_transformer_embeddings: default_true(),
1644 min_relation_score: default_min_relation_score(),
1645 }
1646 }
1647}
1648
1649impl Default for AlgorithmicEmbeddingsConfig {
1650 fn default() -> Self {
1651 Self {
1652 backend: default_algorithmic_embedding_backend(),
1653 dimension: default_algorithmic_embedding_dim(),
1654 use_tfidf: default_true(),
1655 vocabulary_size: default_vocabulary_size(),
1656 min_term_frequency: default_min_term_frequency(),
1657 max_document_frequency: default_max_document_frequency(),
1658 }
1659 }
1660}
1661
1662impl Default for AlgorithmicEntityConfig {
1663 fn default() -> Self {
1664 Self {
1665 method: default_algorithmic_entity_method(),
1666 use_ner_rules: default_true(),
1667 use_pos_tagging: false,
1668 min_entity_length: default_min_entity_length(),
1669 confidence_threshold: default_algorithmic_confidence(),
1670 patterns: None,
1671 }
1672 }
1673}
1674
1675impl Default for AlgorithmicRetrievalConfig {
1676 fn default() -> Self {
1677 Self {
1678 strategy: default_algorithmic_retrieval_strategy(),
1679 k1: default_bm25_k1(),
1680 b: default_bm25_b(),
1681 use_stemming: default_true(),
1682 language: default_language(),
1683 top_k: default_top_k(),
1684 }
1685 }
1686}
1687
1688impl Default for AlgorithmicGraphConfig {
1689 fn default() -> Self {
1690 Self {
1691 relation_scorer: default_algorithmic_relation_scorer(),
1692 use_cooccurrence: default_true(),
1693 window_size: default_cooccurrence_window(),
1694 min_relation_score: default_algorithmic_min_relation_score(),
1695 }
1696 }
1697}
1698
1699impl Default for HybridPipelineConfig {
1700 fn default() -> Self {
1701 Self {
1702 enabled: false,
1703 weights: HybridWeightsConfig::default(),
1704 embeddings: HybridEmbeddingsConfig::default(),
1705 entity_extraction: HybridEntityConfig::default(),
1706 retrieval: HybridRetrievalConfig::default(),
1707 graph_construction: HybridGraphConfig::default(),
1708 fallback_strategy: default_hybrid_fallback_strategy(),
1709 cross_validation: default_true(),
1710 }
1711 }
1712}
1713
1714impl Default for HybridWeightsConfig {
1715 fn default() -> Self {
1716 Self {
1717 semantic_weight: default_hybrid_semantic_weight(),
1718 algorithmic_weight: default_hybrid_algorithmic_weight(),
1719 }
1720 }
1721}
1722
1723impl Default for HybridEmbeddingsConfig {
1724 fn default() -> Self {
1725 Self {
1726 primary: default_semantic_embedding_backend(),
1727 fallback: default_algorithmic_embedding_backend(),
1728 combine_scores: default_true(),
1729 auto_fallback: default_true(),
1730 }
1731 }
1732}
1733
1734impl Default for HybridEntityConfig {
1735 fn default() -> Self {
1736 Self {
1737 use_both: default_true(),
1738 llm_weight: default_hybrid_llm_weight(),
1739 pattern_weight: default_hybrid_pattern_weight(),
1740 cross_validate: default_true(),
1741 confidence_boost: default_hybrid_confidence_boost(),
1742 }
1743 }
1744}
1745
1746impl Default for HybridRetrievalConfig {
1747 fn default() -> Self {
1748 Self {
1749 strategy: default_hybrid_retrieval_strategy(),
1750 combine_vector_bm25: default_true(),
1751 vector_weight: default_hybrid_vector_weight(),
1752 bm25_weight: default_hybrid_bm25_weight(),
1753 rrf_constant: default_rrf_constant(),
1754 }
1755 }
1756}
1757
1758impl Default for HybridGraphConfig {
1759 fn default() -> Self {
1760 Self {
1761 primary_scorer: default_semantic_relation_scorer(),
1762 fallback_scorer: default_algorithmic_relation_scorer(),
1763 combine_scores: default_true(),
1764 }
1765 }
1766}
1767
1768impl SetConfig {
1769 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
1771 let path_ref = path.as_ref();
1772 let content = fs::read_to_string(path_ref)?;
1773
1774 let extension = path_ref.extension().and_then(|e| e.to_str()).unwrap_or("");
1776
1777 let config: SetConfig = match extension {
1778 #[cfg(feature = "json5-support")]
1779 "json5" | "json" => {
1780 json5::from_str(&content).map_err(|e| crate::core::GraphRAGError::Config {
1781 message: format!("JSON5 parse error: {e}"),
1782 })?
1783 },
1784 #[cfg(not(feature = "json5-support"))]
1785 "json5" | "json" => {
1786 return Err(crate::core::GraphRAGError::Config {
1787 message: "JSON5 support not enabled. Rebuild with --features json5-support"
1788 .to_string(),
1789 });
1790 },
1791 _ => toml::from_str(&content).map_err(|e| crate::core::GraphRAGError::Config {
1792 message: format!("TOML parse error: {e}"),
1793 })?,
1794 };
1795
1796 Ok(config)
1797 }
1798
1799 pub fn save_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
1801 let toml_string =
1802 toml::to_string_pretty(&self).map_err(|e| crate::core::GraphRAGError::Config {
1803 message: format!("TOML serialize error: {e}"),
1804 })?;
1805
1806 let commented_toml = format!(
1808 "# =============================================================================\n\
1809 # GraphRAG Configuration File\n\
1810 # Complete configuration with extensive parameters for easy customization\n\
1811 # =============================================================================\n\n{toml_string}"
1812 );
1813
1814 fs::write(path, commented_toml)?;
1815 Ok(())
1816 }
1817
1818 pub fn to_graphrag_config(&self) -> crate::Config {
1820 let mut config = crate::Config {
1821 approach: self.mode.approach.clone(),
1822 ..Default::default()
1823 };
1824
1825 config.text.chunk_size = self.pipeline.text_extraction.chunk_size;
1827 config.text.chunk_overlap = self.pipeline.text_extraction.chunk_overlap;
1828
1829 config.entities.min_confidence = self.entity_extraction.min_confidence;
1831
1832 if let Some(ref types) = self.pipeline.entity_extraction.entity_types {
1834 config.entities.entity_types = types.clone();
1835 }
1836
1837 match self.mode.approach.as_str() {
1842 "semantic" => {
1843 if let Some(ref semantic) = self.semantic {
1844 config.entities.use_gleaning = semantic.entity_extraction.use_gleaning;
1845 config.entities.max_gleaning_rounds =
1846 semantic.entity_extraction.max_gleaning_rounds;
1847 config.entities.min_confidence =
1848 semantic.entity_extraction.confidence_threshold;
1849 } else {
1850 config.entities.use_gleaning = self.entity_extraction.use_gleaning;
1852 config.entities.max_gleaning_rounds =
1853 self.entity_extraction.max_gleaning_rounds;
1854 config.entities.min_confidence = self.entity_extraction.min_confidence;
1855 }
1856 },
1857 "algorithmic" => {
1858 config.entities.use_gleaning = false;
1860 if let Some(ref algorithmic) = self.algorithmic {
1861 config.entities.min_confidence =
1862 algorithmic.entity_extraction.confidence_threshold;
1863 }
1864 },
1865 "hybrid" => {
1866 config.entities.use_gleaning = true;
1868 if self.hybrid.is_some() {
1869 config.entities.max_gleaning_rounds = 2; }
1872 },
1873 _ => {
1874 config.entities.use_gleaning = self.entity_extraction.use_gleaning;
1876 config.entities.max_gleaning_rounds = self.entity_extraction.max_gleaning_rounds;
1877 },
1878 }
1879
1880 config.graph.similarity_threshold = self.pipeline.graph_building.min_relation_score;
1882 config.graph.max_connections = self.pipeline.graph_building.max_connections_per_node;
1883 config.graph.extract_relationships = true; config.graph.relationship_confidence_threshold = 0.5; config.retrieval.top_k = 10; config.embeddings.dimension = 768; config.embeddings.backend = "ollama".to_string();
1892 config.embeddings.fallback_to_hash = self.ollama.fallback_to_hash;
1893
1894 config.parallel.enabled = self.pipeline.parallel_execution;
1896 config.parallel.num_threads = self.performance.worker_threads;
1897
1898 config.ollama = crate::ollama::OllamaConfig {
1900 enabled: self.ollama.enabled,
1901 host: self.ollama.host.clone(),
1902 port: self.ollama.port,
1903 chat_model: self.ollama.chat_model.clone(),
1904 embedding_model: self.ollama.embedding_model.clone(),
1905 timeout_seconds: self.ollama.timeout_seconds,
1906 max_retries: self.ollama.max_retries,
1907 fallback_to_hash: self.ollama.fallback_to_hash,
1908 max_tokens: self.ollama.max_tokens,
1909 temperature: self.ollama.temperature,
1910 enable_caching: true,
1911 keep_alive: self.ollama.keep_alive.clone(),
1912 num_ctx: self.ollama.num_ctx,
1913 };
1914
1915 config.gliner = crate::config::GlinerConfig {
1917 enabled: self.gliner.enabled,
1918 model_path: self.gliner.model_path.clone(),
1919 tokenizer_path: self.gliner.tokenizer_path.clone(),
1920 mode: self.gliner.mode.clone(),
1921 entity_labels: self.gliner.entity_labels.clone(),
1922 relation_labels: self.gliner.relation_labels.clone(),
1923 entity_threshold: self.gliner.entity_threshold,
1924 relation_threshold: self.gliner.relation_threshold,
1925 use_gpu: self.gliner.use_gpu,
1926 max_concurrent_chunks: self.gliner.max_concurrent_chunks,
1927 };
1928
1929 config.auto_save = crate::config::AutoSaveConfig {
1931 enabled: self.auto_save.enabled,
1932 base_dir: self.auto_save.base_dir.clone(),
1933 interval_seconds: self.auto_save.interval_seconds,
1934 workspace_name: self.auto_save.workspace_name.clone(),
1935 max_versions: self.auto_save.max_versions,
1936 };
1937
1938 config
1939 }
1940}
1941
/// Checks that the defaults of the file-level `*SetConfig` structs agree with
/// the defaults of the runtime config structs they are converted into.
#[cfg(test)]
mod drift_guard_tests {
    use super::*;
    use crate::config::{AutoSaveConfig, GlinerConfig};

    /// Compares the default `GlinerSetConfig` with the default `GlinerConfig`
    /// on mode, labels, thresholds and GPU usage.
    #[test]
    fn gliner_setconfig_default_matches_runtime() {
        let file_side = GlinerSetConfig::default();
        let runtime_side = GlinerConfig::default();

        assert_eq!(file_side.mode, runtime_side.mode, "gliner.mode drifted");
        assert_eq!(
            file_side.entity_labels, runtime_side.entity_labels,
            "gliner.entity_labels drifted"
        );
        assert_eq!(
            file_side.relation_labels, runtime_side.relation_labels,
            "gliner.relation_labels drifted"
        );
        assert_eq!(
            file_side.entity_threshold, runtime_side.entity_threshold,
            "gliner.entity_threshold drifted"
        );
        assert_eq!(
            file_side.relation_threshold, runtime_side.relation_threshold,
            "gliner.relation_threshold drifted"
        );
        assert_eq!(
            file_side.use_gpu, runtime_side.use_gpu,
            "gliner.use_gpu drifted"
        );
    }

    /// Compares the default `AutoSaveSetConfig` with the default
    /// `AutoSaveConfig` on the enabled flag, interval and version limit.
    #[test]
    fn autosave_setconfig_default_matches_runtime() {
        let file_side = AutoSaveSetConfig::default();
        let runtime_side = AutoSaveConfig::default();

        assert_eq!(
            file_side.enabled, runtime_side.enabled,
            "auto_save.enabled drifted"
        );
        assert_eq!(
            file_side.interval_seconds, runtime_side.interval_seconds,
            "auto_save.interval_seconds drifted"
        );
        assert_eq!(
            file_side.max_versions, runtime_side.max_versions,
            "auto_save.max_versions drifted"
        );
    }
}