1#![allow(
2 clippy::expect_used,
3 reason = "RwLock/Mutex expect calls — lock poison is unrecoverable"
4)]
5use std::collections::HashMap;
8use std::sync::atomic::{AtomicU64, Ordering};
9use std::sync::{Arc, RwLock, RwLockReadGuard};
10
11use serde::{Deserialize, Serialize};
12
/// File name used as the default value of `Settings::default_system_prompt_path`.
pub const DEFAULT_SYSTEM_PROMPT_PATH: &str = "answer_simple_question.txt";
14
/// Flat collection of every runtime setting.
///
/// Values start from the `Default` impl and may be overridden from environment
/// variables by `overlay_from_env`. Because of `#[serde(default)]`, any field
/// missing from a deserialized document falls back to its `Default` value.
///
/// Each field below lists the environment variable that overrides it (if any)
/// and its default. Fields without an "Env" note are not read from the
/// environment by `overlay_from_env`.
///
/// NOTE(review): `Debug` is derived, so formatting this struct prints API keys
/// and passwords verbatim — avoid logging it as-is.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct Settings {
    /// Env `COGNEE_DEFAULT_USER_ID`; default is the all-zero UUID string.
    pub default_user_id: String,
    /// Env `COGNEE_DEFAULT_DATASET_NAME`; default `"main_dataset"`.
    pub default_dataset_name: String,

    /// Env `COGNEE_SYSTEM_ROOT_DIRECTORY`; default `"./.cognee_system"`.
    pub system_root_directory: String,
    /// Env `COGNEE_DATA_ROOT_DIRECTORY`; default `"./.data_storage"`.
    pub data_root_directory: String,
    /// Env `CACHE_ROOT_DIRECTORY`; default `"./.cognee_cache"`.
    pub cache_root_directory: String,
    /// Env `COGNEE_LOGS_DIR`; default `"./logs"`.
    pub logs_root_directory: String,
    /// Default `"none"`.
    pub monitoring_tool: String,

    /// Default empty.
    pub classification_model: String,
    /// Default empty.
    pub summarization_model: String,
    /// Default `"KnowledgeGraph"`.
    pub graph_model: String,

    /// Optional JSON schema for summaries; default `None`.
    ///
    /// Skipped by serde in both directions, so it never appears in serialized
    /// settings and is always `None` after deserialization.
    #[serde(skip)]
    pub summarization_schema: Option<serde_json::Value>,

    /// Env `LLM_PROVIDER`; default `"openai"`.
    pub llm_provider: String,
    /// Env `LLM_MODEL` (alias `OPENAI_MODEL`); default `"openai/gpt-5-mini"`.
    pub llm_model: String,
    /// Env `LLM_API_KEY` (alias `OPENAI_TOKEN`); default empty.
    pub llm_api_key: String,
    /// Env `LLM_ENDPOINT` (alias `OPENAI_URL`); default empty.
    pub llm_endpoint: String,
    /// Env `LLM_API_VERSION`; default empty.
    pub llm_api_version: String,
    /// Env `LLM_TEMPERATURE`; default `0.0`.
    pub llm_temperature: f64,
    /// Env `LLM_STREAMING`; default `false`.
    pub llm_streaming: bool,
    /// Env `LLM_MAX_COMPLETION_TOKENS` (alias `LLM_MAX_TOKENS`); default `16384`.
    pub llm_max_completion_tokens: u32,
    /// Env `LLM_MAX_RETRIES`; default `2`.
    pub llm_max_retries: u32,
    /// Env `LLM_MAX_PARALLEL_REQUESTS`; default `20`.
    pub llm_max_parallel_requests: u32,

    /// Env `MOCK_LLM` (true for `true`/`1`/`yes`, case-insensitive); default `false`.
    pub llm_mock: bool,
    /// Env `MOCK_LLM_CASSETTE`; default empty.
    pub llm_cassette: String,
    /// Env `COGNEE_RECORD_LLM`; default empty.
    pub llm_record_path: String,

    /// Default `"generate_graph_prompt.txt"`.
    pub graph_prompt_path: String,

    /// Default empty.
    pub llm_fallback_model: String,
    /// Default empty.
    pub llm_fallback_provider: String,
    /// Default empty.
    pub llm_fallback_endpoint: String,
    /// Default empty.
    pub llm_fallback_api_key: String,

    /// Env `GRAPH_DATABASE_PROVIDER`; default `"ladybug"`.
    pub graph_database_provider: String,
    /// Env `GRAPH_DATABASE_URL`; default empty.
    pub graph_database_url: String,
    /// Env `GRAPH_DATABASE_NAME`; default empty.
    pub graph_database_name: String,
    /// Env `GRAPH_DATABASE_USERNAME`; default empty.
    pub graph_database_username: String,
    /// Env `GRAPH_DATABASE_PASSWORD`; default empty.
    pub graph_database_password: String,
    /// Env `GRAPH_DATABASE_PORT`; default `123`.
    pub graph_database_port: u16,
    /// Env `GRAPH_DATABASE_HOST`; default empty.
    pub graph_database_host: String,
    /// Env `GRAPH_DATABASE_KEY`; default empty.
    pub graph_database_key: String,
    /// Env `GRAPH_FILE_PATH`; default empty.
    pub graph_file_path: String,
    /// Default empty.
    pub graph_filename: String,

    /// Env `VECTOR_DB_PROVIDER`; default `"lancedb"`.
    pub vector_db_provider: String,
    /// Env `VECTOR_DB_URL`; default empty.
    pub vector_db_url: String,
    /// Env `VECTOR_DB_PORT`; default `1234`.
    pub vector_db_port: u16,
    /// Env `VECTOR_DB_NAME`; default empty.
    pub vector_db_name: String,
    /// Env `VECTOR_DB_KEY`; default empty.
    pub vector_db_key: String,
    /// Env `VECTOR_DB_USERNAME`; default empty.
    pub vector_db_username: String,
    /// Env `VECTOR_DB_PASSWORD`; default empty.
    pub vector_db_password: String,
    /// Env `VECTOR_DB_HOST`; default empty.
    pub vector_db_host: String,

    /// Default `"PARAGRAPH"`.
    pub chunk_strategy: String,
    /// Default `"DEFAULT_ENGINE"`.
    pub chunk_engine: String,
    /// Default `1500`.
    pub chunk_size: u32,
    /// Default `10`.
    pub chunk_overlap: u32,

    /// Env `DATABASE_URL`; default `"sqlite:./cognee.db?mode=rwc"`.
    ///
    /// Returned by `resolved_relational_db_url` unless `db_provider` is `"postgres"`.
    pub relational_db_url: String,
    /// Default empty.
    pub migration_db_url: String,

    /// Env `DB_PROVIDER`; default `"sqlite"`.
    pub db_provider: String,
    /// Env `DB_HOST`; default `"localhost"`.
    pub db_host: String,
    /// Env `DB_PORT`; default `5432`.
    pub db_port: u16,
    /// Env `DB_NAME`; default `"cognee_db"`.
    pub db_name: String,
    /// Env `DB_USERNAME`; default empty.
    pub db_username: String,
    /// Env `DB_PASSWORD`; default empty.
    pub db_password: String,

    /// Default is `DEFAULT_SYSTEM_PROMPT_PATH`.
    pub default_system_prompt_path: String,

    /// Env `EMBEDDING_PROVIDER`; default `"onnx"` on Android, `"openai"` elsewhere.
    pub embedding_provider: String,
    /// Env `EMBEDDING_MODEL_PATH` (alias `COGNEE_E2E_EMBED_MODEL_PATH`);
    /// default `"./target/models/BGE-Small-v1.5-model_quantized.onnx"`.
    pub embedding_model_path: String,
    /// Env `EMBEDDING_TOKENIZER_PATH` (alias `COGNEE_E2E_TOKENIZER_PATH`);
    /// default `"./target/models/bge-small-tokenizer.json"`.
    pub embedding_tokenizer_path: String,
    /// Env `EMBEDDING_MODEL`; default `"BGE-Small-v1.5"` on Android,
    /// `"text-embedding-3-small"` elsewhere.
    pub embedding_model_name: String,
    /// Env `EMBEDDING_DIMENSIONS`; default `384` on Android, `1536` elsewhere.
    pub embedding_dimensions: u32,
    /// Env `EMBEDDING_MAX_SEQUENCE_LENGTH`; default `512`.
    pub embedding_max_sequence_length: u32,
    /// Env `EMBEDDING_BATCH_SIZE`; default `32`.
    pub embedding_batch_size: u32,
    /// Env `EMBEDDING_ENDPOINT`; default empty.
    pub embedding_endpoint: String,
    /// Env `EMBEDDING_API_KEY`, falling back to `LLM_API_KEY`; default empty.
    pub embedding_api_key: String,
    /// Default empty.
    pub embedding_api_version: String,
    /// Default empty.
    pub transcription_model: String,

    /// Env `ONTOLOGY_FILE_PATH`; default empty.
    pub ontology_file_path: String,
    /// Env `ONTOLOGY_RESOLVER`; default `"rdflib"`.
    pub ontology_resolver: String,
    /// Env `ONTOLOGY_MATCHING_STRATEGY`; default `"fuzzy"`.
    pub ontology_matching_strategy: String,

    /// Env `CACHE_BACKEND`; default `"fs"`.
    pub cache_backend: String,
    /// Env `CACHE_HOST`; default `"localhost"`.
    pub cache_host: String,
    /// Env `CACHE_PORT`; default `6379`.
    pub cache_port: u16,
    /// Env `CACHE_USERNAME`; default empty.
    pub cache_username: String,
    /// Env `CACHE_PASSWORD`; default empty.
    pub cache_password: String,
    /// Env `SESSION_TTL_SECONDS`; default `604800` (7 days).
    pub session_ttl_seconds: u64,
    /// Env `CACHING`; default `true`.
    pub enable_caching: bool,
    /// Env `AUTO_FEEDBACK`; default `false`.
    pub auto_feedback: bool,

    /// Env `DEFAULT_USER_EMAIL`; default `"default_user@example.com"`.
    pub default_user_email: String,
    /// Env `DEFAULT_USER_PASSWORD`; default empty.
    pub default_user_password: String,
    /// Env `ENABLE_BACKEND_ACCESS_CONTROL`; default `false`.
    pub enable_access_control: bool,

    /// Env `LOG_LEVEL`; default `"info"`.
    pub log_level: String,

    /// Env `LLM_RATE_LIMIT_ENABLED`; default `false`.
    pub llm_rate_limit_enabled: bool,
    /// Env `LLM_RATE_LIMIT_REQUESTS`; default `60`.
    pub llm_rate_limit_requests: u32,
    /// Env `LLM_RATE_LIMIT_INTERVAL`; default `60` (unit not visible here — presumably seconds, TODO confirm).
    pub llm_rate_limit_interval: u32,
    /// Env `EMBEDDING_RATE_LIMIT_ENABLED`; default `false`.
    pub embedding_rate_limit_enabled: bool,
    /// Env `EMBEDDING_RATE_LIMIT_REQUESTS`; default `60`.
    pub embedding_rate_limit_requests: u32,
    /// Env `EMBEDDING_RATE_LIMIT_INTERVAL`; default `60` (unit not visible here — presumably seconds, TODO confirm).
    pub embedding_rate_limit_interval: u32,

    /// Env `STORAGE_BACKEND`; default `"local"`.
    pub storage_backend: String,
    /// Env `STORAGE_BUCKET_NAME`; default empty.
    pub storage_bucket_name: String,

    /// Env `COGNEE_TRACING_ENABLED`; default `false`.
    pub cognee_tracing_enabled: bool,
    /// Env `OTEL_SERVICE_NAME`; default `"cognee"`.
    pub otel_service_name: String,
    /// Env `OTEL_EXPORTER_OTLP_ENDPOINT`; default empty.
    pub otel_exporter_otlp_endpoint: String,
    /// Env `OTEL_EXPORTER_OTLP_HEADERS`; default empty.
    pub otel_exporter_otlp_headers: String,

    /// Env `OTEL_EXPORTER_OTLP_PROTOCOL`; default `"grpc"`.
    pub otel_exporter_otlp_protocol: String,

    /// Env `OTEL_SPAN_PROCESSOR`; default `"batch"`.
    pub otel_span_processor: String,

    /// Env `OTEL_TRACES_SAMPLER`; default empty.
    pub otel_traces_sampler: String,

    /// Env `OTEL_TRACES_SAMPLER_ARG`; default empty.
    pub otel_traces_sampler_arg: String,

    /// Env `ENABLE_LAST_ACCESSED`; default `false`.
    pub enable_last_accessed: bool,
}
201
202impl Settings {
203 pub fn load_from_env() -> Self {
211 let mut s = Self::default();
212 s.overlay_from_env();
213 s
214 }
215
216 pub fn overlay_from_env(&mut self) {
227 let _ = dotenv::dotenv();
229
230 let str_var =
232 |name: &str| -> Option<String> { std::env::var(name).ok().filter(|v| !v.is_empty()) };
233 let str_alias = |primary: &str, alias: &str| -> Option<String> {
235 str_var(primary).or_else(|| str_var(alias))
236 };
237
238 if let Some(v) = str_var("LLM_PROVIDER") {
240 self.llm_provider = v;
241 }
242 if let Some(v) = str_alias("LLM_MODEL", "OPENAI_MODEL") {
243 self.llm_model = v;
244 }
245 if let Some(v) = str_alias("LLM_API_KEY", "OPENAI_TOKEN") {
246 self.llm_api_key = v;
247 }
248 if let Some(v) = str_alias("LLM_ENDPOINT", "OPENAI_URL") {
249 self.llm_endpoint = v;
250 }
251 if let Some(v) = str_var("LLM_API_VERSION") {
252 self.llm_api_version = v;
253 }
254 if let Some(v) = str_var("LLM_TEMPERATURE")
255 && let Ok(f) = v.parse::<f64>()
256 {
257 self.llm_temperature = f;
258 }
259 if let Some(v) = str_alias("LLM_MAX_COMPLETION_TOKENS", "LLM_MAX_TOKENS")
260 && let Ok(n) = v.parse::<u32>()
261 {
262 self.llm_max_completion_tokens = n;
263 }
264 if let Some(v) = str_var("LLM_STREAMING") {
265 self.llm_streaming = cognee_utils::parse_env_bool(&v);
266 }
267 if let Some(v) = str_var("LLM_MAX_RETRIES")
268 && let Ok(n) = v.parse::<u32>()
269 {
270 self.llm_max_retries = n;
271 }
272 if let Some(v) = str_var("LLM_MAX_PARALLEL_REQUESTS")
273 && let Ok(n) = v.parse::<u32>()
274 {
275 self.llm_max_parallel_requests = n;
276 }
277 if let Some(v) = str_var("MOCK_LLM") {
279 let v = v.to_lowercase();
280 self.llm_mock = v == "true" || v == "1" || v == "yes";
281 }
282 if let Some(v) = str_var("MOCK_LLM_CASSETTE") {
283 self.llm_cassette = v;
284 }
285 if let Some(v) = str_var("COGNEE_RECORD_LLM") {
286 self.llm_record_path = v;
287 }
288
289 if let Some(v) = str_var("GRAPH_DATABASE_PROVIDER") {
291 self.graph_database_provider = v;
292 }
293 if let Some(v) = str_var("GRAPH_DATABASE_URL") {
294 self.graph_database_url = v;
295 }
296 if let Some(v) = str_var("GRAPH_DATABASE_NAME") {
297 self.graph_database_name = v;
298 }
299 if let Some(v) = str_var("GRAPH_DATABASE_USERNAME") {
300 self.graph_database_username = v;
301 }
302 if let Some(v) = str_var("GRAPH_DATABASE_PASSWORD") {
303 self.graph_database_password = v;
304 }
305 if let Some(v) = str_var("GRAPH_DATABASE_PORT")
306 && let Ok(n) = v.parse::<u16>()
307 {
308 self.graph_database_port = n;
309 }
310 if let Some(v) = str_var("GRAPH_DATABASE_HOST") {
311 self.graph_database_host = v;
312 }
313 if let Some(v) = str_var("GRAPH_DATABASE_KEY") {
314 self.graph_database_key = v;
315 }
316 if let Some(v) = str_var("GRAPH_FILE_PATH") {
317 self.graph_file_path = v;
318 }
319
320 if let Some(v) = str_var("VECTOR_DB_PROVIDER") {
322 self.vector_db_provider = v;
323 }
324 if let Some(v) = str_var("VECTOR_DB_URL") {
325 self.vector_db_url = v;
326 }
327 if let Some(v) = str_var("VECTOR_DB_PORT")
328 && let Ok(n) = v.parse::<u16>()
329 {
330 self.vector_db_port = n;
331 }
332 if let Some(v) = str_var("VECTOR_DB_NAME") {
333 self.vector_db_name = v;
334 }
335 if let Some(v) = str_var("VECTOR_DB_KEY") {
336 self.vector_db_key = v;
337 }
338 if let Some(v) = str_var("VECTOR_DB_USERNAME") {
339 self.vector_db_username = v;
340 }
341 if let Some(v) = str_var("VECTOR_DB_PASSWORD") {
342 self.vector_db_password = v;
343 }
344 if let Some(v) = str_var("VECTOR_DB_HOST") {
345 self.vector_db_host = v;
346 }
347
348 if let Some(v) = str_var("DB_PROVIDER") {
350 self.db_provider = v;
351 }
352 if let Some(v) = str_var("DB_HOST") {
353 self.db_host = v;
354 }
355 if let Some(v) = str_var("DB_PORT")
356 && let Ok(n) = v.parse::<u16>()
357 {
358 self.db_port = n;
359 }
360 if let Some(v) = str_var("DB_NAME") {
361 self.db_name = v;
362 }
363 if let Some(v) = str_var("DB_USERNAME") {
364 self.db_username = v;
365 }
366 if let Some(v) = str_var("DB_PASSWORD") {
367 self.db_password = v;
368 }
369 if let Some(v) = str_var("DATABASE_URL") {
370 self.relational_db_url = v;
371 }
372
373 if let Some(v) = str_var("EMBEDDING_PROVIDER") {
375 self.embedding_provider = v;
376 }
377 if let Some(v) = str_var("EMBEDDING_ENDPOINT") {
378 self.embedding_endpoint = v;
379 }
380 if let Some(v) = str_alias("EMBEDDING_API_KEY", "LLM_API_KEY") {
381 self.embedding_api_key = v;
382 }
383 if let Some(v) = str_var("EMBEDDING_MODEL") {
384 self.embedding_model_name = v;
385 }
386 if let Some(v) = str_var("EMBEDDING_DIMENSIONS")
387 && let Ok(n) = v.parse::<u32>()
388 {
389 self.embedding_dimensions = n;
390 }
391 if let Some(v) = str_var("EMBEDDING_BATCH_SIZE")
392 && let Ok(n) = v.parse::<u32>()
393 {
394 self.embedding_batch_size = n;
395 }
396 if let Some(v) = str_var("EMBEDDING_MAX_SEQUENCE_LENGTH")
397 && let Ok(n) = v.parse::<u32>()
398 {
399 self.embedding_max_sequence_length = n;
400 }
401 if let Some(v) = str_alias("EMBEDDING_MODEL_PATH", "COGNEE_E2E_EMBED_MODEL_PATH") {
402 self.embedding_model_path = v;
403 }
404 if let Some(v) = str_alias("EMBEDDING_TOKENIZER_PATH", "COGNEE_E2E_TOKENIZER_PATH") {
405 self.embedding_tokenizer_path = v;
406 }
407
408 if let Some(v) = str_var("COGNEE_SYSTEM_ROOT_DIRECTORY") {
410 self.system_root_directory = v;
411 }
412 if let Some(v) = str_var("COGNEE_DATA_ROOT_DIRECTORY") {
413 self.data_root_directory = v;
414 }
415 if let Some(v) = str_var("COGNEE_DEFAULT_DATASET_NAME") {
416 self.default_dataset_name = v;
417 }
418 if let Some(v) = str_var("COGNEE_DEFAULT_USER_ID") {
419 self.default_user_id = v;
420 }
421
422 if let Some(v) = str_var("ONTOLOGY_FILE_PATH") {
428 self.ontology_file_path = v;
429 }
430 if let Some(v) = str_var("ONTOLOGY_RESOLVER") {
431 self.ontology_resolver = v;
432 }
433 if let Some(v) = str_var("ONTOLOGY_MATCHING_STRATEGY") {
434 self.ontology_matching_strategy = v;
435 }
436
437 if let Some(v) = str_var("CACHE_BACKEND") {
439 self.cache_backend = v;
440 }
441 if let Some(v) = str_var("CACHE_HOST") {
442 self.cache_host = v;
443 }
444 if let Some(v) = str_var("CACHE_PORT")
445 && let Ok(n) = v.parse::<u16>()
446 {
447 self.cache_port = n;
448 }
449 if let Some(v) = str_var("CACHE_USERNAME") {
450 self.cache_username = v;
451 }
452 if let Some(v) = str_var("CACHE_PASSWORD") {
453 self.cache_password = v;
454 }
455 if let Some(v) = str_var("SESSION_TTL_SECONDS")
456 && let Ok(n) = v.parse::<u64>()
457 {
458 self.session_ttl_seconds = n;
459 }
460 if let Some(v) = str_var("CACHING") {
461 self.enable_caching = cognee_utils::parse_env_bool(&v);
462 }
463 if let Some(v) = str_var("AUTO_FEEDBACK") {
464 self.auto_feedback = cognee_utils::parse_env_bool(&v);
465 }
466
467 if let Some(v) = str_var("DEFAULT_USER_EMAIL") {
469 self.default_user_email = v;
470 }
471 if let Some(v) = str_var("DEFAULT_USER_PASSWORD") {
472 self.default_user_password = v;
473 }
474 if let Some(v) = str_var("ENABLE_BACKEND_ACCESS_CONTROL") {
475 self.enable_access_control = cognee_utils::parse_env_bool(&v);
476 }
477
478 if let Some(v) = str_var("LOG_LEVEL") {
480 self.log_level = v;
481 }
482 if let Some(v) = str_var("COGNEE_LOGS_DIR") {
484 self.logs_root_directory = v;
485 }
486 if let Some(v) = str_var("CACHE_ROOT_DIRECTORY") {
488 self.cache_root_directory = v;
489 }
490
491 if let Some(v) = str_var("LLM_RATE_LIMIT_ENABLED") {
493 self.llm_rate_limit_enabled = cognee_utils::parse_env_bool(&v);
494 }
495 if let Some(v) = str_var("LLM_RATE_LIMIT_REQUESTS")
496 && let Ok(n) = v.parse::<u32>()
497 {
498 self.llm_rate_limit_requests = n;
499 }
500 if let Some(v) = str_var("LLM_RATE_LIMIT_INTERVAL")
501 && let Ok(n) = v.parse::<u32>()
502 {
503 self.llm_rate_limit_interval = n;
504 }
505 if let Some(v) = str_var("EMBEDDING_RATE_LIMIT_ENABLED") {
506 self.embedding_rate_limit_enabled = cognee_utils::parse_env_bool(&v);
507 }
508 if let Some(v) = str_var("EMBEDDING_RATE_LIMIT_REQUESTS")
509 && let Ok(n) = v.parse::<u32>()
510 {
511 self.embedding_rate_limit_requests = n;
512 }
513 if let Some(v) = str_var("EMBEDDING_RATE_LIMIT_INTERVAL")
514 && let Ok(n) = v.parse::<u32>()
515 {
516 self.embedding_rate_limit_interval = n;
517 }
518
519 if let Some(v) = str_var("STORAGE_BACKEND") {
521 self.storage_backend = v;
522 }
523 if let Some(v) = str_var("STORAGE_BUCKET_NAME") {
524 self.storage_bucket_name = v;
525 }
526
527 if let Some(v) = str_var("COGNEE_TRACING_ENABLED") {
529 self.cognee_tracing_enabled = cognee_utils::parse_env_bool(&v);
530 }
531 if let Some(v) = str_var("OTEL_SERVICE_NAME") {
532 self.otel_service_name = v;
533 }
534 if let Some(v) = str_var("OTEL_EXPORTER_OTLP_ENDPOINT") {
535 self.otel_exporter_otlp_endpoint = v;
536 }
537 if let Some(v) = str_var("OTEL_EXPORTER_OTLP_HEADERS") {
538 self.otel_exporter_otlp_headers = v;
539 }
540 if let Some(v) = str_var("OTEL_EXPORTER_OTLP_PROTOCOL") {
541 self.otel_exporter_otlp_protocol = v;
542 }
543 if let Some(v) = str_var("OTEL_SPAN_PROCESSOR") {
544 self.otel_span_processor = v;
545 }
546 if let Some(v) = str_var("OTEL_TRACES_SAMPLER") {
547 self.otel_traces_sampler = v;
548 }
549 if let Some(v) = str_var("OTEL_TRACES_SAMPLER_ARG") {
550 self.otel_traces_sampler_arg = v;
551 }
552
553 if let Some(v) = str_var("ENABLE_LAST_ACCESSED") {
555 self.enable_last_accessed = cognee_utils::parse_env_bool(&v);
556 }
557 }
558
559 pub fn resolved_relational_db_url(&self) -> String {
566 if self.db_provider == "postgres" {
567 format!(
568 "postgres://{}:{}@{}:{}/{}",
569 self.db_username, self.db_password, self.db_host, self.db_port, self.db_name
570 )
571 } else {
572 self.relational_db_url.clone()
573 }
574 }
575
576 pub fn telemetry_snapshot(&self) -> serde_json::Map<String, serde_json::Value> {
589 use serde_json::Value;
590 let mut m = serde_json::Map::new();
591 m.insert("sdk_runtime".into(), Value::String("rust".into()));
592 m.insert(
593 "vector_db_provider".into(),
594 Value::String(self.vector_db_provider.clone()),
595 );
596 m.insert(
597 "graph_db_provider".into(),
598 Value::String(self.graph_database_provider.clone()),
599 );
600 m.insert(
601 "relational_db_provider".into(),
602 Value::String(self.db_provider.clone()),
603 );
604 m.insert(
605 "llm_provider".into(),
606 Value::String(self.llm_provider.clone()),
607 );
608 m.insert("llm_model".into(), Value::String(self.llm_model.clone()));
609 m.insert(
614 "embedding_provider".into(),
615 Value::String(self.embedding_provider.clone()),
616 );
617 m.insert(
618 "embedding_model".into(),
619 Value::String(self.embedding_model_name.clone()),
620 );
621 m.insert(
622 "embedding_dimensions".into(),
623 Value::Number(self.embedding_dimensions.into()),
624 );
625 m.insert(
626 "chunk_strategy".into(),
627 Value::String(self.chunk_strategy.clone()),
628 );
629 m
630 }
631}
632
633impl Default for Settings {
634 fn default() -> Self {
635 #[cfg(target_os = "android")]
641 let (embedding_provider, embedding_model_name, embedding_dimensions) =
642 ("onnx", "BGE-Small-v1.5", 384u32);
643 #[cfg(not(target_os = "android"))]
644 let (embedding_provider, embedding_model_name, embedding_dimensions) =
645 ("openai", "text-embedding-3-small", 1536u32);
646
647 Self {
648 default_user_id: "00000000-0000-0000-0000-000000000000".to_string(),
649 default_dataset_name: "main_dataset".to_string(),
650 system_root_directory: "./.cognee_system".to_string(),
651 data_root_directory: "./.data_storage".to_string(),
652 cache_root_directory: "./.cognee_cache".to_string(),
653 logs_root_directory: "./logs".to_string(),
655 monitoring_tool: "none".to_string(),
656
657 classification_model: String::new(),
658 summarization_model: String::new(),
659 graph_model: "KnowledgeGraph".to_string(),
660 summarization_schema: None,
661
662 llm_provider: "openai".to_string(),
663 llm_model: "openai/gpt-5-mini".to_string(),
664 llm_api_key: String::new(),
665 llm_endpoint: String::new(),
666 llm_api_version: String::new(),
667 llm_temperature: 0.0,
668 llm_streaming: false,
669 llm_max_completion_tokens: 16384,
670 llm_max_retries: 2,
671 llm_max_parallel_requests: 20,
672 llm_mock: false,
673 llm_cassette: String::new(),
674 llm_record_path: String::new(),
675 graph_prompt_path: "generate_graph_prompt.txt".to_string(),
676
677 llm_fallback_model: String::new(),
678 llm_fallback_provider: String::new(),
679 llm_fallback_endpoint: String::new(),
680 llm_fallback_api_key: String::new(),
681
682 graph_database_provider: "ladybug".to_string(),
683 graph_database_url: String::new(),
684 graph_database_name: String::new(),
685 graph_database_username: String::new(),
686 graph_database_password: String::new(),
687 graph_database_port: 123,
688 graph_database_host: String::new(),
689 graph_database_key: String::new(),
690 graph_file_path: String::new(),
691 graph_filename: String::new(),
692
693 vector_db_provider: "lancedb".to_string(),
704 vector_db_url: String::new(),
705 vector_db_port: 1234,
706 vector_db_name: String::new(),
707 vector_db_key: String::new(),
708 vector_db_username: String::new(),
709 vector_db_password: String::new(),
710 vector_db_host: String::new(),
711
712 chunk_strategy: "PARAGRAPH".to_string(),
713 chunk_engine: "DEFAULT_ENGINE".to_string(),
714 chunk_size: 1500,
715 chunk_overlap: 10,
716
717 relational_db_url: "sqlite:./cognee.db?mode=rwc".to_string(),
718 migration_db_url: String::new(),
719
720 db_provider: "sqlite".to_string(),
721 db_host: "localhost".to_string(),
722 db_port: 5432,
723 db_name: "cognee_db".to_string(),
724 db_username: String::new(),
725 db_password: String::new(),
726
727 default_system_prompt_path: DEFAULT_SYSTEM_PROMPT_PATH.to_string(),
728
729 embedding_provider: embedding_provider.to_string(),
730 embedding_model_path: "./target/models/BGE-Small-v1.5-model_quantized.onnx".to_string(),
733 embedding_tokenizer_path: "./target/models/bge-small-tokenizer.json".to_string(),
734 embedding_model_name: embedding_model_name.to_string(),
735 embedding_dimensions,
740 embedding_max_sequence_length: 512,
741 embedding_batch_size: 32,
742 embedding_endpoint: String::new(),
743 embedding_api_key: String::new(),
744 embedding_api_version: String::new(),
745 transcription_model: String::new(),
746
747 ontology_file_path: String::new(),
748 ontology_resolver: "rdflib".to_string(),
749 ontology_matching_strategy: "fuzzy".to_string(),
750
751 cache_backend: "fs".to_string(),
753 cache_host: "localhost".to_string(),
754 cache_port: 6379,
755 cache_username: String::new(),
756 cache_password: String::new(),
757 session_ttl_seconds: 604800,
758 enable_caching: true,
759 auto_feedback: false,
760
761 default_user_email: "default_user@example.com".to_string(),
763 default_user_password: String::new(),
764 enable_access_control: false,
765
766 log_level: "info".to_string(),
768
769 llm_rate_limit_enabled: false,
771 llm_rate_limit_requests: 60,
772 llm_rate_limit_interval: 60,
773 embedding_rate_limit_enabled: false,
774 embedding_rate_limit_requests: 60,
775 embedding_rate_limit_interval: 60,
776
777 storage_backend: "local".to_string(),
779 storage_bucket_name: String::new(),
780
781 cognee_tracing_enabled: false,
783 otel_service_name: "cognee".to_string(),
784 otel_exporter_otlp_endpoint: String::new(),
785 otel_exporter_otlp_headers: String::new(),
786 otel_exporter_otlp_protocol: "grpc".to_string(),
787 otel_span_processor: "batch".to_string(),
788 otel_traces_sampler: String::new(),
789 otel_traces_sampler_arg: String::new(),
790
791 enable_last_accessed: false,
793 }
794 }
795}
796
/// Errors describing an invalid configuration key or value.
///
/// NOTE(review): the code that produces these variants is not visible in this
/// part of the file — presumably a key-based get/set API; confirm at the call sites.
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// The given key does not name a known setting; carries the offending key.
    #[error("Unknown config key: {0}")]
    UnknownKey(String),
    /// The value supplied for `key` has the wrong type; `reason` explains the mismatch.
    #[error("Type mismatch for key '{key}': {reason}")]
    TypeMismatch { key: String, reason: String },
}
809
/// Shared handle to a mutable [`Settings`] value plus a change counter.
///
/// Both fields live behind `Arc`, and the `Clone` impl copies only the `Arc`
/// pointers, so every clone observes and mutates the same settings and the
/// same version number.
pub struct ConfigManager {
    /// The settings, guarded by a reader/writer lock.
    inner: Arc<RwLock<Settings>>,
    /// Counter incremented by `bump_version` after each setter writes; starts at 0.
    version: Arc<AtomicU64>,
}
836
837impl ConfigManager {
838 pub fn new(settings: Settings) -> Self {
840 Self {
841 inner: Arc::new(RwLock::new(settings)),
842 version: Arc::new(AtomicU64::new(0)),
843 }
844 }
845
846 pub fn from_env() -> Self {
848 Self::new(Settings::load_from_env())
849 }
850
851 pub fn read(&self) -> RwLockReadGuard<'_, Settings> {
853 self.inner.read().expect("lock poison is unrecoverable") }
855
856 pub fn version(&self) -> u64 {
858 self.version.load(Ordering::Acquire)
859 }
860
861 fn bump_version(&self) {
863 self.version.fetch_add(1, Ordering::Release);
864 }
865}
866
867impl Clone for ConfigManager {
868 fn clone(&self) -> Self {
869 Self {
870 inner: Arc::clone(&self.inner),
871 version: Arc::clone(&self.version),
872 }
873 }
874}
875
876impl ConfigManager {
879 pub fn set_llm_provider(&self, provider: &str) {
882 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_provider = provider.to_string();
884 drop(s);
885 self.bump_version();
886 }
887
888 pub fn set_llm_model(&self, model: &str) {
889 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_model = model.to_string();
891 drop(s);
892 self.bump_version();
893 }
894
895 pub fn set_llm_api_key(&self, key: &str) {
896 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_api_key = key.to_string();
898 drop(s);
899 self.bump_version();
900 }
901
902 pub fn set_llm_endpoint(&self, endpoint: &str) {
903 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_endpoint = endpoint.to_string();
905 drop(s);
906 self.bump_version();
907 }
908
909 pub fn set_llm_fallback_model(&self, model: &str) {
912 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_fallback_model = model.to_string();
914 drop(s);
915 self.bump_version();
916 }
917
918 pub fn set_llm_fallback_provider(&self, provider: &str) {
919 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_fallback_provider = provider.to_string();
921 drop(s);
922 self.bump_version();
923 }
924
925 pub fn set_llm_fallback_endpoint(&self, endpoint: &str) {
926 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_fallback_endpoint = endpoint.to_string();
928 drop(s);
929 self.bump_version();
930 }
931
932 pub fn set_llm_fallback_api_key(&self, key: &str) {
933 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_fallback_api_key = key.to_string();
935 drop(s);
936 self.bump_version();
937 }
938
939 pub fn set_embedding_provider(&self, provider: &str) {
942 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.embedding_provider = provider.to_string();
944 drop(s);
945 self.bump_version();
946 }
947
948 pub fn set_embedding_model(&self, model: &str) {
949 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.embedding_model_name = model.to_string();
951 drop(s);
952 self.bump_version();
953 }
954
955 pub fn set_embedding_dimensions(&self, dims: u32) {
956 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.embedding_dimensions = dims;
958 drop(s);
959 self.bump_version();
960 }
961
962 pub fn set_embedding_endpoint(&self, endpoint: &str) {
963 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.embedding_endpoint = endpoint.to_string();
965 drop(s);
966 self.bump_version();
967 }
968
969 pub fn set_embedding_api_key(&self, key: &str) {
970 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.embedding_api_key = key.to_string();
972 drop(s);
973 self.bump_version();
974 }
975
976 pub fn set_embedding_api_version(&self, version: &str) {
977 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.embedding_api_version = version.to_string();
979 drop(s);
980 self.bump_version();
981 }
982
983 pub fn set_transcription_model(&self, model: &str) {
984 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.transcription_model = model.to_string();
986 drop(s);
987 self.bump_version();
988 }
989
990 pub fn set_vector_db_provider(&self, provider: &str) {
993 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.vector_db_provider = provider.to_string();
995 drop(s);
996 self.bump_version();
997 }
998
999 pub fn set_vector_db_url(&self, url: &str) {
1000 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.vector_db_url = url.to_string();
1002 drop(s);
1003 self.bump_version();
1004 }
1005
1006 pub fn set_relational_db_url(&self, url: &str) {
1011 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.relational_db_url = url.to_string();
1013 drop(s);
1014 self.bump_version();
1015 }
1016
1017 #[allow(clippy::too_many_arguments)]
1020 pub fn set_relational_db_config(
1021 &self,
1022 url: Option<&str>,
1023 provider: Option<&str>,
1024 host: Option<&str>,
1025 port: Option<u16>,
1026 name: Option<&str>,
1027 username: Option<&str>,
1028 password: Option<&str>,
1029 ) {
1030 let mut s = self.inner.write().expect("lock poison is unrecoverable"); if let Some(v) = url {
1032 s.relational_db_url = v.to_string();
1033 }
1034 if let Some(v) = provider {
1035 s.db_provider = v.to_string();
1036 }
1037 if let Some(v) = host {
1038 s.db_host = v.to_string();
1039 }
1040 if let Some(v) = port {
1041 s.db_port = v;
1042 }
1043 if let Some(v) = name {
1044 s.db_name = v.to_string();
1045 }
1046 if let Some(v) = username {
1047 s.db_username = v.to_string();
1048 }
1049 if let Some(v) = password {
1050 s.db_password = v.to_string();
1051 }
1052 drop(s);
1053 self.bump_version();
1054 }
1055
1056 pub fn set_migration_db_config(&self, url: &str) {
1058 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.migration_db_url = url.to_string();
1060 drop(s);
1061 self.bump_version();
1062 }
1063
1064 pub fn set_vector_db_key(&self, key: &str) {
1065 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.vector_db_key = key.to_string();
1067 drop(s);
1068 self.bump_version();
1069 }
1070
1071 pub fn set_graph_database_provider(&self, provider: &str) {
1074 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.graph_database_provider = provider.to_string();
1076 drop(s);
1077 self.bump_version();
1078 }
1079
1080 pub fn set_graph_model(&self, model: &str) {
1081 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.graph_model = model.to_string();
1083 drop(s);
1084 self.bump_version();
1085 }
1086
1087 pub fn set_chunk_strategy(&self, strategy: &str) {
1090 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.chunk_strategy = strategy.to_string();
1092 drop(s);
1093 self.bump_version();
1094 }
1095
1096 pub fn set_chunk_engine(&self, engine: &str) {
1097 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.chunk_engine = engine.to_string();
1099 drop(s);
1100 self.bump_version();
1101 }
1102
1103 pub fn set_chunk_size(&self, size: u32) {
1104 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.chunk_size = size;
1106 drop(s);
1107 self.bump_version();
1108 }
1109
1110 pub fn set_chunk_overlap(&self, overlap: u32) {
1111 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.chunk_overlap = overlap;
1113 drop(s);
1114 self.bump_version();
1115 }
1116
1117 pub fn set_data_root_directory(&self, path: &str) {
1120 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.data_root_directory = path.to_string();
1122 drop(s);
1123 self.bump_version();
1124 }
1125
1126 pub fn set_system_root_directory(&self, path: &str) {
1132 let mut s = self.inner.write().expect("lock poison is unrecoverable"); let old_root = s.system_root_directory.clone();
1134 s.system_root_directory = path.to_string();
1135
1136 if s.graph_file_path.is_empty() || s.graph_file_path.starts_with(&old_root) {
1138 let suffix = if s.graph_file_path.is_empty() {
1139 "/graph".to_string()
1140 } else {
1141 s.graph_file_path[old_root.len()..].to_string()
1142 };
1143 s.graph_file_path = format!("{path}{suffix}");
1144 }
1145
1146 if s.vector_db_url.is_empty() || s.vector_db_url.starts_with(&old_root) {
1148 let suffix = if s.vector_db_url.is_empty() {
1149 "/vectors".to_string()
1150 } else {
1151 s.vector_db_url[old_root.len()..].to_string()
1152 };
1153 s.vector_db_url = format!("{path}{suffix}");
1154 }
1155
1156 drop(s);
1157 self.bump_version();
1158 }
1159
1160 pub fn set_monitoring_tool(&self, tool: &str) {
1161 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.monitoring_tool = tool.to_string();
1163 drop(s);
1164 self.bump_version();
1165 }
1166
1167 pub fn set_classification_model(&self, model: &str) {
1168 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.classification_model = model.to_string();
1170 drop(s);
1171 self.bump_version();
1172 }
1173
1174 pub fn set_summarization_model(&self, model: &str) {
1175 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.summarization_model = model.to_string();
1177 drop(s);
1178 self.bump_version();
1179 }
1180
1181 pub fn set_summarization_schema(
1191 &self,
1192 schema: serde_json::Value,
1193 ) -> Result<(), cognee_cognify::config::ConfigError> {
1194 cognee_cognify::config::validate_summary_schema(&schema)?;
1195 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.summarization_schema = Some(schema);
1197 drop(s);
1198 self.bump_version();
1199 Ok(())
1200 }
1201
1202 pub fn set_llm_api_version(&self, version: &str) {
1205 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_api_version = version.to_string();
1207 drop(s);
1208 self.bump_version();
1209 }
1210
1211 pub fn set_llm_temperature(&self, temperature: f64) {
1212 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_temperature = temperature;
1214 drop(s);
1215 self.bump_version();
1216 }
1217
1218 pub fn set_llm_streaming(&self, streaming: bool) {
1219 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_streaming = streaming;
1221 drop(s);
1222 self.bump_version();
1223 }
1224
1225 pub fn set_llm_max_completion_tokens(&self, tokens: u32) {
1226 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_max_completion_tokens = tokens;
1228 drop(s);
1229 self.bump_version();
1230 }
1231
1232 pub fn set_llm_max_retries(&self, retries: u32) {
1233 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_max_retries = retries;
1235 drop(s);
1236 self.bump_version();
1237 }
1238
1239 pub fn set_llm_max_parallel_requests(&self, parallel: u32) {
1240 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_max_parallel_requests = parallel;
1242 drop(s);
1243 self.bump_version();
1244 }
1245
1246 pub fn set_llm_mock(&self, mock: bool) {
1248 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_mock = mock;
1250 drop(s);
1251 self.bump_version();
1252 }
1253
1254 pub fn set_llm_cassette(&self, cassette: &str) {
1256 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_cassette = cassette.to_string();
1258 drop(s);
1259 self.bump_version();
1260 }
1261
1262 pub fn set_llm_record_path(&self, path: &str) {
1264 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.llm_record_path = path.to_string();
1266 drop(s);
1267 self.bump_version();
1268 }
1269
1270 pub fn set_embedding_model_path(&self, path: &str) {
1273 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.embedding_model_path = path.to_string();
1275 drop(s);
1276 self.bump_version();
1277 }
1278
1279 pub fn set_embedding_tokenizer_path(&self, path: &str) {
1280 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.embedding_tokenizer_path = path.to_string();
1282 drop(s);
1283 self.bump_version();
1284 }
1285
1286 pub fn set_vector_db_host(&self, host: &str) {
1289 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.vector_db_host = host.to_string();
1291 drop(s);
1292 self.bump_version();
1293 }
1294
1295 pub fn set_vector_db_port(&self, port: u16) {
1296 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.vector_db_port = port;
1298 drop(s);
1299 self.bump_version();
1300 }
1301
1302 pub fn set_vector_db_name(&self, name: &str) {
1303 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.vector_db_name = name.to_string();
1305 drop(s);
1306 self.bump_version();
1307 }
1308
1309 pub fn set_graph_file_path(&self, path: &str) {
1316 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.graph_file_path = path.to_string();
1318 drop(s);
1319 self.bump_version();
1320 }
1321
1322 pub fn set_cache_root_directory(&self, path: &str) {
1325 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.cache_root_directory = path.to_string();
1327 drop(s);
1328 self.bump_version();
1329 }
1330
1331 pub fn set_logs_root_directory(&self, path: &str) {
1332 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.logs_root_directory = path.to_string();
1334 drop(s);
1335 self.bump_version();
1336 }
1337
1338 pub fn set_ontology_file_path(&self, path: &str) {
1341 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.ontology_file_path = path.to_string();
1343 drop(s);
1344 self.bump_version();
1345 }
1346
1347 pub fn set_ontology_resolver(&self, resolver: &str) {
1348 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.ontology_resolver = resolver.to_string();
1350 drop(s);
1351 self.bump_version();
1352 }
1353
1354 pub fn set_ontology_matching_strategy(&self, strategy: &str) {
1355 let mut s = self.inner.write().expect("lock poison is unrecoverable"); s.ontology_matching_strategy = strategy.to_string();
1357 drop(s);
1358 self.bump_version();
1359 }
1360
1361 pub fn get_settings(&self) -> std::collections::HashMap<String, serde_json::Value> {
1370 use serde_json::Value;
1371
1372 let s = self.inner.read().expect("lock poison is unrecoverable"); let mut m = std::collections::HashMap::new();
1374
1375 let mask = |v: &String| -> String {
1379 if v.is_empty() {
1380 String::new()
1381 } else {
1382 "<redacted>".to_string()
1383 }
1384 };
1385
1386 let mask_url = |v: &String| -> String {
1392 if let Some(scheme_end) = v.find("://") {
1394 let after_scheme = scheme_end + 3;
1395 if let Some(at_rel) = v[after_scheme..].find('@') {
1396 let at_abs = after_scheme + at_rel;
1397 if at_abs > after_scheme {
1400 return format!("{}<redacted>{}", &v[..after_scheme], &v[at_abs..]);
1401 }
1402 }
1403 }
1404 v.clone()
1405 };
1406
1407 m.insert("llm_provider".into(), Value::String(s.llm_provider.clone()));
1409 m.insert("llm_model".into(), Value::String(s.llm_model.clone()));
1410 m.insert("llm_api_key".into(), Value::String(mask(&s.llm_api_key)));
1411 m.insert("llm_endpoint".into(), Value::String(s.llm_endpoint.clone()));
1412 m.insert(
1413 "llm_api_version".into(),
1414 Value::String(s.llm_api_version.clone()),
1415 );
1416 m.insert(
1417 "llm_temperature".into(),
1418 Value::Number(
1419 serde_json::Number::from_f64(s.llm_temperature)
1420 .unwrap_or(serde_json::Number::from(0)),
1421 ),
1422 );
1423 m.insert(
1424 "llm_max_completion_tokens".into(),
1425 Value::Number(s.llm_max_completion_tokens.into()),
1426 );
1427
1428 m.insert(
1430 "embedding_provider".into(),
1431 Value::String(s.embedding_provider.clone()),
1432 );
1433 m.insert(
1434 "embedding_model_name".into(),
1435 Value::String(s.embedding_model_name.clone()),
1436 );
1437 m.insert(
1438 "embedding_api_key".into(),
1439 Value::String(mask(&s.embedding_api_key)),
1440 );
1441 m.insert(
1442 "embedding_endpoint".into(),
1443 Value::String(s.embedding_endpoint.clone()),
1444 );
1445 m.insert(
1446 "embedding_dimensions".into(),
1447 Value::Number(s.embedding_dimensions.into()),
1448 );
1449
1450 m.insert(
1452 "graph_database_provider".into(),
1453 Value::String(s.graph_database_provider.clone()),
1454 );
1455 m.insert(
1456 "graph_database_url".into(),
1457 Value::String(mask_url(&s.graph_database_url)),
1458 );
1459 m.insert(
1460 "graph_database_password".into(),
1461 Value::String(mask(&s.graph_database_password)),
1462 );
1463 m.insert(
1464 "graph_database_key".into(),
1465 Value::String(mask(&s.graph_database_key)),
1466 );
1467
1468 m.insert(
1470 "vector_db_provider".into(),
1471 Value::String(s.vector_db_provider.clone()),
1472 );
1473 m.insert(
1474 "vector_db_url".into(),
1475 Value::String(mask_url(&s.vector_db_url)),
1476 );
1477 m.insert(
1478 "vector_db_key".into(),
1479 Value::String(mask(&s.vector_db_key)),
1480 );
1481 m.insert(
1482 "vector_db_password".into(),
1483 Value::String(mask(&s.vector_db_password)),
1484 );
1485
1486 m.insert("db_provider".into(), Value::String(s.db_provider.clone()));
1488 m.insert(
1489 "relational_db_url".into(),
1490 Value::String(mask_url(&s.relational_db_url)),
1491 );
1492 m.insert("db_password".into(), Value::String(mask(&s.db_password)));
1493
1494 m.insert(
1496 "system_root_directory".into(),
1497 Value::String(s.system_root_directory.clone()),
1498 );
1499 m.insert(
1500 "data_root_directory".into(),
1501 Value::String(s.data_root_directory.clone()),
1502 );
1503 m.insert(
1504 "logs_root_directory".into(),
1505 Value::String(s.logs_root_directory.clone()),
1506 );
1507
1508 m.insert(
1510 "chunk_strategy".into(),
1511 Value::String(s.chunk_strategy.clone()),
1512 );
1513 m.insert("chunk_size".into(), Value::Number(s.chunk_size.into()));
1514 m.insert(
1515 "chunk_overlap".into(),
1516 Value::Number(s.chunk_overlap.into()),
1517 );
1518
1519 m
1520 }
1521}
1522
1523fn as_string(key: &str, value: &serde_json::Value) -> Result<String, ConfigError> {
1527 value
1528 .as_str()
1529 .map(ToString::to_string)
1530 .ok_or_else(|| ConfigError::TypeMismatch {
1531 key: key.to_string(),
1532 reason: "expected a string".to_string(),
1533 })
1534}
1535
1536fn as_u32(key: &str, value: &serde_json::Value) -> Result<u32, ConfigError> {
1538 value
1539 .as_u64()
1540 .and_then(|n| u32::try_from(n).ok())
1541 .ok_or_else(|| ConfigError::TypeMismatch {
1542 key: key.to_string(),
1543 reason: "expected a positive integer (u32)".to_string(),
1544 })
1545}
1546
1547fn as_f64(key: &str, value: &serde_json::Value) -> Result<f64, ConfigError> {
1549 value.as_f64().ok_or_else(|| ConfigError::TypeMismatch {
1550 key: key.to_string(),
1551 reason: "expected a number".to_string(),
1552 })
1553}
1554
1555fn as_u16(key: &str, value: &serde_json::Value) -> Result<u16, ConfigError> {
1557 value
1558 .as_u64()
1559 .and_then(|n| u16::try_from(n).ok())
1560 .ok_or_else(|| ConfigError::TypeMismatch {
1561 key: key.to_string(),
1562 reason: "expected a positive integer (u16)".to_string(),
1563 })
1564}
1565
1566fn as_bool(key: &str, value: &serde_json::Value) -> Result<bool, ConfigError> {
1568 value.as_bool().ok_or_else(|| ConfigError::TypeMismatch {
1569 key: key.to_string(),
1570 reason: "expected a boolean".to_string(),
1571 })
1572}
1573
1574impl ConfigManager {
1575 pub fn set_llm_config(
1577 &self,
1578 values: &HashMap<String, serde_json::Value>,
1579 ) -> Result<(), ConfigError> {
1580 let mut s = self.inner.write().expect("lock poison is unrecoverable"); for (key, value) in values {
1582 match key.as_str() {
1583 "llm_provider" => s.llm_provider = as_string(key, value)?,
1584 "llm_model" => s.llm_model = as_string(key, value)?,
1585 "llm_api_key" => s.llm_api_key = as_string(key, value)?,
1586 "llm_endpoint" => s.llm_endpoint = as_string(key, value)?,
1587 "llm_api_version" => s.llm_api_version = as_string(key, value)?,
1588 "llm_temperature" => s.llm_temperature = as_f64(key, value)?,
1589 "llm_max_completion_tokens" => s.llm_max_completion_tokens = as_u32(key, value)?,
1590 "llm_streaming" => s.llm_streaming = as_bool(key, value)?,
1591 "llm_max_retries" => s.llm_max_retries = as_u32(key, value)?,
1592 "llm_max_parallel_requests" => {
1593 s.llm_max_parallel_requests = as_u32(key, value)?;
1594 }
1595 "llm_mock" => s.llm_mock = as_bool(key, value)?,
1596 "llm_cassette" => s.llm_cassette = as_string(key, value)?,
1597 "llm_record_path" => s.llm_record_path = as_string(key, value)?,
1598 other => return Err(ConfigError::UnknownKey(other.to_string())),
1599 }
1600 }
1601 drop(s);
1602 self.bump_version();
1603 Ok(())
1604 }
1605
1606 pub fn set_embedding_config(
1608 &self,
1609 values: &HashMap<String, serde_json::Value>,
1610 ) -> Result<(), ConfigError> {
1611 let mut s = self.inner.write().expect("lock poison is unrecoverable"); for (key, value) in values {
1613 match key.as_str() {
1614 "embedding_provider" => s.embedding_provider = as_string(key, value)?,
1615 "embedding_model" | "embedding_model_name" => {
1616 s.embedding_model_name = as_string(key, value)?;
1617 }
1618 "embedding_dimensions" => s.embedding_dimensions = as_u32(key, value)?,
1619 "embedding_endpoint" => s.embedding_endpoint = as_string(key, value)?,
1620 "embedding_api_key" => s.embedding_api_key = as_string(key, value)?,
1621 "embedding_model_path" => s.embedding_model_path = as_string(key, value)?,
1622 "embedding_tokenizer_path" => {
1623 s.embedding_tokenizer_path = as_string(key, value)?;
1624 }
1625 "embedding_api_version" => s.embedding_api_version = as_string(key, value)?,
1626 other => return Err(ConfigError::UnknownKey(other.to_string())),
1627 }
1628 }
1629 drop(s);
1630 self.bump_version();
1631 Ok(())
1632 }
1633
1634 pub fn set_vector_db_config(
1636 &self,
1637 values: &HashMap<String, serde_json::Value>,
1638 ) -> Result<(), ConfigError> {
1639 let mut s = self.inner.write().expect("lock poison is unrecoverable"); for (key, value) in values {
1641 match key.as_str() {
1642 "vector_db_provider" => s.vector_db_provider = as_string(key, value)?,
1643 "vector_db_url" => s.vector_db_url = as_string(key, value)?,
1644 "vector_db_key" => s.vector_db_key = as_string(key, value)?,
1645 "vector_db_host" => s.vector_db_host = as_string(key, value)?,
1646 "vector_db_port" => s.vector_db_port = as_u16(key, value)?,
1647 "vector_db_name" => s.vector_db_name = as_string(key, value)?,
1648 other => return Err(ConfigError::UnknownKey(other.to_string())),
1649 }
1650 }
1651 drop(s);
1652 self.bump_version();
1653 Ok(())
1654 }
1655
1656 pub fn set_graph_db_config(
1658 &self,
1659 values: &HashMap<String, serde_json::Value>,
1660 ) -> Result<(), ConfigError> {
1661 let mut s = self.inner.write().expect("lock poison is unrecoverable"); for (key, value) in values {
1663 match key.as_str() {
1664 "graph_database_provider" => s.graph_database_provider = as_string(key, value)?,
1665 "graph_model" => s.graph_model = as_string(key, value)?,
1666 "graph_file_path" => s.graph_file_path = as_string(key, value)?,
1667 other => return Err(ConfigError::UnknownKey(other.to_string())),
1668 }
1669 }
1670 drop(s);
1671 self.bump_version();
1672 Ok(())
1673 }
1674
1675 pub fn set(&self, key: &str, value: serde_json::Value) -> Result<(), ConfigError> {
1681 match key {
1682 "llm_provider" => self.set_llm_provider(as_string(key, &value)?.as_str()),
1684 "llm_model" => self.set_llm_model(as_string(key, &value)?.as_str()),
1685 "llm_api_key" => self.set_llm_api_key(as_string(key, &value)?.as_str()),
1686 "llm_endpoint" => self.set_llm_endpoint(as_string(key, &value)?.as_str()),
1687 "llm_api_version" => self.set_llm_api_version(as_string(key, &value)?.as_str()),
1689 "llm_temperature" => self.set_llm_temperature(as_f64(key, &value)?),
1690 "llm_streaming" => self.set_llm_streaming(as_bool(key, &value)?),
1691 "llm_max_completion_tokens" => {
1692 self.set_llm_max_completion_tokens(as_u32(key, &value)?);
1693 }
1694 "llm_max_retries" => self.set_llm_max_retries(as_u32(key, &value)?),
1695 "llm_max_parallel_requests" => {
1696 self.set_llm_max_parallel_requests(as_u32(key, &value)?);
1697 }
1698 "llm_mock" => self.set_llm_mock(as_bool(key, &value)?),
1699 "llm_cassette" => self.set_llm_cassette(as_string(key, &value)?.as_str()),
1700 "llm_record_path" => self.set_llm_record_path(as_string(key, &value)?.as_str()),
1701 "embedding_provider" => {
1703 self.set_embedding_provider(as_string(key, &value)?.as_str());
1704 }
1705 "embedding_model" | "embedding_model_name" => {
1706 self.set_embedding_model(as_string(key, &value)?.as_str());
1707 }
1708 "embedding_dimensions" => self.set_embedding_dimensions(as_u32(key, &value)?),
1709 "embedding_endpoint" => {
1710 self.set_embedding_endpoint(as_string(key, &value)?.as_str());
1711 }
1712 "embedding_api_key" => self.set_embedding_api_key(as_string(key, &value)?.as_str()),
1713 "embedding_model_path" => {
1714 self.set_embedding_model_path(as_string(key, &value)?.as_str());
1715 }
1716 "embedding_tokenizer_path" => {
1717 self.set_embedding_tokenizer_path(as_string(key, &value)?.as_str());
1718 }
1719 "vector_db_provider" => {
1721 self.set_vector_db_provider(as_string(key, &value)?.as_str());
1722 }
1723 "vector_db_url" => self.set_vector_db_url(as_string(key, &value)?.as_str()),
1724 "vector_db_key" => self.set_vector_db_key(as_string(key, &value)?.as_str()),
1725 "vector_db_host" => self.set_vector_db_host(as_string(key, &value)?.as_str()),
1726 "vector_db_port" => self.set_vector_db_port(as_u16(key, &value)?),
1727 "vector_db_name" => self.set_vector_db_name(as_string(key, &value)?.as_str()),
1728 "graph_database_provider" => {
1730 self.set_graph_database_provider(as_string(key, &value)?.as_str());
1731 }
1732 "graph_model" => self.set_graph_model(as_string(key, &value)?.as_str()),
1733 "graph_file_path" => self.set_graph_file_path(as_string(key, &value)?.as_str()),
1734 "chunk_strategy" => self.set_chunk_strategy(as_string(key, &value)?.as_str()),
1736 "chunk_engine" => self.set_chunk_engine(as_string(key, &value)?.as_str()),
1737 "chunk_size" => self.set_chunk_size(as_u32(key, &value)?),
1738 "chunk_overlap" => self.set_chunk_overlap(as_u32(key, &value)?),
1739 "system_root_directory" => {
1741 self.set_system_root_directory(as_string(key, &value)?.as_str());
1742 }
1743 "data_root_directory" => {
1744 self.set_data_root_directory(as_string(key, &value)?.as_str());
1745 }
1746 "cache_root_directory" => {
1747 self.set_cache_root_directory(as_string(key, &value)?.as_str());
1748 }
1749 "logs_root_directory" => {
1750 self.set_logs_root_directory(as_string(key, &value)?.as_str());
1751 }
1752 "monitoring_tool" => self.set_monitoring_tool(as_string(key, &value)?.as_str()),
1753 "ontology_file_path" => {
1755 self.set_ontology_file_path(as_string(key, &value)?.as_str());
1756 }
1757 "ontology_resolver" => {
1758 self.set_ontology_resolver(as_string(key, &value)?.as_str());
1759 }
1760 "ontology_matching_strategy" => {
1761 self.set_ontology_matching_strategy(as_string(key, &value)?.as_str());
1762 }
1763 "embedding_api_version" => {
1765 self.set_embedding_api_version(as_string(key, &value)?.as_str());
1766 }
1767 "transcription_model" => {
1768 self.set_transcription_model(as_string(key, &value)?.as_str());
1769 }
1770 "llm_fallback_model" => {
1772 self.set_llm_fallback_model(as_string(key, &value)?.as_str());
1773 }
1774 "llm_fallback_provider" => {
1775 self.set_llm_fallback_provider(as_string(key, &value)?.as_str());
1776 }
1777 "llm_fallback_endpoint" => {
1778 self.set_llm_fallback_endpoint(as_string(key, &value)?.as_str());
1779 }
1780 "llm_fallback_api_key" => {
1781 self.set_llm_fallback_api_key(as_string(key, &value)?.as_str());
1782 }
1783 "relational_db_url" => {
1785 self.set_relational_db_url(as_string(key, &value)?.as_str());
1786 }
1787 "migration_db_url" => {
1788 self.set_migration_db_config(as_string(key, &value)?.as_str());
1789 }
1790 "classification_model" => {
1792 self.set_classification_model(as_string(key, &value)?.as_str());
1793 }
1794 "summarization_model" => {
1795 self.set_summarization_model(as_string(key, &value)?.as_str());
1796 }
1797 _ => return Err(ConfigError::UnknownKey(key.to_string())),
1798 }
1799 Ok(())
1800 }
1801}
1802
1803#[cfg(test)]
1804#[allow(
1805 clippy::unwrap_used,
1806 clippy::expect_used,
1807 reason = "test code — panics are acceptable failures"
1808)]
1809mod tests {
1810 use super::*;
1811
1812 #[test]
1816 #[serial_test::serial]
1817 fn overlay_picks_up_ontology_file_path() {
1818 unsafe { std::env::set_var("ONTOLOGY_FILE_PATH", "/tmp/test.owl") };
1820 let mut s = Settings::default();
1821 s.overlay_from_env();
1822 unsafe { std::env::remove_var("ONTOLOGY_FILE_PATH") };
1823
1824 assert_eq!(s.ontology_file_path, "/tmp/test.owl");
1825 assert_eq!(s.ontology_resolver, "rdflib");
1827 assert_eq!(s.ontology_matching_strategy, "fuzzy");
1828 }
1829
1830 #[test]
1831 #[serial_test::serial]
1832 fn overlay_picks_up_ontology_resolver() {
1833 unsafe { std::env::set_var("ONTOLOGY_RESOLVER", "custom") };
1835 let mut s = Settings::default();
1836 s.overlay_from_env();
1837 unsafe { std::env::remove_var("ONTOLOGY_RESOLVER") };
1838
1839 assert_eq!(s.ontology_resolver, "custom");
1840 }
1841
1842 #[test]
1843 #[serial_test::serial]
1844 fn overlay_picks_up_ontology_matching_strategy() {
1845 unsafe { std::env::set_var("ONTOLOGY_MATCHING_STRATEGY", "exact") };
1847 let mut s = Settings::default();
1848 s.overlay_from_env();
1849 unsafe { std::env::remove_var("ONTOLOGY_MATCHING_STRATEGY") };
1850
1851 assert_eq!(s.ontology_matching_strategy, "exact");
1852 }
1853
1854 #[test]
1855 #[serial_test::serial]
1856 fn overlay_ignores_empty_ontology_file_path() {
1857 unsafe { std::env::set_var("ONTOLOGY_FILE_PATH", "") };
1859 let mut s = Settings::default();
1860 s.overlay_from_env();
1861 unsafe { std::env::remove_var("ONTOLOGY_FILE_PATH") };
1862
1863 assert_eq!(s.ontology_file_path, "");
1866 }
1867
1868 #[test]
1869 #[serial_test::serial]
1870 fn overlay_picks_up_cache_backend() {
1871 unsafe { std::env::set_var("CACHE_BACKEND", "redis") };
1873 let mut s = Settings::default();
1874 s.overlay_from_env();
1875 unsafe { std::env::remove_var("CACHE_BACKEND") };
1876
1877 assert_eq!(s.cache_backend, "redis");
1878 }
1879
1880 #[test]
1881 #[serial_test::serial]
1882 fn overlay_llm_max_completion_tokens_primary() {
1883 unsafe { std::env::set_var("LLM_MAX_COMPLETION_TOKENS", "4096") };
1886 unsafe { std::env::set_var("LLM_MAX_TOKENS", "8192") };
1887 let mut s = Settings::default();
1888 s.overlay_from_env();
1889 unsafe { std::env::remove_var("LLM_MAX_COMPLETION_TOKENS") };
1890 unsafe { std::env::remove_var("LLM_MAX_TOKENS") };
1891
1892 assert_eq!(s.llm_max_completion_tokens, 4096);
1893 }
1894
1895 #[test]
1896 #[serial_test::serial]
1897 fn overlay_llm_max_completion_tokens_alias_fallback() {
1898 unsafe { std::env::remove_var("LLM_MAX_COMPLETION_TOKENS") };
1901 unsafe { std::env::set_var("LLM_MAX_TOKENS", "2048") };
1902 let mut s = Settings::default();
1903 s.overlay_from_env();
1904 unsafe { std::env::remove_var("LLM_MAX_TOKENS") };
1905
1906 assert_eq!(s.llm_max_completion_tokens, 2048);
1907 }
1908
1909 #[test]
1910 #[serial_test::serial]
1911 fn overlay_llm_streaming_bool_parsing() {
1912 for (input, expected) in [
1914 ("true", true),
1915 ("True", true),
1916 ("TRUE", true),
1917 ("1", true),
1918 ("yes", true),
1919 ("false", false),
1920 ("0", false),
1921 ("no", false),
1922 ] {
1923 unsafe { std::env::set_var("LLM_STREAMING", input) };
1924 let mut s = Settings::default();
1925 s.overlay_from_env();
1926 unsafe { std::env::remove_var("LLM_STREAMING") };
1927
1928 assert_eq!(
1929 s.llm_streaming, expected,
1930 "LLM_STREAMING={input} should parse to {expected}"
1931 );
1932 }
1933 }
1934
1935 #[test]
1938 fn config_manager_version_starts_at_zero() {
1939 let cm = ConfigManager::new(Settings::default());
1940 assert_eq!(cm.version(), 0);
1941 }
1942
1943 #[test]
1944 fn config_manager_setter_bumps_version() {
1945 let cm = ConfigManager::new(Settings::default());
1946 cm.set_llm_model("gpt-4o");
1947 assert_eq!(cm.version(), 1);
1948 assert_eq!(cm.read().llm_model, "gpt-4o");
1949
1950 cm.set_llm_api_key("sk-test");
1951 assert_eq!(cm.version(), 2);
1952 assert_eq!(cm.read().llm_api_key, "sk-test");
1953 }
1954
1955 #[test]
1956 fn config_manager_clone_shares_state() {
1957 let cm1 = ConfigManager::new(Settings::default());
1958 let cm2 = cm1.clone();
1959
1960 cm1.set_llm_model("shared-model");
1961 assert_eq!(cm2.read().llm_model, "shared-model");
1962 assert_eq!(cm2.version(), 1);
1963 }
1964
1965 #[test]
1966 fn config_manager_cascading_system_root() {
1967 let settings = Settings {
1968 system_root_directory: "/old/root".to_string(),
1969 graph_file_path: "/old/root/graph".to_string(),
1970 vector_db_url: "/old/root/vectors".to_string(),
1971 ..Default::default()
1972 };
1973
1974 let cm = ConfigManager::new(settings);
1975 cm.set_system_root_directory("/new/root");
1976
1977 let s = cm.read();
1978 assert_eq!(s.system_root_directory, "/new/root");
1979 assert_eq!(s.graph_file_path, "/new/root/graph");
1980 assert_eq!(s.vector_db_url, "/new/root/vectors");
1981 }
1982
1983 #[test]
1984 fn config_manager_cascading_empty_graph_and_vector() {
1985 let cm = ConfigManager::new(Settings::default());
1988 cm.set_system_root_directory("/data/cognee");
1989
1990 let s = cm.read();
1991 assert_eq!(s.graph_file_path, "/data/cognee/graph");
1992 assert_eq!(s.vector_db_url, "/data/cognee/vectors");
1993 }
1994
1995 #[test]
1996 fn config_manager_no_cascade_when_custom_paths() {
1997 let settings = Settings {
1998 system_root_directory: "/old".to_string(),
1999 graph_file_path: "/custom/graph".to_string(), vector_db_url: "/custom/vectors".to_string(), ..Default::default()
2002 };
2003
2004 let cm = ConfigManager::new(settings);
2005 cm.set_system_root_directory("/new");
2006
2007 let s = cm.read();
2008 assert_eq!(s.graph_file_path, "/custom/graph");
2010 assert_eq!(s.vector_db_url, "/custom/vectors");
2011 }
2012
2013 #[test]
2014 fn config_manager_generic_set_string() {
2015 let cm = ConfigManager::new(Settings::default());
2016 cm.set("llm_model", serde_json::Value::String("test-model".into()))
2017 .expect("set should succeed");
2018 assert_eq!(cm.read().llm_model, "test-model");
2019 }
2020
2021 #[test]
2022 fn config_manager_generic_set_u32() {
2023 let cm = ConfigManager::new(Settings::default());
2024 cm.set("chunk_size", serde_json::json!(2048))
2025 .expect("set should succeed");
2026 assert_eq!(cm.read().chunk_size, 2048);
2027 }
2028
2029 #[test]
2030 fn config_manager_generic_set_unknown_key() {
2031 let cm = ConfigManager::new(Settings::default());
2032 let result = cm.set("nonexistent_key", serde_json::json!("value"));
2033 assert!(result.is_err());
2034 match result.unwrap_err() {
2035 ConfigError::UnknownKey(k) => assert_eq!(k, "nonexistent_key"),
2036 other => panic!("expected UnknownKey, got: {other}"),
2037 }
2038 }
2039
2040 #[test]
2041 #[serial_test::serial]
2042 fn overlay_enable_backend_access_control() {
2043 unsafe { std::env::set_var("ENABLE_BACKEND_ACCESS_CONTROL", "true") };
2045 let mut s = Settings::default();
2046 s.overlay_from_env();
2047 unsafe { std::env::remove_var("ENABLE_BACKEND_ACCESS_CONTROL") };
2048
2049 assert!(s.enable_access_control);
2050
2051 unsafe { std::env::set_var("ENABLE_BACKEND_ACCESS_CONTROL", "1") };
2053 let mut s2 = Settings::default();
2054 s2.overlay_from_env();
2055 unsafe { std::env::remove_var("ENABLE_BACKEND_ACCESS_CONTROL") };
2056
2057 assert!(s2.enable_access_control);
2058 }
2059
2060 #[test]
2061 fn config_manager_generic_set_type_mismatch() {
2062 let cm = ConfigManager::new(Settings::default());
2063 let result = cm.set("chunk_size", serde_json::json!("not a number"));
2064 assert!(result.is_err());
2065 match result.unwrap_err() {
2066 ConfigError::TypeMismatch { key, .. } => assert_eq!(key, "chunk_size"),
2067 other => panic!("expected TypeMismatch, got: {other}"),
2068 }
2069 }
2070
2071 #[test]
2072 fn config_manager_bulk_llm_config() {
2073 let cm = ConfigManager::new(Settings::default());
2074 let mut map = HashMap::new();
2075 map.insert("llm_model".into(), serde_json::json!("gpt-4o"));
2076 map.insert("llm_provider".into(), serde_json::json!("openai"));
2077 cm.set_llm_config(&map).expect("bulk set should succeed");
2078
2079 let s = cm.read();
2080 assert_eq!(s.llm_model, "gpt-4o");
2081 assert_eq!(s.llm_provider, "openai");
2082 }
2083
2084 #[test]
2085 fn config_manager_bulk_embedding_config() {
2086 let cm = ConfigManager::new(Settings::default());
2087 let mut map = HashMap::new();
2088 map.insert("embedding_provider".into(), serde_json::json!("openai"));
2089 map.insert("embedding_dimensions".into(), serde_json::json!(1536));
2090 cm.set_embedding_config(&map)
2091 .expect("bulk set should succeed");
2092
2093 let s = cm.read();
2094 assert_eq!(s.embedding_provider, "openai");
2095 assert_eq!(s.embedding_dimensions, 1536);
2096 }
2097
2098 #[test]
2101 fn config_manager_new_granular_setters_bump_version() {
2102 let cm = ConfigManager::new(Settings::default());
2103 let mut expected_version = 0u64;
2104
2105 cm.set_llm_api_version("2024-02-15");
2106 expected_version += 1;
2107 assert_eq!(cm.read().llm_api_version, "2024-02-15");
2108
2109 cm.set_llm_temperature(0.7);
2110 expected_version += 1;
2111 assert!((cm.read().llm_temperature - 0.7).abs() < f64::EPSILON);
2112
2113 cm.set_llm_streaming(true);
2114 expected_version += 1;
2115 assert!(cm.read().llm_streaming);
2116
2117 cm.set_llm_max_completion_tokens(2048);
2118 expected_version += 1;
2119 assert_eq!(cm.read().llm_max_completion_tokens, 2048);
2120
2121 cm.set_llm_max_retries(5);
2122 expected_version += 1;
2123 assert_eq!(cm.read().llm_max_retries, 5);
2124
2125 cm.set_llm_max_parallel_requests(8);
2126 expected_version += 1;
2127 assert_eq!(cm.read().llm_max_parallel_requests, 8);
2128
2129 cm.set_embedding_model_path("/models/m.onnx");
2130 expected_version += 1;
2131 assert_eq!(cm.read().embedding_model_path, "/models/m.onnx");
2132
2133 cm.set_embedding_tokenizer_path("/models/t.json");
2134 expected_version += 1;
2135 assert_eq!(cm.read().embedding_tokenizer_path, "/models/t.json");
2136
2137 cm.set_vector_db_host("localhost");
2138 expected_version += 1;
2139 assert_eq!(cm.read().vector_db_host, "localhost");
2140
2141 cm.set_vector_db_port(6333);
2142 expected_version += 1;
2143 assert_eq!(cm.read().vector_db_port, 6333);
2144
2145 cm.set_vector_db_name("my_collection");
2146 expected_version += 1;
2147 assert_eq!(cm.read().vector_db_name, "my_collection");
2148
2149 cm.set_graph_file_path("/data/graph");
2150 expected_version += 1;
2151 assert_eq!(cm.read().graph_file_path, "/data/graph");
2152
2153 cm.set_cache_root_directory("/tmp/cache");
2154 expected_version += 1;
2155 assert_eq!(cm.read().cache_root_directory, "/tmp/cache");
2156
2157 cm.set_logs_root_directory("/tmp/logs");
2158 expected_version += 1;
2159 assert_eq!(cm.read().logs_root_directory, "/tmp/logs");
2160
2161 cm.set_ontology_file_path("/onto.owl");
2162 expected_version += 1;
2163 assert_eq!(cm.read().ontology_file_path, "/onto.owl");
2164
2165 cm.set_ontology_resolver("custom");
2166 expected_version += 1;
2167 assert_eq!(cm.read().ontology_resolver, "custom");
2168
2169 cm.set_ontology_matching_strategy("exact");
2170 expected_version += 1;
2171 assert_eq!(cm.read().ontology_matching_strategy, "exact");
2172
2173 assert_eq!(cm.version(), expected_version);
2175 }
2176
2177 #[test]
2178 fn config_manager_set_graph_file_path_does_not_cascade() {
2179 let settings = Settings {
2180 system_root_directory: "/root".to_string(),
2181 vector_db_url: "/root/vectors".to_string(),
2182 ..Default::default()
2183 };
2184 let cm = ConfigManager::new(settings);
2185 cm.set_graph_file_path("/elsewhere/graph");
2186
2187 let s = cm.read();
2188 assert_eq!(s.graph_file_path, "/elsewhere/graph");
2189 assert_eq!(s.vector_db_url, "/root/vectors");
2191 assert_eq!(s.system_root_directory, "/root");
2192 }
2193
2194 #[test]
2197 fn config_manager_generic_set_new_keys() {
2198 let cm = ConfigManager::new(Settings::default());
2199
2200 cm.set("llm_temperature", serde_json::json!(0.5))
2201 .expect("llm_temperature should be settable");
2202 assert!((cm.read().llm_temperature - 0.5).abs() < f64::EPSILON);
2203
2204 cm.set("llm_streaming", serde_json::json!(true))
2205 .expect("llm_streaming should be settable");
2206 assert!(cm.read().llm_streaming);
2207
2208 cm.set("llm_max_retries", serde_json::json!(7))
2209 .expect("llm_max_retries should be settable");
2210 assert_eq!(cm.read().llm_max_retries, 7);
2211
2212 cm.set("vector_db_host", serde_json::json!("host"))
2213 .expect("vector_db_host should be settable");
2214 assert_eq!(cm.read().vector_db_host, "host");
2215
2216 cm.set("vector_db_port", serde_json::json!(6333))
2217 .expect("vector_db_port should be settable");
2218 assert_eq!(cm.read().vector_db_port, 6333);
2219
2220 cm.set("graph_file_path", serde_json::json!("/g"))
2221 .expect("graph_file_path should be settable");
2222 assert_eq!(cm.read().graph_file_path, "/g");
2223
2224 cm.set("cache_root_directory", serde_json::json!("/c"))
2225 .expect("cache_root_directory should be settable");
2226 assert_eq!(cm.read().cache_root_directory, "/c");
2227
2228 cm.set("logs_root_directory", serde_json::json!("/l"))
2229 .expect("logs_root_directory should be settable");
2230 assert_eq!(cm.read().logs_root_directory, "/l");
2231
2232 cm.set("ontology_file_path", serde_json::json!("/o.owl"))
2233 .expect("ontology_file_path should be settable");
2234 assert_eq!(cm.read().ontology_file_path, "/o.owl");
2235
2236 cm.set("embedding_model_path", serde_json::json!("/m.onnx"))
2237 .expect("embedding_model_path should be settable");
2238 assert_eq!(cm.read().embedding_model_path, "/m.onnx");
2239 }
2240
2241 #[test]
2242 fn config_manager_generic_set_u16_type_mismatch() {
2243 let cm = ConfigManager::new(Settings::default());
2244 let result = cm.set("vector_db_port", serde_json::json!("not a number"));
2245 match result.unwrap_err() {
2246 ConfigError::TypeMismatch { key, .. } => assert_eq!(key, "vector_db_port"),
2247 other => panic!("expected TypeMismatch, got: {other}"),
2248 }
2249 }
2250
2251 #[test]
2254 fn config_manager_bulk_llm_config_new_keys() {
2255 let cm = ConfigManager::new(Settings::default());
2256 let mut map = HashMap::new();
2257 map.insert("llm_streaming".into(), serde_json::json!(true));
2258 map.insert("llm_max_retries".into(), serde_json::json!(9));
2259 map.insert("llm_max_parallel_requests".into(), serde_json::json!(3));
2260 cm.set_llm_config(&map).expect("bulk set should succeed");
2261
2262 let s = cm.read();
2263 assert!(s.llm_streaming);
2264 assert_eq!(s.llm_max_retries, 9);
2265 assert_eq!(s.llm_max_parallel_requests, 3);
2266 }
2267
2268 #[test]
2269 fn config_manager_bulk_vector_db_config_new_keys() {
2270 let cm = ConfigManager::new(Settings::default());
2271 let mut map = HashMap::new();
2272 map.insert("vector_db_host".into(), serde_json::json!("vhost"));
2273 map.insert("vector_db_port".into(), serde_json::json!(6333));
2274 map.insert("vector_db_name".into(), serde_json::json!("coll"));
2275 cm.set_vector_db_config(&map)
2276 .expect("bulk set should succeed");
2277
2278 let s = cm.read();
2279 assert_eq!(s.vector_db_host, "vhost");
2280 assert_eq!(s.vector_db_port, 6333);
2281 assert_eq!(s.vector_db_name, "coll");
2282 }
2283
2284 #[test]
2285 fn config_manager_bulk_embedding_config_new_keys() {
2286 let cm = ConfigManager::new(Settings::default());
2287 let mut map = HashMap::new();
2288 map.insert("embedding_model_path".into(), serde_json::json!("/m.onnx"));
2289 map.insert(
2290 "embedding_tokenizer_path".into(),
2291 serde_json::json!("/t.json"),
2292 );
2293 cm.set_embedding_config(&map)
2294 .expect("bulk set should succeed");
2295
2296 let s = cm.read();
2297 assert_eq!(s.embedding_model_path, "/m.onnx");
2298 assert_eq!(s.embedding_tokenizer_path, "/t.json");
2299 }
2300
/// Passes a vector-DB key to `set_llm_config` and expects `ConfigError::UnknownKey` carrying
/// that key name.
#[test]
fn config_manager_bulk_llm_config_rejects_out_of_subset_key() {
    let manager = ConfigManager::new(Settings::default());
    let updates = HashMap::from([("vector_db_url".into(), serde_json::json!("/v"))]);

    let err = manager.set_llm_config(&updates).unwrap_err();
    if let ConfigError::UnknownKey(key) = &err {
        assert_eq!(*key, "vector_db_url");
    } else {
        panic!("expected UnknownKey, got: {err}");
    }
}
2312
/// Checks the target-dependent embedding defaults (Android vs. everything else) and that the
/// embedding endpoint and API key default to empty strings.
#[test]
fn config_manager_embedding_fields_default() {
    let defaults = Settings::default();

    // Android builds are expected to default to the ONNX provider.
    #[cfg(target_os = "android")]
    {
        assert_eq!(defaults.embedding_provider, "onnx");
        assert_eq!(defaults.embedding_dimensions, 384);
    }
    // Every other target is expected to default to the OpenAI provider.
    #[cfg(not(target_os = "android"))]
    {
        assert_eq!(defaults.embedding_provider, "openai");
        assert_eq!(defaults.embedding_model_name, "text-embedding-3-small");
        assert_eq!(defaults.embedding_dimensions, 1536);
    }

    // Target-independent defaults.
    assert_eq!(defaults.embedding_endpoint, "");
    assert_eq!(defaults.embedding_api_key, "");
}
2333
/// Verifies that `overlay_from_env` copies `EMBEDDING_PROVIDER` into
/// `Settings::embedding_provider`.
///
/// The settings start from a sentinel provider instead of the default: "openai" is already
/// the default provider on non-Android targets, so starting from `Settings::default()` would
/// let this test pass there even if the overlay ignored the variable entirely.
#[test]
#[serial_test::serial]
fn overlay_picks_up_embedding_provider() {
    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe { std::env::set_var("EMBEDDING_PROVIDER", "openai") };
    let mut s = Settings {
        embedding_provider: "sentinel-before-overlay".into(),
        ..Settings::default()
    };
    s.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe { std::env::remove_var("EMBEDDING_PROVIDER") };

    assert_eq!(s.embedding_provider, "openai");
}
2345
/// Verifies that `overlay_from_env` copies `LOG_LEVEL` into `Settings::log_level`.
#[test]
#[serial_test::serial]
fn overlay_picks_up_log_level() {
    const VAR: &str = "LOG_LEVEL";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe { std::env::set_var(VAR, "debug") };
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe { std::env::remove_var(VAR) };

    assert_eq!(settings.log_level, "debug");
}
2357
/// Verifies that `overlay_from_env` copies `COGNEE_LOGS_DIR` into
/// `Settings::logs_root_directory`.
#[test]
#[serial_test::serial]
fn overlay_picks_up_cognee_logs_dir() {
    const VAR: &str = "COGNEE_LOGS_DIR";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe { std::env::set_var(VAR, "/tmp/logs") };
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe { std::env::remove_var(VAR) };

    assert_eq!(settings.logs_root_directory, "/tmp/logs");
}
2369
/// Verifies that `overlay_from_env` copies `CACHE_ROOT_DIRECTORY` into
/// `Settings::cache_root_directory`.
#[test]
#[serial_test::serial]
fn overlay_picks_up_cache_root_directory() {
    const VAR: &str = "CACHE_ROOT_DIRECTORY";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe { std::env::set_var(VAR, "/tmp/cache") };
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe { std::env::remove_var(VAR) };

    assert_eq!(settings.cache_root_directory, "/tmp/cache");
}
2381
/// Verifies that `ENABLE_LAST_ACCESSED=yes` turns on `Settings::enable_last_accessed` after
/// `overlay_from_env`.
#[test]
#[serial_test::serial]
fn overlay_picks_up_enable_last_accessed() {
    const VAR: &str = "ENABLE_LAST_ACCESSED";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe { std::env::set_var(VAR, "yes") };
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe { std::env::remove_var(VAR) };

    assert!(settings.enable_last_accessed);
}
2393
/// Verifies that `overlay_from_env` copies `OTEL_SERVICE_NAME` into
/// `Settings::otel_service_name`.
#[test]
#[serial_test::serial]
fn overlay_picks_up_otel_service_name() {
    const VAR: &str = "OTEL_SERVICE_NAME";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe { std::env::set_var(VAR, "my-service") };
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe { std::env::remove_var(VAR) };

    assert_eq!(settings.otel_service_name, "my-service");
}
2405
/// Verifies that `overlay_from_env` copies `OTEL_EXPORTER_OTLP_ENDPOINT` into
/// `Settings::otel_exporter_otlp_endpoint`.
#[test]
#[serial_test::serial]
fn overlay_picks_up_otel_exporter_otlp_endpoint() {
    const VAR: &str = "OTEL_EXPORTER_OTLP_ENDPOINT";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe { std::env::set_var(VAR, "http://collector:4317") };
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe { std::env::remove_var(VAR) };

    assert_eq!(settings.otel_exporter_otlp_endpoint, "http://collector:4317");
}
2417
/// Verifies that `overlay_from_env` copies `OTEL_EXPORTER_OTLP_HEADERS` into
/// `Settings::otel_exporter_otlp_headers` without altering the comma-separated value.
#[test]
#[serial_test::serial]
fn overlay_picks_up_otel_exporter_otlp_headers() {
    const VAR: &str = "OTEL_EXPORTER_OTLP_HEADERS";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe { std::env::set_var(VAR, "authorization=Bearer abc,x-trace=on") };
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe { std::env::remove_var(VAR) };

    assert_eq!(
        settings.otel_exporter_otlp_headers,
        "authorization=Bearer abc,x-trace=on"
    );
}
2437
/// Verifies that `overlay_from_env` copies `OTEL_EXPORTER_OTLP_PROTOCOL` into
/// `Settings::otel_exporter_otlp_protocol`.
#[test]
#[serial_test::serial]
fn overlay_picks_up_otel_exporter_otlp_protocol() {
    const VAR: &str = "OTEL_EXPORTER_OTLP_PROTOCOL";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe { std::env::set_var(VAR, "http/protobuf") };
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe { std::env::remove_var(VAR) };

    assert_eq!(settings.otel_exporter_otlp_protocol, "http/protobuf");
}
2449
/// Verifies that `overlay_from_env` copies `OTEL_SPAN_PROCESSOR` into
/// `Settings::otel_span_processor`.
#[test]
#[serial_test::serial]
fn overlay_picks_up_otel_span_processor() {
    const VAR: &str = "OTEL_SPAN_PROCESSOR";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe { std::env::set_var(VAR, "simple") };
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe { std::env::remove_var(VAR) };

    assert_eq!(settings.otel_span_processor, "simple");
}
2461
/// Verifies that `overlay_from_env` copies both `OTEL_TRACES_SAMPLER` and
/// `OTEL_TRACES_SAMPLER_ARG` into their matching `Settings` fields.
#[test]
#[serial_test::serial]
fn overlay_picks_up_otel_traces_sampler() {
    const SAMPLER_VAR: &str = "OTEL_TRACES_SAMPLER";
    const SAMPLER_ARG_VAR: &str = "OTEL_TRACES_SAMPLER_ARG";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe {
        std::env::set_var(SAMPLER_VAR, "parentbased_traceidratio");
        std::env::set_var(SAMPLER_ARG_VAR, "0.25");
    }
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe {
        std::env::remove_var(SAMPLER_VAR);
        std::env::remove_var(SAMPLER_ARG_VAR);
    }

    assert_eq!(settings.otel_traces_sampler, "parentbased_traceidratio");
    assert_eq!(settings.otel_traces_sampler_arg, "0.25");
}
2476
/// Verifies that `overlay_from_env` parses `LLM_RATE_LIMIT_REQUESTS` and
/// `EMBEDDING_RATE_LIMIT_REQUESTS` into their numeric `Settings` fields.
#[test]
#[serial_test::serial]
fn overlay_picks_up_rate_limit_requests() {
    const LLM_VAR: &str = "LLM_RATE_LIMIT_REQUESTS";
    const EMBEDDING_VAR: &str = "EMBEDDING_RATE_LIMIT_REQUESTS";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe {
        std::env::set_var(LLM_VAR, "120");
        std::env::set_var(EMBEDDING_VAR, "30");
    }
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe {
        std::env::remove_var(LLM_VAR);
        std::env::remove_var(EMBEDDING_VAR);
    }

    assert_eq!(settings.llm_rate_limit_requests, 120);
    assert_eq!(settings.embedding_rate_limit_requests, 30);
}
2491
/// Verifies that `overlay_from_env` copies `STORAGE_BACKEND` and `STORAGE_BUCKET_NAME` into
/// their matching `Settings` fields.
#[test]
#[serial_test::serial]
fn overlay_picks_up_storage_backend() {
    const BACKEND_VAR: &str = "STORAGE_BACKEND";
    const BUCKET_VAR: &str = "STORAGE_BUCKET_NAME";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe {
        std::env::set_var(BACKEND_VAR, "s3");
        std::env::set_var(BUCKET_VAR, "my-bucket");
    }
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe {
        std::env::remove_var(BACKEND_VAR);
        std::env::remove_var(BUCKET_VAR);
    }

    assert_eq!(settings.storage_backend, "s3");
    assert_eq!(settings.storage_bucket_name, "my-bucket");
}
2506
/// Pins the values produced by `Settings::default()` for the cache, session, logging,
/// rate-limit, storage, tracing and embedding-provider fields.
#[test]
fn default_values_are_correct() {
    let defaults = Settings::default();

    // Cache and session.
    assert_eq!(defaults.cache_backend, "fs");
    assert_eq!(defaults.cache_host, "localhost");
    assert_eq!(defaults.cache_port, 6379);
    assert_eq!(defaults.session_ttl_seconds, 604800);
    assert!(defaults.enable_caching);

    // Feature toggles and logging.
    assert!(!defaults.auto_feedback);
    assert!(!defaults.enable_access_control);
    assert_eq!(defaults.log_level, "info");

    // Rate limiting for LLM and embedding calls.
    assert!(!defaults.llm_rate_limit_enabled);
    assert_eq!(defaults.llm_rate_limit_requests, 60);
    assert_eq!(defaults.llm_rate_limit_interval, 60);
    assert!(!defaults.embedding_rate_limit_enabled);
    assert_eq!(defaults.embedding_rate_limit_requests, 60);
    assert_eq!(defaults.embedding_rate_limit_interval, 60);

    // Storage.
    assert_eq!(defaults.storage_backend, "local");

    // Tracing / OpenTelemetry.
    assert!(!defaults.cognee_tracing_enabled);
    assert_eq!(defaults.otel_service_name, "cognee");
    assert_eq!(defaults.otel_exporter_otlp_endpoint, "");
    assert_eq!(defaults.otel_exporter_otlp_headers, "");
    assert_eq!(defaults.otel_exporter_otlp_protocol, "grpc");
    assert_eq!(defaults.otel_span_processor, "batch");
    assert_eq!(defaults.otel_traces_sampler, "");
    assert_eq!(defaults.otel_traces_sampler_arg, "");

    assert!(!defaults.enable_last_accessed);

    // The default embedding provider depends on the compilation target.
    #[cfg(not(target_os = "android"))]
    assert_eq!(defaults.embedding_provider, "openai");
    #[cfg(target_os = "android")]
    assert_eq!(defaults.embedding_provider, "onnx");
}
2539
/// Verifies that `overlay_from_env` copies `EMBEDDING_ENDPOINT` into
/// `Settings::embedding_endpoint`.
#[test]
#[serial_test::serial]
fn overlay_picks_up_embedding_endpoint() {
    const VAR: &str = "EMBEDDING_ENDPOINT";

    // SAFETY: this test is `#[serial_test::serial]`, so it does not run concurrently with the
    // other serial tests that mutate the process environment.
    unsafe { std::env::set_var(VAR, "https://api.example.com/embed") };
    let mut settings = Settings::default();
    settings.overlay_from_env();
    // SAFETY: same serialisation argument as for `set_var` above.
    unsafe { std::env::remove_var(VAR) };

    assert_eq!(settings.embedding_endpoint, "https://api.example.com/embed");
}
2551
/// Asserts that the key set of `telemetry_snapshot()` for default settings equals the expected
/// allowlist exactly, with no extra and no missing keys.
#[test]
fn telemetry_snapshot_only_emits_allowlisted_keys() {
    use std::collections::BTreeSet;

    let allowlist = BTreeSet::from([
        "sdk_runtime",
        "vector_db_provider",
        "graph_db_provider",
        "relational_db_provider",
        "llm_provider",
        "llm_model",
        "embedding_provider",
        "embedding_model",
        "embedding_dimensions",
        "chunk_strategy",
    ]);

    let settings = Settings::default();
    let snapshot = settings.telemetry_snapshot();
    let emitted: BTreeSet<&str> = snapshot.keys().map(String::as_str).collect();

    assert_eq!(
        emitted, allowlist,
        "telemetry_snapshot must not leak fields outside the allowlist"
    );
}
2577
/// Fills credential and URL fields with recognisable values, then asserts that none of the
/// listed substrings appear in the JSON-serialised `telemetry_snapshot()`.
#[test]
fn telemetry_snapshot_redacts_credentials_and_urls() {
    const LLM_KEY: &str = "sk-secret";
    const EMBEDDING_KEY: &str = "sk-also-secret";
    const VECTOR_PASSWORD: &str = "vector-pass";
    const DB_PASSWORD: &str = "db-pass";

    let settings = Settings {
        relational_db_url: "postgres://user:pass@host/db".into(),
        embedding_endpoint: "https://internal.example/v1/embed".into(),
        llm_api_key: LLM_KEY.into(),
        embedding_api_key: EMBEDDING_KEY.into(),
        vector_db_password: VECTOR_PASSWORD.into(),
        db_password: DB_PASSWORD.into(),
        ..Settings::default()
    };

    let snapshot = settings.telemetry_snapshot();
    let json = serde_json::to_string(&snapshot)
        .expect("serde_json::Map<String,Value> always serializes");

    // Secrets are checked verbatim; URLs are checked by a distinctive fragment.
    let must_not_appear = [
        LLM_KEY,
        EMBEDDING_KEY,
        VECTOR_PASSWORD,
        DB_PASSWORD,
        "postgres://",
        "internal.example",
    ];
    for forbidden in must_not_appear {
        assert!(
            !json.contains(forbidden),
            "telemetry_snapshot leaked credential/URL substring: {forbidden}"
        );
    }
}
2607
/// Asserts that `telemetry_snapshot()` reports `sdk_runtime` as the JSON string `"rust"`.
#[test]
fn telemetry_snapshot_carries_sdk_runtime_rust() {
    let expected_runtime = serde_json::Value::String("rust".into());

    let settings = Settings::default();
    let snapshot = settings.telemetry_snapshot();

    assert_eq!(snapshot.get("sdk_runtime"), Some(&expected_runtime));
}
2617
/// Pins the default graph database provider and the default logs directory.
#[test]
fn test_config_defaults_match_expected_values() {
    let defaults = Settings::default();

    assert_eq!(defaults.graph_database_provider, "ladybug");
    assert_eq!(defaults.logs_root_directory, "./logs");
}
2624
/// Sets an LLM API key and asserts that `get_settings()` returns a non-empty value for
/// `llm_api_key` that differs from the raw key.
#[test]
fn test_get_settings_masks_secrets() {
    const RAW_KEY: &str = "my-secret-key";

    let manager = ConfigManager::new(Settings::default());
    manager.set_llm_api_key(RAW_KEY);

    let exported = manager.get_settings();
    // A missing or non-string entry collapses to "" and trips the non-empty assertion below.
    let shown_key = exported
        .get("llm_api_key")
        .and_then(|value| value.as_str())
        .unwrap_or_default();

    assert_ne!(shown_key, RAW_KEY, "API key must be masked");
    assert!(!shown_key.is_empty(), "api_key field must be non-empty");
}
2640
/// Checks URL masking in `get_settings()`: a URL with userinfo must come back with the
/// credentials replaced by `<redacted>` and the host kept, while a URL without userinfo must
/// come back unchanged.
#[test]
fn test_get_settings_masks_url_credentials() {
    // Case 1: the URL carries `user:password@` userinfo.
    let masked_cfg = ConfigManager::new(Settings::default());
    masked_cfg.set_relational_db_url("postgres://admin:s3cret@db.example.com:5432/cognee");
    let exported = masked_cfg.get_settings();
    let url = exported
        .get("relational_db_url")
        .and_then(|value| value.as_str())
        .unwrap_or_default();
    assert!(
        !url.contains("s3cret") && !url.contains("admin"),
        "URL credentials must be masked, got: {url}"
    );
    assert!(
        url.contains("db.example.com") && url.contains("<redacted>"),
        "host must remain and userinfo redacted, got: {url}"
    );

    // Case 2: nothing to mask, so the URL is expected back verbatim.
    const PLAIN_URL: &str = "sqlite:///tmp/test.db";
    let plain_cfg = ConfigManager::new(Settings::default());
    plain_cfg.set_relational_db_url(PLAIN_URL);
    let plain_exported = plain_cfg.get_settings();
    assert_eq!(
        plain_exported
            .get("relational_db_url")
            .and_then(|value| value.as_str()),
        Some(PLAIN_URL)
    );
}
2667
/// Calls `set_relational_db_config` with only the URL and provider supplied and checks that
/// both are stored.
#[test]
fn test_set_relational_db_config_bulk() {
    let url = "sqlite:///tmp/test.db";
    let provider = "sqlite";

    let manager = ConfigManager::new(Settings::default());
    // Only the first two arguments are set; the remaining five are passed as `None`.
    manager.set_relational_db_config(Some(url), Some(provider), None, None, None, None, None);

    let stored = manager.read();
    assert_eq!(stored.relational_db_url, url);
    assert_eq!(stored.db_provider, provider);
}
2684
/// Exercises the four LLM fallback setters and checks that each stored value reads back
/// unchanged.
#[test]
fn test_llm_fallback_setters() {
    const MODEL: &str = "gpt-4o-mini";
    const PROVIDER: &str = "openai";
    const ENDPOINT: &str = "https://fallback.example.com/v1";
    const API_KEY: &str = "fallback-key";

    let manager = ConfigManager::new(Settings::default());
    manager.set_llm_fallback_model(MODEL);
    manager.set_llm_fallback_provider(PROVIDER);
    manager.set_llm_fallback_endpoint(ENDPOINT);
    manager.set_llm_fallback_api_key(API_KEY);

    let stored = manager.read();
    assert_eq!(stored.llm_fallback_model, MODEL);
    assert_eq!(stored.llm_fallback_provider, PROVIDER);
    assert_eq!(stored.llm_fallback_endpoint, ENDPOINT);
    assert_eq!(stored.llm_fallback_api_key, API_KEY);
}
2698
/// Checks that `set_embedding_api_version` stores the value read back via `read()`.
#[test]
fn test_embedding_api_version_setter() {
    const API_VERSION: &str = "2024-02-15";

    let manager = ConfigManager::new(Settings::default());
    manager.set_embedding_api_version(API_VERSION);

    assert_eq!(manager.read().embedding_api_version, API_VERSION);
}
2705
/// Checks that `set_transcription_model` stores the value read back via `read()`.
#[test]
fn test_transcription_model_setter() {
    const MODEL: &str = "whisper-1";

    let manager = ConfigManager::new(Settings::default());
    manager.set_transcription_model(MODEL);

    assert_eq!(manager.read().transcription_model, MODEL);
}
2712
/// Checks that `set_migration_db_config` stores its argument in `migration_db_url`.
#[test]
fn test_migration_db_config_setter() {
    const MIGRATION_URL: &str = "postgres://localhost/migrations";

    let manager = ConfigManager::new(Settings::default());
    manager.set_migration_db_config(MIGRATION_URL);

    assert_eq!(manager.read().migration_db_url, MIGRATION_URL);
}
2722}