Skip to main content

cognee_lib/
config.rs

1#![allow(
2    clippy::expect_used,
3    reason = "RwLock/Mutex expect calls — lock poison is unrecoverable"
4)]
5//! Shared configuration types for cognee-rust.
6
7use std::collections::HashMap;
8use std::sync::atomic::{AtomicU64, Ordering};
9use std::sync::{Arc, RwLock, RwLockReadGuard};
10
11use serde::{Deserialize, Serialize};
12
/// File name of the default system prompt.
///
/// Used as the initial value of `Settings::default_system_prompt_path` in
/// `Settings::default()`.
pub const DEFAULT_SYSTEM_PROMPT_PATH: &str = "answer_simple_question.txt";
14
/// Flat configuration record for cognee-rust.
///
/// Every field has a default (see `impl Default for Settings`), and the
/// container-level `#[serde(default)]` lets a partial document deserialize
/// with the missing fields filled in from those defaults.
///
/// [`Settings::overlay_from_env`] layers environment variables on top of an
/// existing value. Where a field doc quotes an env-var name, that is the
/// variable the overlay reads for it; fields without a quoted env var are not
/// touched by the overlay.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct Settings {
    // -- Identity defaults -------------------------------------------------------
    /// Default user id (`COGNEE_DEFAULT_USER_ID`).
    pub default_user_id: String,
    /// Default dataset name (`COGNEE_DEFAULT_DATASET_NAME`).
    pub default_dataset_name: String,

    // -- Base directories --------------------------------------------------------
    /// System root directory (`COGNEE_SYSTEM_ROOT_DIRECTORY`).
    pub system_root_directory: String,
    /// Data root directory (`COGNEE_DATA_ROOT_DIRECTORY`).
    pub data_root_directory: String,
    /// Cache root directory (`CACHE_ROOT_DIRECTORY`).
    pub cache_root_directory: String,
    /// Logs root directory (`COGNEE_LOGS_DIR`).
    pub logs_root_directory: String,
    pub monitoring_tool: String,

    // -- Structured-output model names -------------------------------------------
    pub classification_model: String,
    pub summarization_model: String,
    pub graph_model: String,

    /// Custom JSON schema for summarization output (Python `summarization_model` parity).
    /// `#[serde(skip)]` keeps config snapshots stable.
    #[serde(skip)]
    pub summarization_schema: Option<serde_json::Value>,

    // -- LLM ---------------------------------------------------------------------
    /// LLM provider (`LLM_PROVIDER`).
    pub llm_provider: String,
    /// LLM model name (`LLM_MODEL`, fallback alias `OPENAI_MODEL`).
    pub llm_model: String,
    /// LLM API key (`LLM_API_KEY`, fallback alias `OPENAI_TOKEN`).
    pub llm_api_key: String,
    /// LLM API endpoint (`LLM_ENDPOINT`, fallback alias `OPENAI_URL`).
    pub llm_endpoint: String,
    /// LLM API version string (`LLM_API_VERSION`).
    pub llm_api_version: String,
    /// Sampling temperature (`LLM_TEMPERATURE`, parsed as `f64`).
    pub llm_temperature: f64,
    /// Streaming toggle (`LLM_STREAMING`).
    pub llm_streaming: bool,
    /// Completion token limit (`LLM_MAX_COMPLETION_TOKENS`, fallback alias
    /// `LLM_MAX_TOKENS`).
    pub llm_max_completion_tokens: u32,
    /// Retry limit (`LLM_MAX_RETRIES`).
    pub llm_max_retries: u32,
    /// Parallel request limit (`LLM_MAX_PARALLEL_REQUESTS`).
    pub llm_max_parallel_requests: u32,

    /// Select the record/replay mock LLM instead of the real provider
    /// (`MOCK_LLM`). Parallels `MOCK_EMBEDDING`. Requires the `mock-llm` feature.
    pub llm_mock: bool,
    /// Cassette path for the replay mock when `llm_mock` is set (`MOCK_LLM_CASSETTE`).
    /// Empty = unset.
    pub llm_cassette: String,
    /// When non-empty, wrap the real adapter in a recording mock that writes a
    /// cassette to this path (`COGNEE_RECORD_LLM`). Empty = unset.
    pub llm_record_path: String,

    pub graph_prompt_path: String,

    // -- LLM fallback -----------------------------------------------------------
    /// Fallback LLM model name used when the primary model fails.
    pub llm_fallback_model: String,
    /// Fallback LLM provider (e.g. `"openai"`, `"ollama"`).
    pub llm_fallback_provider: String,
    /// Base URL for the fallback LLM API endpoint.
    pub llm_fallback_endpoint: String,
    /// API key for the fallback LLM provider.
    pub llm_fallback_api_key: String,

    // -- Graph database ----------------------------------------------------------
    /// Graph database provider (`GRAPH_DATABASE_PROVIDER`).
    pub graph_database_provider: String,
    /// Graph database URL (`GRAPH_DATABASE_URL`).
    pub graph_database_url: String,
    /// Graph database name (`GRAPH_DATABASE_NAME`).
    pub graph_database_name: String,
    /// Graph database username (`GRAPH_DATABASE_USERNAME`).
    pub graph_database_username: String,
    /// Graph database password (`GRAPH_DATABASE_PASSWORD`).
    pub graph_database_password: String,
    /// Graph database port (`GRAPH_DATABASE_PORT`).
    pub graph_database_port: u16,
    /// Graph database host (`GRAPH_DATABASE_HOST`).
    pub graph_database_host: String,
    /// Graph database key (`GRAPH_DATABASE_KEY`).
    pub graph_database_key: String,
    /// Graph file path (`GRAPH_FILE_PATH`).
    pub graph_file_path: String,
    pub graph_filename: String,

    // -- Vector database ---------------------------------------------------------
    /// Vector database provider (`VECTOR_DB_PROVIDER`).
    pub vector_db_provider: String,
    /// Vector database URL (`VECTOR_DB_URL`).
    pub vector_db_url: String,
    /// Vector database port (`VECTOR_DB_PORT`).
    pub vector_db_port: u16,
    /// Vector database name (`VECTOR_DB_NAME`).
    pub vector_db_name: String,
    /// Vector database key (`VECTOR_DB_KEY`).
    pub vector_db_key: String,
    /// Vector database username (`VECTOR_DB_USERNAME`).
    pub vector_db_username: String,
    /// Vector database password (`VECTOR_DB_PASSWORD`).
    pub vector_db_password: String,
    /// Vector database host (`VECTOR_DB_HOST`).
    pub vector_db_host: String,

    // -- Chunking ----------------------------------------------------------------
    pub chunk_strategy: String,
    pub chunk_engine: String,
    pub chunk_size: u32,
    pub chunk_overlap: u32,

    // -- Relational database -----------------------------------------------------
    /// Relational DB connection URL (`DATABASE_URL`). Returned verbatim by
    /// `resolved_relational_db_url` unless `db_provider` is `"postgres"`.
    pub relational_db_url: String,
    pub migration_db_url: String,

    /// Selects the relational DB backend: `"sqlite"` (default) or `"postgres"`.
    /// When set to `"postgres"`, the individual `db_host`/`db_port`/`db_name`/
    /// `db_username`/`db_password` fields are used instead of `relational_db_url`.
    /// Mirrors the Python `DB_PROVIDER` environment variable.
    pub db_provider: String,
    /// Relational DB host (`DB_HOST`).
    pub db_host: String,
    /// Relational DB port (`DB_PORT`).
    pub db_port: u16,
    /// Relational DB name (`DB_NAME`).
    pub db_name: String,
    /// Relational DB username (`DB_USERNAME`).
    pub db_username: String,
    /// Relational DB password (`DB_PASSWORD`).
    pub db_password: String,

    /// System prompt file; defaults to `DEFAULT_SYSTEM_PROMPT_PATH`.
    pub default_system_prompt_path: String,

    // -- Embedding ---------------------------------------------------------------
    /// Embedding provider (`EMBEDDING_PROVIDER`).
    pub embedding_provider: String,
    /// Embedding model file path (`EMBEDDING_MODEL_PATH`, fallback alias
    /// `COGNEE_E2E_EMBED_MODEL_PATH`).
    pub embedding_model_path: String,
    /// Embedding tokenizer file path (`EMBEDDING_TOKENIZER_PATH`, fallback alias
    /// `COGNEE_E2E_TOKENIZER_PATH`).
    pub embedding_tokenizer_path: String,
    /// Embedding model name (`EMBEDDING_MODEL`).
    pub embedding_model_name: String,
    /// Embedding vector dimensions (`EMBEDDING_DIMENSIONS`).
    pub embedding_dimensions: u32,
    /// Maximum embedding sequence length (`EMBEDDING_MAX_SEQUENCE_LENGTH`).
    pub embedding_max_sequence_length: u32,
    /// Embedding batch size (`EMBEDDING_BATCH_SIZE`).
    pub embedding_batch_size: u32,
    /// Embedding API endpoint URL (e.g. `https://api.openai.com/v1/embeddings`).
    /// Maps to `EMBEDDING_ENDPOINT` env var.
    pub embedding_endpoint: String,
    /// Embedding API key. Maps to `EMBEDDING_API_KEY` env var (fallback: `LLM_API_KEY`).
    pub embedding_api_key: String,
    /// Embedding API version string (e.g. for Azure OpenAI `api-version`).
    pub embedding_api_version: String,
    /// Transcription model name (e.g. `"whisper-1"`).
    pub transcription_model: String,

    // -- Ontology ----------------------------------------------------------------
    /// Ontology file path (`ONTOLOGY_FILE_PATH`).
    pub ontology_file_path: String,
    /// Ontology resolver backend (`ONTOLOGY_RESOLVER`). Currently always resolved to
    /// `RdfLibOntologyResolver` when `ontology_file_path` is set — this field is
    /// reserved for future multi-resolver support (e.g. SPARQL endpoint). Only
    /// `"rdflib"` is implemented.
    pub ontology_resolver: String,
    /// Fuzzy matching strategy for entity name resolution
    /// (`ONTOLOGY_MATCHING_STRATEGY`). Currently always resolved to
    /// `FuzzyMatchingStrategy` (Ratcliff/Obershelp gestalt) — this field is reserved for
    /// future strategy selection. Only `"fuzzy"` is implemented.
    pub ontology_matching_strategy: String,

    // -- Session / cache ---------------------------------------------------------
    /// Session store backend: `"fs"`, `"redis"`, or `"seaorm"` (`CACHE_BACKEND`).
    pub cache_backend: String,
    /// Cache host (`CACHE_HOST`).
    pub cache_host: String,
    /// Cache port (`CACHE_PORT`).
    pub cache_port: u16,
    /// Cache username (`CACHE_USERNAME`).
    pub cache_username: String,
    /// Cache password (`CACHE_PASSWORD`).
    pub cache_password: String,
    /// Session time-to-live in seconds (default: 604 800 = 7 days;
    /// `SESSION_TTL_SECONDS`).
    pub session_ttl_seconds: u64,
    /// Caching toggle (`CACHING`).
    pub enable_caching: bool,
    /// Auto-feedback toggle (`AUTO_FEEDBACK`).
    pub auto_feedback: bool,

    // -- Authentication / ACL ----------------------------------------------------
    /// Default user email (`DEFAULT_USER_EMAIL`).
    pub default_user_email: String,
    /// Default user password (`DEFAULT_USER_PASSWORD`).
    pub default_user_password: String,
    /// Access-control toggle (`ENABLE_BACKEND_ACCESS_CONTROL`).
    pub enable_access_control: bool,

    // -- Logging -----------------------------------------------------------------
    /// Log level (`LOG_LEVEL`).
    pub log_level: String,

    // -- Rate limiting -----------------------------------------------------------
    /// LLM rate-limit toggle (`LLM_RATE_LIMIT_ENABLED`).
    pub llm_rate_limit_enabled: bool,
    /// LLM rate-limit request count (`LLM_RATE_LIMIT_REQUESTS`).
    pub llm_rate_limit_requests: u32,
    /// LLM rate-limit interval (`LLM_RATE_LIMIT_INTERVAL`).
    // NOTE(review): the interval unit is not visible in this file — presumably
    // seconds; confirm against the rate limiter.
    pub llm_rate_limit_interval: u32,
    /// Embedding rate-limit toggle (`EMBEDDING_RATE_LIMIT_ENABLED`).
    pub embedding_rate_limit_enabled: bool,
    /// Embedding rate-limit request count (`EMBEDDING_RATE_LIMIT_REQUESTS`).
    pub embedding_rate_limit_requests: u32,
    /// Embedding rate-limit interval (`EMBEDDING_RATE_LIMIT_INTERVAL`).
    pub embedding_rate_limit_interval: u32,

    // -- Storage backend ---------------------------------------------------------
    /// File storage backend: `"local"` or `"s3"` (`STORAGE_BACKEND`).
    pub storage_backend: String,
    /// Storage bucket name (`STORAGE_BUCKET_NAME`).
    pub storage_bucket_name: String,

    // -- Observability -----------------------------------------------------------
    /// Tracing toggle (`COGNEE_TRACING_ENABLED`).
    pub cognee_tracing_enabled: bool,
    /// OTEL service name (`OTEL_SERVICE_NAME`).
    pub otel_service_name: String,
    /// OTLP exporter endpoint (`OTEL_EXPORTER_OTLP_ENDPOINT`).
    pub otel_exporter_otlp_endpoint: String,
    /// OTLP exporter headers (`OTEL_EXPORTER_OTLP_HEADERS`).
    pub otel_exporter_otlp_headers: String,

    /// OTLP transport: `"grpc"` (default) or `"http/protobuf"`.
    /// Mirrors the OTEL spec env var `OTEL_EXPORTER_OTLP_PROTOCOL`.
    pub otel_exporter_otlp_protocol: String,

    /// Span processor mode: `"batch"` (default) or `"simple"`
    /// (`OTEL_SPAN_PROCESSOR`).
    /// `simple` is synchronous-per-span and intended only for
    /// debugging or for collectors known to misbehave with batches.
    pub otel_span_processor: String,

    /// Sampler name passed through to the OTEL SDK.
    /// Empty string means: do not override; let the SDK read
    /// `OTEL_TRACES_SAMPLER` itself (default `parentbased_always_on`).
    /// Recognised values follow the OTEL spec:
    /// `always_on`, `always_off`, `traceidratio`, `parentbased_always_on`,
    /// `parentbased_always_off`, `parentbased_traceidratio`.
    pub otel_traces_sampler: String,

    /// Argument for the sampler (`OTEL_TRACES_SAMPLER_ARG`). Currently only
    /// meaningful for the `traceidratio` / `parentbased_traceidratio` samplers,
    /// which expect a 0.0–1.0 ratio. Empty string means: do not override.
    pub otel_traces_sampler_arg: String,

    // -- Feature flags -----------------------------------------------------------
    /// Last-accessed tracking toggle (`ENABLE_LAST_ACCESSED`).
    pub enable_last_accessed: bool,
}
201
202impl Settings {
203    /// Build `Settings` entirely from environment variables (and any `.env` file).
204    ///
205    /// Equivalent to Python's `LLMConfig()` / `GraphConfig()` instantiation:
206    /// starts from defaults and overlays every env var that is set.
207    /// The `.env` file in the current working directory (or any ancestor) is loaded
208    /// automatically before env vars are read — callers do not need to call
209    /// `dotenv::dotenv()` themselves.
210    pub fn load_from_env() -> Self {
211        let mut s = Self::default();
212        s.overlay_from_env();
213        s
214    }
215
216    /// Overlay environment variables on top of `self`.
217    ///
218    /// Only fields whose corresponding env var is set are modified; everything
219    /// else keeps its current value.  The `.env` file is loaded first (idempotent —
220    /// safe to call multiple times).
221    ///
222    /// Env-var naming follows the Python SDK conventions (`LLM_*`, `EMBEDDING_*`,
223    /// `GRAPH_DATABASE_*`, `VECTOR_DB_*`, `DB_*`, `COGNEE_*`).  A handful of
224    /// Rust-specific aliases (`OPENAI_TOKEN`, `OPENAI_URL`, `OPENAI_MODEL`) are
225    /// accepted as fallbacks for backward compatibility with existing test setups.
226    pub fn overlay_from_env(&mut self) {
227        // Load .env (no-op if absent or if the vars are already in the environment).
228        let _ = dotenv::dotenv();
229
230        // Helpers ----------------------------------------------------------------
231        let str_var =
232            |name: &str| -> Option<String> { std::env::var(name).ok().filter(|v| !v.is_empty()) };
233        // Try `primary` first; fall back to `alias` if primary is unset/empty.
234        let str_alias = |primary: &str, alias: &str| -> Option<String> {
235            str_var(primary).or_else(|| str_var(alias))
236        };
237
238        // -- LLM -----------------------------------------------------------------
239        if let Some(v) = str_var("LLM_PROVIDER") {
240            self.llm_provider = v;
241        }
242        if let Some(v) = str_alias("LLM_MODEL", "OPENAI_MODEL") {
243            self.llm_model = v;
244        }
245        if let Some(v) = str_alias("LLM_API_KEY", "OPENAI_TOKEN") {
246            self.llm_api_key = v;
247        }
248        if let Some(v) = str_alias("LLM_ENDPOINT", "OPENAI_URL") {
249            self.llm_endpoint = v;
250        }
251        if let Some(v) = str_var("LLM_API_VERSION") {
252            self.llm_api_version = v;
253        }
254        if let Some(v) = str_var("LLM_TEMPERATURE")
255            && let Ok(f) = v.parse::<f64>()
256        {
257            self.llm_temperature = f;
258        }
259        if let Some(v) = str_alias("LLM_MAX_COMPLETION_TOKENS", "LLM_MAX_TOKENS")
260            && let Ok(n) = v.parse::<u32>()
261        {
262            self.llm_max_completion_tokens = n;
263        }
264        if let Some(v) = str_var("LLM_STREAMING") {
265            self.llm_streaming = cognee_utils::parse_env_bool(&v);
266        }
267        if let Some(v) = str_var("LLM_MAX_RETRIES")
268            && let Ok(n) = v.parse::<u32>()
269        {
270            self.llm_max_retries = n;
271        }
272        if let Some(v) = str_var("LLM_MAX_PARALLEL_REQUESTS")
273            && let Ok(n) = v.parse::<u32>()
274        {
275            self.llm_max_parallel_requests = n;
276        }
277        // Mirror MOCK_EMBEDDING parsing (accept true/1/yes, case-insensitive).
278        if let Some(v) = str_var("MOCK_LLM") {
279            let v = v.to_lowercase();
280            self.llm_mock = v == "true" || v == "1" || v == "yes";
281        }
282        if let Some(v) = str_var("MOCK_LLM_CASSETTE") {
283            self.llm_cassette = v;
284        }
285        if let Some(v) = str_var("COGNEE_RECORD_LLM") {
286            self.llm_record_path = v;
287        }
288
289        // -- Graph database ------------------------------------------------------
290        if let Some(v) = str_var("GRAPH_DATABASE_PROVIDER") {
291            self.graph_database_provider = v;
292        }
293        if let Some(v) = str_var("GRAPH_DATABASE_URL") {
294            self.graph_database_url = v;
295        }
296        if let Some(v) = str_var("GRAPH_DATABASE_NAME") {
297            self.graph_database_name = v;
298        }
299        if let Some(v) = str_var("GRAPH_DATABASE_USERNAME") {
300            self.graph_database_username = v;
301        }
302        if let Some(v) = str_var("GRAPH_DATABASE_PASSWORD") {
303            self.graph_database_password = v;
304        }
305        if let Some(v) = str_var("GRAPH_DATABASE_PORT")
306            && let Ok(n) = v.parse::<u16>()
307        {
308            self.graph_database_port = n;
309        }
310        if let Some(v) = str_var("GRAPH_DATABASE_HOST") {
311            self.graph_database_host = v;
312        }
313        if let Some(v) = str_var("GRAPH_DATABASE_KEY") {
314            self.graph_database_key = v;
315        }
316        if let Some(v) = str_var("GRAPH_FILE_PATH") {
317            self.graph_file_path = v;
318        }
319
320        // -- Vector database -----------------------------------------------------
321        if let Some(v) = str_var("VECTOR_DB_PROVIDER") {
322            self.vector_db_provider = v;
323        }
324        if let Some(v) = str_var("VECTOR_DB_URL") {
325            self.vector_db_url = v;
326        }
327        if let Some(v) = str_var("VECTOR_DB_PORT")
328            && let Ok(n) = v.parse::<u16>()
329        {
330            self.vector_db_port = n;
331        }
332        if let Some(v) = str_var("VECTOR_DB_NAME") {
333            self.vector_db_name = v;
334        }
335        if let Some(v) = str_var("VECTOR_DB_KEY") {
336            self.vector_db_key = v;
337        }
338        if let Some(v) = str_var("VECTOR_DB_USERNAME") {
339            self.vector_db_username = v;
340        }
341        if let Some(v) = str_var("VECTOR_DB_PASSWORD") {
342            self.vector_db_password = v;
343        }
344        if let Some(v) = str_var("VECTOR_DB_HOST") {
345            self.vector_db_host = v;
346        }
347
348        // -- Relational database -------------------------------------------------
349        if let Some(v) = str_var("DB_PROVIDER") {
350            self.db_provider = v;
351        }
352        if let Some(v) = str_var("DB_HOST") {
353            self.db_host = v;
354        }
355        if let Some(v) = str_var("DB_PORT")
356            && let Ok(n) = v.parse::<u16>()
357        {
358            self.db_port = n;
359        }
360        if let Some(v) = str_var("DB_NAME") {
361            self.db_name = v;
362        }
363        if let Some(v) = str_var("DB_USERNAME") {
364            self.db_username = v;
365        }
366        if let Some(v) = str_var("DB_PASSWORD") {
367            self.db_password = v;
368        }
369        if let Some(v) = str_var("DATABASE_URL") {
370            self.relational_db_url = v;
371        }
372
373        // -- Embedding -----------------------------------------------------------
374        if let Some(v) = str_var("EMBEDDING_PROVIDER") {
375            self.embedding_provider = v;
376        }
377        if let Some(v) = str_var("EMBEDDING_ENDPOINT") {
378            self.embedding_endpoint = v;
379        }
380        if let Some(v) = str_alias("EMBEDDING_API_KEY", "LLM_API_KEY") {
381            self.embedding_api_key = v;
382        }
383        if let Some(v) = str_var("EMBEDDING_MODEL") {
384            self.embedding_model_name = v;
385        }
386        if let Some(v) = str_var("EMBEDDING_DIMENSIONS")
387            && let Ok(n) = v.parse::<u32>()
388        {
389            self.embedding_dimensions = n;
390        }
391        if let Some(v) = str_var("EMBEDDING_BATCH_SIZE")
392            && let Ok(n) = v.parse::<u32>()
393        {
394            self.embedding_batch_size = n;
395        }
396        if let Some(v) = str_var("EMBEDDING_MAX_SEQUENCE_LENGTH")
397            && let Ok(n) = v.parse::<u32>()
398        {
399            self.embedding_max_sequence_length = n;
400        }
401        if let Some(v) = str_alias("EMBEDDING_MODEL_PATH", "COGNEE_E2E_EMBED_MODEL_PATH") {
402            self.embedding_model_path = v;
403        }
404        if let Some(v) = str_alias("EMBEDDING_TOKENIZER_PATH", "COGNEE_E2E_TOKENIZER_PATH") {
405            self.embedding_tokenizer_path = v;
406        }
407
408        // -- Base / system -------------------------------------------------------
409        if let Some(v) = str_var("COGNEE_SYSTEM_ROOT_DIRECTORY") {
410            self.system_root_directory = v;
411        }
412        if let Some(v) = str_var("COGNEE_DATA_ROOT_DIRECTORY") {
413            self.data_root_directory = v;
414        }
415        if let Some(v) = str_var("COGNEE_DEFAULT_DATASET_NAME") {
416            self.default_dataset_name = v;
417        }
418        if let Some(v) = str_var("COGNEE_DEFAULT_USER_ID") {
419            self.default_user_id = v;
420        }
421
422        // -- Ontology ------------------------------------------------------------
423        // NOTE: ontology_resolver and ontology_matching_strategy are stored for
424        // future multi-resolver / multi-strategy support. Currently the CLI always
425        // uses RdfLibOntologyResolver + FuzzyMatchingStrategy when ontology_file_path
426        // is set; these two fields have no runtime effect yet.
427        if let Some(v) = str_var("ONTOLOGY_FILE_PATH") {
428            self.ontology_file_path = v;
429        }
430        if let Some(v) = str_var("ONTOLOGY_RESOLVER") {
431            self.ontology_resolver = v;
432        }
433        if let Some(v) = str_var("ONTOLOGY_MATCHING_STRATEGY") {
434            self.ontology_matching_strategy = v;
435        }
436
437        // -- Session / cache -----------------------------------------------------
438        if let Some(v) = str_var("CACHE_BACKEND") {
439            self.cache_backend = v;
440        }
441        if let Some(v) = str_var("CACHE_HOST") {
442            self.cache_host = v;
443        }
444        if let Some(v) = str_var("CACHE_PORT")
445            && let Ok(n) = v.parse::<u16>()
446        {
447            self.cache_port = n;
448        }
449        if let Some(v) = str_var("CACHE_USERNAME") {
450            self.cache_username = v;
451        }
452        if let Some(v) = str_var("CACHE_PASSWORD") {
453            self.cache_password = v;
454        }
455        if let Some(v) = str_var("SESSION_TTL_SECONDS")
456            && let Ok(n) = v.parse::<u64>()
457        {
458            self.session_ttl_seconds = n;
459        }
460        if let Some(v) = str_var("CACHING") {
461            self.enable_caching = cognee_utils::parse_env_bool(&v);
462        }
463        if let Some(v) = str_var("AUTO_FEEDBACK") {
464            self.auto_feedback = cognee_utils::parse_env_bool(&v);
465        }
466
467        // -- Authentication / ACL ------------------------------------------------
468        if let Some(v) = str_var("DEFAULT_USER_EMAIL") {
469            self.default_user_email = v;
470        }
471        if let Some(v) = str_var("DEFAULT_USER_PASSWORD") {
472            self.default_user_password = v;
473        }
474        if let Some(v) = str_var("ENABLE_BACKEND_ACCESS_CONTROL") {
475            self.enable_access_control = cognee_utils::parse_env_bool(&v);
476        }
477
478        // -- Logging -------------------------------------------------------------
479        if let Some(v) = str_var("LOG_LEVEL") {
480            self.log_level = v;
481        }
482        // COGNEE_LOGS_DIR maps to existing logs_root_directory
483        if let Some(v) = str_var("COGNEE_LOGS_DIR") {
484            self.logs_root_directory = v;
485        }
486        // CACHE_ROOT_DIRECTORY maps to existing cache_root_directory
487        if let Some(v) = str_var("CACHE_ROOT_DIRECTORY") {
488            self.cache_root_directory = v;
489        }
490
491        // -- Rate limiting -------------------------------------------------------
492        if let Some(v) = str_var("LLM_RATE_LIMIT_ENABLED") {
493            self.llm_rate_limit_enabled = cognee_utils::parse_env_bool(&v);
494        }
495        if let Some(v) = str_var("LLM_RATE_LIMIT_REQUESTS")
496            && let Ok(n) = v.parse::<u32>()
497        {
498            self.llm_rate_limit_requests = n;
499        }
500        if let Some(v) = str_var("LLM_RATE_LIMIT_INTERVAL")
501            && let Ok(n) = v.parse::<u32>()
502        {
503            self.llm_rate_limit_interval = n;
504        }
505        if let Some(v) = str_var("EMBEDDING_RATE_LIMIT_ENABLED") {
506            self.embedding_rate_limit_enabled = cognee_utils::parse_env_bool(&v);
507        }
508        if let Some(v) = str_var("EMBEDDING_RATE_LIMIT_REQUESTS")
509            && let Ok(n) = v.parse::<u32>()
510        {
511            self.embedding_rate_limit_requests = n;
512        }
513        if let Some(v) = str_var("EMBEDDING_RATE_LIMIT_INTERVAL")
514            && let Ok(n) = v.parse::<u32>()
515        {
516            self.embedding_rate_limit_interval = n;
517        }
518
519        // -- Storage backend -----------------------------------------------------
520        if let Some(v) = str_var("STORAGE_BACKEND") {
521            self.storage_backend = v;
522        }
523        if let Some(v) = str_var("STORAGE_BUCKET_NAME") {
524            self.storage_bucket_name = v;
525        }
526
527        // -- Observability -------------------------------------------------------
528        if let Some(v) = str_var("COGNEE_TRACING_ENABLED") {
529            self.cognee_tracing_enabled = cognee_utils::parse_env_bool(&v);
530        }
531        if let Some(v) = str_var("OTEL_SERVICE_NAME") {
532            self.otel_service_name = v;
533        }
534        if let Some(v) = str_var("OTEL_EXPORTER_OTLP_ENDPOINT") {
535            self.otel_exporter_otlp_endpoint = v;
536        }
537        if let Some(v) = str_var("OTEL_EXPORTER_OTLP_HEADERS") {
538            self.otel_exporter_otlp_headers = v;
539        }
540        if let Some(v) = str_var("OTEL_EXPORTER_OTLP_PROTOCOL") {
541            self.otel_exporter_otlp_protocol = v;
542        }
543        if let Some(v) = str_var("OTEL_SPAN_PROCESSOR") {
544            self.otel_span_processor = v;
545        }
546        if let Some(v) = str_var("OTEL_TRACES_SAMPLER") {
547            self.otel_traces_sampler = v;
548        }
549        if let Some(v) = str_var("OTEL_TRACES_SAMPLER_ARG") {
550            self.otel_traces_sampler_arg = v;
551        }
552
553        // -- Feature flags -------------------------------------------------------
554        if let Some(v) = str_var("ENABLE_LAST_ACCESSED") {
555            self.enable_last_accessed = cognee_utils::parse_env_bool(&v);
556        }
557    }
558
559    /// Returns the effective relational DB connection URL.
560    ///
561    /// When `db_provider` is `"postgres"`, builds
562    /// `postgres://username:password@host:port/name` from the individual
563    /// `db_*` fields (matching Python's `DB_PROVIDER`/`DB_HOST`/… env vars).
564    /// Otherwise returns `relational_db_url` verbatim.
565    pub fn resolved_relational_db_url(&self) -> String {
566        if self.db_provider == "postgres" {
567            format!(
568                "postgres://{}:{}@{}:{}/{}",
569                self.db_username, self.db_password, self.db_host, self.db_port, self.db_name
570            )
571        } else {
572            self.relational_db_url.clone()
573        }
574    }
575
576    /// Returns the redacted property dict merged into `Pipeline Run *`
577    /// analytics events.
578    ///
579    /// **Allowlist-only.** Mirrors Python's `get_current_settings()`
580    /// shape but covers only provider/model identifiers and a few
581    /// dimension/strategy fields — see
582    /// [`docs/telemetry/03/03-settings-snapshot.md`](https://github.com/topoteretes/cognee-rs/blob/main/docs/telemetry/03/03-settings-snapshot.md)
583    /// for the rationale on what is omitted (URLs, credentials,
584    /// file paths).
585    ///
586    /// Adding a field here is intentional — there is a snapshot test
587    /// that will fail until it is acknowledged.
588    pub fn telemetry_snapshot(&self) -> serde_json::Map<String, serde_json::Value> {
589        use serde_json::Value;
590        let mut m = serde_json::Map::new();
591        m.insert("sdk_runtime".into(), Value::String("rust".into()));
592        m.insert(
593            "vector_db_provider".into(),
594            Value::String(self.vector_db_provider.clone()),
595        );
596        m.insert(
597            "graph_db_provider".into(),
598            Value::String(self.graph_database_provider.clone()),
599        );
600        m.insert(
601            "relational_db_provider".into(),
602            Value::String(self.db_provider.clone()),
603        );
604        m.insert(
605            "llm_provider".into(),
606            Value::String(self.llm_provider.clone()),
607        );
608        m.insert("llm_model".into(), Value::String(self.llm_model.clone()));
609        // NOTE: `llm_mock`/`llm_cassette`/`llm_record_path` are intentionally
610        // NOT emitted here. The cassette/record fields are local filesystem
611        // paths (sensitive), and the telemetry snapshot is an allowlisted,
612        // privacy-filtered payload — see `telemetry_snapshot_only_emits_allowlisted_keys`.
613        m.insert(
614            "embedding_provider".into(),
615            Value::String(self.embedding_provider.clone()),
616        );
617        m.insert(
618            "embedding_model".into(),
619            Value::String(self.embedding_model_name.clone()),
620        );
621        m.insert(
622            "embedding_dimensions".into(),
623            Value::Number(self.embedding_dimensions.into()),
624        );
625        m.insert(
626            "chunk_strategy".into(),
627            Value::String(self.chunk_strategy.clone()),
628        );
629        m
630    }
631}
632
633impl Default for Settings {
634    fn default() -> Self {
635        // Embedding default: local ONNX (BGE-Small) on Android for edge/offline
636        // deployment; OpenAI text-embedding-3-small everywhere else — matching
637        // the Python SDK and `cognee_embedding::EmbeddingConfig::default()`.
638        // (ONNX runs all texts in one inference; remote OpenAI embeddings avoid
639        // both the model download and large-batch memory blow-ups.)
640        #[cfg(target_os = "android")]
641        let (embedding_provider, embedding_model_name, embedding_dimensions) =
642            ("onnx", "BGE-Small-v1.5", 384u32);
643        #[cfg(not(target_os = "android"))]
644        let (embedding_provider, embedding_model_name, embedding_dimensions) =
645            ("openai", "text-embedding-3-small", 1536u32);
646
647        Self {
648            default_user_id: "00000000-0000-0000-0000-000000000000".to_string(),
649            default_dataset_name: "main_dataset".to_string(),
650            system_root_directory: "./.cognee_system".to_string(),
651            data_root_directory: "./.data_storage".to_string(),
652            cache_root_directory: "./.cognee_cache".to_string(),
653            // Intentional divergence from Python default (~/.cognee/logs): edge/Android targets need a relative path.
654            logs_root_directory: "./logs".to_string(),
655            monitoring_tool: "none".to_string(),
656
657            classification_model: String::new(),
658            summarization_model: String::new(),
659            graph_model: "KnowledgeGraph".to_string(),
660            summarization_schema: None,
661
662            llm_provider: "openai".to_string(),
663            llm_model: "openai/gpt-5-mini".to_string(),
664            llm_api_key: String::new(),
665            llm_endpoint: String::new(),
666            llm_api_version: String::new(),
667            llm_temperature: 0.0,
668            llm_streaming: false,
669            llm_max_completion_tokens: 16384,
670            llm_max_retries: 2,
671            llm_max_parallel_requests: 20,
672            llm_mock: false,
673            llm_cassette: String::new(),
674            llm_record_path: String::new(),
675            graph_prompt_path: "generate_graph_prompt.txt".to_string(),
676
677            llm_fallback_model: String::new(),
678            llm_fallback_provider: String::new(),
679            llm_fallback_endpoint: String::new(),
680            llm_fallback_api_key: String::new(),
681
682            graph_database_provider: "ladybug".to_string(),
683            graph_database_url: String::new(),
684            graph_database_name: String::new(),
685            graph_database_username: String::new(),
686            graph_database_password: String::new(),
687            graph_database_port: 123,
688            graph_database_host: String::new(),
689            graph_database_key: String::new(),
690            graph_file_path: String::new(),
691            graph_filename: String::new(),
692
693            // OSS default: legacy `"lancedb"` is kept as the literal value so
694            // existing configs continue to boot without edits. Post-T4/T5,
695            // `ComponentManager::init_vector_db` redirects `"lancedb"`/`"qdrant"`
696            // to the in-memory `BruteForceVectorDB` with a `tracing::warn!`.
697            // Production deployments should explicitly set
698            // `vector_db_provider="pgvector"` (and supply `vector_db_url`) for
699            // durable storage. T5's earlier flip to `"pgvector"` broke OSS
700            // bindings (Neon/C-API/python defaults don't enable the `pgvector`
701            // Cargo feature) — keeping `"lancedb"` here is the lowest-friction
702            // OSS default that works in every OSS build out of the box.
703            vector_db_provider: "lancedb".to_string(),
704            vector_db_url: String::new(),
705            vector_db_port: 1234,
706            vector_db_name: String::new(),
707            vector_db_key: String::new(),
708            vector_db_username: String::new(),
709            vector_db_password: String::new(),
710            vector_db_host: String::new(),
711
712            chunk_strategy: "PARAGRAPH".to_string(),
713            chunk_engine: "DEFAULT_ENGINE".to_string(),
714            chunk_size: 1500,
715            chunk_overlap: 10,
716
717            relational_db_url: "sqlite:./cognee.db?mode=rwc".to_string(),
718            migration_db_url: String::new(),
719
720            db_provider: "sqlite".to_string(),
721            db_host: "localhost".to_string(),
722            db_port: 5432,
723            db_name: "cognee_db".to_string(),
724            db_username: String::new(),
725            db_password: String::new(),
726
727            default_system_prompt_path: DEFAULT_SYSTEM_PROMPT_PATH.to_string(),
728
729            embedding_provider: embedding_provider.to_string(),
730            // ONNX model/tokenizer paths are only consulted when the provider is
731            // `onnx`/`fastembed` (the Android/edge default); harmless otherwise.
732            embedding_model_path: "./target/models/BGE-Small-v1.5-model_quantized.onnx".to_string(),
733            embedding_tokenizer_path: "./target/models/bge-small-tokenizer.json".to_string(),
734            embedding_model_name: embedding_model_name.to_string(),
735            // Dimensions match the default model above (text-embedding-3-small =
736            // 1536; BGE-Small = 384). If you change embedding_model_name, update
737            // this or set EMBEDDING_DIMENSIONS so from_env auto-resolves it via
738            // cognee_embedding::known_model_dimensions.
739            embedding_dimensions,
740            embedding_max_sequence_length: 512,
741            embedding_batch_size: 32,
742            embedding_endpoint: String::new(),
743            embedding_api_key: String::new(),
744            embedding_api_version: String::new(),
745            transcription_model: String::new(),
746
747            ontology_file_path: String::new(),
748            ontology_resolver: "rdflib".to_string(),
749            ontology_matching_strategy: "fuzzy".to_string(),
750
751            // Session / cache
752            cache_backend: "fs".to_string(),
753            cache_host: "localhost".to_string(),
754            cache_port: 6379,
755            cache_username: String::new(),
756            cache_password: String::new(),
757            session_ttl_seconds: 604800,
758            enable_caching: true,
759            auto_feedback: false,
760
761            // Authentication / ACL
762            default_user_email: "default_user@example.com".to_string(),
763            default_user_password: String::new(),
764            enable_access_control: false,
765
766            // Logging
767            log_level: "info".to_string(),
768
769            // Rate limiting
770            llm_rate_limit_enabled: false,
771            llm_rate_limit_requests: 60,
772            llm_rate_limit_interval: 60,
773            embedding_rate_limit_enabled: false,
774            embedding_rate_limit_requests: 60,
775            embedding_rate_limit_interval: 60,
776
777            // Storage backend
778            storage_backend: "local".to_string(),
779            storage_bucket_name: String::new(),
780
781            // Observability
782            cognee_tracing_enabled: false,
783            otel_service_name: "cognee".to_string(),
784            otel_exporter_otlp_endpoint: String::new(),
785            otel_exporter_otlp_headers: String::new(),
786            otel_exporter_otlp_protocol: "grpc".to_string(),
787            otel_span_processor: "batch".to_string(),
788            otel_traces_sampler: String::new(),
789            otel_traces_sampler_arg: String::new(),
790
791            // Feature flags
792            enable_last_accessed: false,
793        }
794    }
795}
796
797// ---------------------------------------------------------------------------
798// ConfigError
799// ---------------------------------------------------------------------------
800
801/// Errors returned by [`ConfigManager`] setter methods.
802#[derive(Debug, thiserror::Error)]
803pub enum ConfigError {
804    #[error("Unknown config key: {0}")]
805    UnknownKey(String),
806    #[error("Type mismatch for key '{key}': {reason}")]
807    TypeMismatch { key: String, reason: String },
808}
809
810// ---------------------------------------------------------------------------
811// ConfigManager
812// ---------------------------------------------------------------------------
813
814/// Thread-safe mutable configuration manager.
815///
816/// Wraps `Settings` in `Arc<RwLock<>>` to allow runtime mutation from
817/// setter methods.  Tracks a monotonically increasing version counter
818/// so that [`crate::ComponentManager`] can detect stale cached components
819/// and reinitialize them.
820///
821/// # Example
822/// ```
823/// use cognee_lib::config::{ConfigManager, Settings};
824///
825/// let cfg = ConfigManager::new(Settings::default());
826/// assert_eq!(cfg.version(), 0);
827///
828/// cfg.set_llm_model("gpt-4o");
829/// assert_eq!(cfg.version(), 1);
830/// assert_eq!(cfg.read().llm_model, "gpt-4o");
831/// ```
832pub struct ConfigManager {
833    inner: Arc<RwLock<Settings>>,
834    version: Arc<AtomicU64>,
835}
836
837impl ConfigManager {
838    /// Create a new `ConfigManager` wrapping the given settings.
839    pub fn new(settings: Settings) -> Self {
840        Self {
841            inner: Arc::new(RwLock::new(settings)),
842            version: Arc::new(AtomicU64::new(0)),
843        }
844    }
845
846    /// Convenience constructor: `Settings::load_from_env()` + wrap.
847    pub fn from_env() -> Self {
848        Self::new(Settings::load_from_env())
849    }
850
851    /// Obtain a read-lock on the current settings.
852    pub fn read(&self) -> RwLockReadGuard<'_, Settings> {
853        self.inner.read().expect("lock poison is unrecoverable") // lock poison is unrecoverable
854    }
855
856    /// Current config version (monotonically increasing on each mutation).
857    pub fn version(&self) -> u64 {
858        self.version.load(Ordering::Acquire)
859    }
860
861    /// Bump the version after any mutation.
862    fn bump_version(&self) {
863        self.version.fetch_add(1, Ordering::Release);
864    }
865}
866
867impl Clone for ConfigManager {
868    fn clone(&self) -> Self {
869        Self {
870            inner: Arc::clone(&self.inner),
871            version: Arc::clone(&self.version),
872        }
873    }
874}
875
876// -- Individual setter methods -----------------------------------------------
877
878impl ConfigManager {
879    // -- LLM -----------------------------------------------------------------
880
881    pub fn set_llm_provider(&self, provider: &str) {
882        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
883        s.llm_provider = provider.to_string();
884        drop(s);
885        self.bump_version();
886    }
887
888    pub fn set_llm_model(&self, model: &str) {
889        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
890        s.llm_model = model.to_string();
891        drop(s);
892        self.bump_version();
893    }
894
895    pub fn set_llm_api_key(&self, key: &str) {
896        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
897        s.llm_api_key = key.to_string();
898        drop(s);
899        self.bump_version();
900    }
901
902    pub fn set_llm_endpoint(&self, endpoint: &str) {
903        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
904        s.llm_endpoint = endpoint.to_string();
905        drop(s);
906        self.bump_version();
907    }
908
909    // -- LLM fallback --------------------------------------------------------
910
911    pub fn set_llm_fallback_model(&self, model: &str) {
912        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
913        s.llm_fallback_model = model.to_string();
914        drop(s);
915        self.bump_version();
916    }
917
918    pub fn set_llm_fallback_provider(&self, provider: &str) {
919        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
920        s.llm_fallback_provider = provider.to_string();
921        drop(s);
922        self.bump_version();
923    }
924
925    pub fn set_llm_fallback_endpoint(&self, endpoint: &str) {
926        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
927        s.llm_fallback_endpoint = endpoint.to_string();
928        drop(s);
929        self.bump_version();
930    }
931
932    pub fn set_llm_fallback_api_key(&self, key: &str) {
933        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
934        s.llm_fallback_api_key = key.to_string();
935        drop(s);
936        self.bump_version();
937    }
938
939    // -- Embedding -----------------------------------------------------------
940
941    pub fn set_embedding_provider(&self, provider: &str) {
942        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
943        s.embedding_provider = provider.to_string();
944        drop(s);
945        self.bump_version();
946    }
947
948    pub fn set_embedding_model(&self, model: &str) {
949        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
950        s.embedding_model_name = model.to_string();
951        drop(s);
952        self.bump_version();
953    }
954
955    pub fn set_embedding_dimensions(&self, dims: u32) {
956        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
957        s.embedding_dimensions = dims;
958        drop(s);
959        self.bump_version();
960    }
961
962    pub fn set_embedding_endpoint(&self, endpoint: &str) {
963        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
964        s.embedding_endpoint = endpoint.to_string();
965        drop(s);
966        self.bump_version();
967    }
968
969    pub fn set_embedding_api_key(&self, key: &str) {
970        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
971        s.embedding_api_key = key.to_string();
972        drop(s);
973        self.bump_version();
974    }
975
976    pub fn set_embedding_api_version(&self, version: &str) {
977        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
978        s.embedding_api_version = version.to_string();
979        drop(s);
980        self.bump_version();
981    }
982
983    pub fn set_transcription_model(&self, model: &str) {
984        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
985        s.transcription_model = model.to_string();
986        drop(s);
987        self.bump_version();
988    }
989
990    // -- Vector DB -----------------------------------------------------------
991
992    pub fn set_vector_db_provider(&self, provider: &str) {
993        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
994        s.vector_db_provider = provider.to_string();
995        drop(s);
996        self.bump_version();
997    }
998
999    pub fn set_vector_db_url(&self, url: &str) {
1000        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1001        s.vector_db_url = url.to_string();
1002        drop(s);
1003        self.bump_version();
1004    }
1005
1006    /// Override the relational database URL (e.g. `"sqlite:///path/to/db?mode=rwc"`).
1007    ///
1008    /// Primarily used by language-binding tests to redirect each test's DB to an
1009    /// isolated tmp directory so tests do not share the default on-disk DB.
1010    pub fn set_relational_db_url(&self, url: &str) {
1011        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1012        s.relational_db_url = url.to_string();
1013        drop(s);
1014        self.bump_version();
1015    }
1016
1017    /// Set all relational DB connection fields at once.
1018    // Many optional fields are needed here to match Python's bulk-setter API.
1019    #[allow(clippy::too_many_arguments)]
1020    pub fn set_relational_db_config(
1021        &self,
1022        url: Option<&str>,
1023        provider: Option<&str>,
1024        host: Option<&str>,
1025        port: Option<u16>,
1026        name: Option<&str>,
1027        username: Option<&str>,
1028        password: Option<&str>,
1029    ) {
1030        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1031        if let Some(v) = url {
1032            s.relational_db_url = v.to_string();
1033        }
1034        if let Some(v) = provider {
1035            s.db_provider = v.to_string();
1036        }
1037        if let Some(v) = host {
1038            s.db_host = v.to_string();
1039        }
1040        if let Some(v) = port {
1041            s.db_port = v;
1042        }
1043        if let Some(v) = name {
1044            s.db_name = v.to_string();
1045        }
1046        if let Some(v) = username {
1047            s.db_username = v.to_string();
1048        }
1049        if let Some(v) = password {
1050            s.db_password = v.to_string();
1051        }
1052        drop(s);
1053        self.bump_version();
1054    }
1055
1056    /// Set the migration DB URL.
1057    pub fn set_migration_db_config(&self, url: &str) {
1058        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1059        s.migration_db_url = url.to_string();
1060        drop(s);
1061        self.bump_version();
1062    }
1063
1064    pub fn set_vector_db_key(&self, key: &str) {
1065        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1066        s.vector_db_key = key.to_string();
1067        drop(s);
1068        self.bump_version();
1069    }
1070
1071    // -- Graph DB ------------------------------------------------------------
1072
1073    pub fn set_graph_database_provider(&self, provider: &str) {
1074        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1075        s.graph_database_provider = provider.to_string();
1076        drop(s);
1077        self.bump_version();
1078    }
1079
1080    pub fn set_graph_model(&self, model: &str) {
1081        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1082        s.graph_model = model.to_string();
1083        drop(s);
1084        self.bump_version();
1085    }
1086
1087    // -- Chunking ------------------------------------------------------------
1088
1089    pub fn set_chunk_strategy(&self, strategy: &str) {
1090        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1091        s.chunk_strategy = strategy.to_string();
1092        drop(s);
1093        self.bump_version();
1094    }
1095
1096    pub fn set_chunk_engine(&self, engine: &str) {
1097        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1098        s.chunk_engine = engine.to_string();
1099        drop(s);
1100        self.bump_version();
1101    }
1102
1103    pub fn set_chunk_size(&self, size: u32) {
1104        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1105        s.chunk_size = size;
1106        drop(s);
1107        self.bump_version();
1108    }
1109
1110    pub fn set_chunk_overlap(&self, overlap: u32) {
1111        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1112        s.chunk_overlap = overlap;
1113        drop(s);
1114        self.bump_version();
1115    }
1116
1117    // -- System paths --------------------------------------------------------
1118
1119    pub fn set_data_root_directory(&self, path: &str) {
1120        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1121        s.data_root_directory = path.to_string();
1122        drop(s);
1123        self.bump_version();
1124    }
1125
1126    /// Set system root directory and cascade derived path updates.
1127    ///
1128    /// Matches Python `config.system_root_directory()` (config.py lines 41-67):
1129    /// - `graph_file_path` updated if it was under the old system root
1130    /// - `vector_db_url` updated if it was under the old system root
1131    pub fn set_system_root_directory(&self, path: &str) {
1132        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1133        let old_root = s.system_root_directory.clone();
1134        s.system_root_directory = path.to_string();
1135
1136        // Cascade graph_file_path
1137        if s.graph_file_path.is_empty() || s.graph_file_path.starts_with(&old_root) {
1138            let suffix = if s.graph_file_path.is_empty() {
1139                "/graph".to_string()
1140            } else {
1141                s.graph_file_path[old_root.len()..].to_string()
1142            };
1143            s.graph_file_path = format!("{path}{suffix}");
1144        }
1145
1146        // Cascade vector_db_url (only if it was using the default system root path)
1147        if s.vector_db_url.is_empty() || s.vector_db_url.starts_with(&old_root) {
1148            let suffix = if s.vector_db_url.is_empty() {
1149                "/vectors".to_string()
1150            } else {
1151                s.vector_db_url[old_root.len()..].to_string()
1152            };
1153            s.vector_db_url = format!("{path}{suffix}");
1154        }
1155
1156        drop(s);
1157        self.bump_version();
1158    }
1159
1160    pub fn set_monitoring_tool(&self, tool: &str) {
1161        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1162        s.monitoring_tool = tool.to_string();
1163        drop(s);
1164        self.bump_version();
1165    }
1166
1167    pub fn set_classification_model(&self, model: &str) {
1168        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1169        s.classification_model = model.to_string();
1170        drop(s);
1171        self.bump_version();
1172    }
1173
1174    pub fn set_summarization_model(&self, model: &str) {
1175        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1176        s.summarization_model = model.to_string();
1177        drop(s);
1178        self.bump_version();
1179    }
1180
1181    /// Set a custom JSON schema for the summarization output stage.
1182    ///
1183    /// Mirrors Python's `cognee.config.set_summarization_model(CustomSchema)`:
1184    /// accepts a JSON Schema `Value` describing the expected LLM output. The
1185    /// schema **must** contain a `summary` string field. The stored value is
1186    /// intended to be read by callers when constructing a `CognifyConfig` via
1187    /// `CognifyConfig::with_summary_schema`.
1188    ///
1189    /// Returns `Err` if the schema fails validation (missing `summary` field).
1190    pub fn set_summarization_schema(
1191        &self,
1192        schema: serde_json::Value,
1193    ) -> Result<(), cognee_cognify::config::ConfigError> {
1194        cognee_cognify::config::validate_summary_schema(&schema)?;
1195        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1196        s.summarization_schema = Some(schema);
1197        drop(s);
1198        self.bump_version();
1199        Ok(())
1200    }
1201
1202    // -- LLM tuning ----------------------------------------------------------
1203
1204    pub fn set_llm_api_version(&self, version: &str) {
1205        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1206        s.llm_api_version = version.to_string();
1207        drop(s);
1208        self.bump_version();
1209    }
1210
1211    pub fn set_llm_temperature(&self, temperature: f64) {
1212        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1213        s.llm_temperature = temperature;
1214        drop(s);
1215        self.bump_version();
1216    }
1217
1218    pub fn set_llm_streaming(&self, streaming: bool) {
1219        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1220        s.llm_streaming = streaming;
1221        drop(s);
1222        self.bump_version();
1223    }
1224
1225    pub fn set_llm_max_completion_tokens(&self, tokens: u32) {
1226        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1227        s.llm_max_completion_tokens = tokens;
1228        drop(s);
1229        self.bump_version();
1230    }
1231
1232    pub fn set_llm_max_retries(&self, retries: u32) {
1233        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1234        s.llm_max_retries = retries;
1235        drop(s);
1236        self.bump_version();
1237    }
1238
1239    pub fn set_llm_max_parallel_requests(&self, parallel: u32) {
1240        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1241        s.llm_max_parallel_requests = parallel;
1242        drop(s);
1243        self.bump_version();
1244    }
1245
1246    /// Select the record/replay mock LLM (`MOCK_LLM` parity).
1247    pub fn set_llm_mock(&self, mock: bool) {
1248        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1249        s.llm_mock = mock;
1250        drop(s);
1251        self.bump_version();
1252    }
1253
1254    /// Set the cassette path used by the replay mock (`MOCK_LLM_CASSETTE`).
1255    pub fn set_llm_cassette(&self, cassette: &str) {
1256        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1257        s.llm_cassette = cassette.to_string();
1258        drop(s);
1259        self.bump_version();
1260    }
1261
1262    /// Set the recording cassette output path (`COGNEE_RECORD_LLM`); empty = unset.
1263    pub fn set_llm_record_path(&self, path: &str) {
1264        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1265        s.llm_record_path = path.to_string();
1266        drop(s);
1267        self.bump_version();
1268    }
1269
1270    // -- Embedding paths -----------------------------------------------------
1271
1272    pub fn set_embedding_model_path(&self, path: &str) {
1273        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1274        s.embedding_model_path = path.to_string();
1275        drop(s);
1276        self.bump_version();
1277    }
1278
1279    pub fn set_embedding_tokenizer_path(&self, path: &str) {
1280        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1281        s.embedding_tokenizer_path = path.to_string();
1282        drop(s);
1283        self.bump_version();
1284    }
1285
1286    // -- Vector DB endpoint parts --------------------------------------------
1287
1288    pub fn set_vector_db_host(&self, host: &str) {
1289        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1290        s.vector_db_host = host.to_string();
1291        drop(s);
1292        self.bump_version();
1293    }
1294
1295    pub fn set_vector_db_port(&self, port: u16) {
1296        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1297        s.vector_db_port = port;
1298        drop(s);
1299        self.bump_version();
1300    }
1301
1302    pub fn set_vector_db_name(&self, name: &str) {
1303        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1304        s.vector_db_name = name.to_string();
1305        drop(s);
1306        self.bump_version();
1307    }
1308
1309    // -- Graph DB granular path ----------------------------------------------
1310
1311    /// Set `graph_file_path` directly.
1312    ///
1313    /// Unlike [`set_system_root_directory`](Self::set_system_root_directory),
1314    /// this is a plain field write and does **not** cascade to other paths.
1315    pub fn set_graph_file_path(&self, path: &str) {
1316        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1317        s.graph_file_path = path.to_string();
1318        drop(s);
1319        self.bump_version();
1320    }
1321
1322    // -- Paths ---------------------------------------------------------------
1323
1324    pub fn set_cache_root_directory(&self, path: &str) {
1325        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1326        s.cache_root_directory = path.to_string();
1327        drop(s);
1328        self.bump_version();
1329    }
1330
1331    pub fn set_logs_root_directory(&self, path: &str) {
1332        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1333        s.logs_root_directory = path.to_string();
1334        drop(s);
1335        self.bump_version();
1336    }
1337
1338    // -- Ontology ------------------------------------------------------------
1339
1340    pub fn set_ontology_file_path(&self, path: &str) {
1341        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1342        s.ontology_file_path = path.to_string();
1343        drop(s);
1344        self.bump_version();
1345    }
1346
1347    pub fn set_ontology_resolver(&self, resolver: &str) {
1348        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1349        s.ontology_resolver = resolver.to_string();
1350        drop(s);
1351        self.bump_version();
1352    }
1353
1354    pub fn set_ontology_matching_strategy(&self, strategy: &str) {
1355        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1356        s.ontology_matching_strategy = strategy.to_string();
1357        drop(s);
1358        self.bump_version();
1359    }
1360
1361    /// Return a snapshot of the current settings with secrets masked.
1362    ///
1363    /// All secret-bearing fields (`*_api_key`, `*_password`, `*_key`) are
1364    /// replaced with `"<redacted>"` when non-empty, matching Python's
1365    /// `config.get_settings()` behaviour so callers can safely log or expose
1366    /// the output without leaking credentials.
1367    ///
1368    /// The returned map can be serialized to JSON for logging or debugging.
1369    pub fn get_settings(&self) -> std::collections::HashMap<String, serde_json::Value> {
1370        use serde_json::Value;
1371
1372        let s = self.inner.read().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1373        let mut m = std::collections::HashMap::new();
1374
1375        // Mask any non-empty secret: use "<redacted>" unconditionally so that
1376        // test keys (e.g. "my-secret-key") are masked even when they don't
1377        // match the pattern-based cognee_utils::redact() heuristics.
1378        let mask = |v: &String| -> String {
1379            if v.is_empty() {
1380                String::new()
1381            } else {
1382                "<redacted>".to_string()
1383            }
1384        };
1385
1386        // Mask credentials embedded in a connection URL's userinfo component
1387        // (e.g. `postgres://user:pass@host/db` → `postgres://<redacted>@host/db`).
1388        // URLs without credentials pass through unchanged. This prevents
1389        // `relational_db_url`/`vector_db_url`/`graph_database_url` from leaking
1390        // passwords when the settings snapshot is logged.
1391        let mask_url = |v: &String| -> String {
1392            // Find the `scheme://` prefix and the `@` that terminates userinfo.
1393            if let Some(scheme_end) = v.find("://") {
1394                let after_scheme = scheme_end + 3;
1395                if let Some(at_rel) = v[after_scheme..].find('@') {
1396                    let at_abs = after_scheme + at_rel;
1397                    // Only redact when userinfo actually carries a credential
1398                    // separator or any non-empty content.
1399                    if at_abs > after_scheme {
1400                        return format!("{}<redacted>{}", &v[..after_scheme], &v[at_abs..]);
1401                    }
1402                }
1403            }
1404            v.clone()
1405        };
1406
1407        // LLM
1408        m.insert("llm_provider".into(), Value::String(s.llm_provider.clone()));
1409        m.insert("llm_model".into(), Value::String(s.llm_model.clone()));
1410        m.insert("llm_api_key".into(), Value::String(mask(&s.llm_api_key)));
1411        m.insert("llm_endpoint".into(), Value::String(s.llm_endpoint.clone()));
1412        m.insert(
1413            "llm_api_version".into(),
1414            Value::String(s.llm_api_version.clone()),
1415        );
1416        m.insert(
1417            "llm_temperature".into(),
1418            Value::Number(
1419                serde_json::Number::from_f64(s.llm_temperature)
1420                    .unwrap_or(serde_json::Number::from(0)),
1421            ),
1422        );
1423        m.insert(
1424            "llm_max_completion_tokens".into(),
1425            Value::Number(s.llm_max_completion_tokens.into()),
1426        );
1427
1428        // Embedding
1429        m.insert(
1430            "embedding_provider".into(),
1431            Value::String(s.embedding_provider.clone()),
1432        );
1433        m.insert(
1434            "embedding_model_name".into(),
1435            Value::String(s.embedding_model_name.clone()),
1436        );
1437        m.insert(
1438            "embedding_api_key".into(),
1439            Value::String(mask(&s.embedding_api_key)),
1440        );
1441        m.insert(
1442            "embedding_endpoint".into(),
1443            Value::String(s.embedding_endpoint.clone()),
1444        );
1445        m.insert(
1446            "embedding_dimensions".into(),
1447            Value::Number(s.embedding_dimensions.into()),
1448        );
1449
1450        // Graph DB
1451        m.insert(
1452            "graph_database_provider".into(),
1453            Value::String(s.graph_database_provider.clone()),
1454        );
1455        m.insert(
1456            "graph_database_url".into(),
1457            Value::String(mask_url(&s.graph_database_url)),
1458        );
1459        m.insert(
1460            "graph_database_password".into(),
1461            Value::String(mask(&s.graph_database_password)),
1462        );
1463        m.insert(
1464            "graph_database_key".into(),
1465            Value::String(mask(&s.graph_database_key)),
1466        );
1467
1468        // Vector DB
1469        m.insert(
1470            "vector_db_provider".into(),
1471            Value::String(s.vector_db_provider.clone()),
1472        );
1473        m.insert(
1474            "vector_db_url".into(),
1475            Value::String(mask_url(&s.vector_db_url)),
1476        );
1477        m.insert(
1478            "vector_db_key".into(),
1479            Value::String(mask(&s.vector_db_key)),
1480        );
1481        m.insert(
1482            "vector_db_password".into(),
1483            Value::String(mask(&s.vector_db_password)),
1484        );
1485
1486        // Relational DB
1487        m.insert("db_provider".into(), Value::String(s.db_provider.clone()));
1488        m.insert(
1489            "relational_db_url".into(),
1490            Value::String(mask_url(&s.relational_db_url)),
1491        );
1492        m.insert("db_password".into(), Value::String(mask(&s.db_password)));
1493
1494        // Paths
1495        m.insert(
1496            "system_root_directory".into(),
1497            Value::String(s.system_root_directory.clone()),
1498        );
1499        m.insert(
1500            "data_root_directory".into(),
1501            Value::String(s.data_root_directory.clone()),
1502        );
1503        m.insert(
1504            "logs_root_directory".into(),
1505            Value::String(s.logs_root_directory.clone()),
1506        );
1507
1508        // Chunking
1509        m.insert(
1510            "chunk_strategy".into(),
1511            Value::String(s.chunk_strategy.clone()),
1512        );
1513        m.insert("chunk_size".into(), Value::Number(s.chunk_size.into()));
1514        m.insert(
1515            "chunk_overlap".into(),
1516            Value::Number(s.chunk_overlap.into()),
1517        );
1518
1519        m
1520    }
1521}
1522
1523// -- Bulk setters and generic dispatch ---------------------------------------
1524
1525/// Extract a `String` from a JSON value, or return a type-mismatch error.
1526fn as_string(key: &str, value: &serde_json::Value) -> Result<String, ConfigError> {
1527    value
1528        .as_str()
1529        .map(ToString::to_string)
1530        .ok_or_else(|| ConfigError::TypeMismatch {
1531            key: key.to_string(),
1532            reason: "expected a string".to_string(),
1533        })
1534}
1535
1536/// Extract a `u32` from a JSON value, or return a type-mismatch error.
1537fn as_u32(key: &str, value: &serde_json::Value) -> Result<u32, ConfigError> {
1538    value
1539        .as_u64()
1540        .and_then(|n| u32::try_from(n).ok())
1541        .ok_or_else(|| ConfigError::TypeMismatch {
1542            key: key.to_string(),
1543            reason: "expected a positive integer (u32)".to_string(),
1544        })
1545}
1546
1547/// Extract an `f64` from a JSON value, or return a type-mismatch error.
1548fn as_f64(key: &str, value: &serde_json::Value) -> Result<f64, ConfigError> {
1549    value.as_f64().ok_or_else(|| ConfigError::TypeMismatch {
1550        key: key.to_string(),
1551        reason: "expected a number".to_string(),
1552    })
1553}
1554
1555/// Extract a `u16` from a JSON value, or return a type-mismatch error.
1556fn as_u16(key: &str, value: &serde_json::Value) -> Result<u16, ConfigError> {
1557    value
1558        .as_u64()
1559        .and_then(|n| u16::try_from(n).ok())
1560        .ok_or_else(|| ConfigError::TypeMismatch {
1561            key: key.to_string(),
1562            reason: "expected a positive integer (u16)".to_string(),
1563        })
1564}
1565
1566/// Extract a `bool` from a JSON value, or return a type-mismatch error.
1567fn as_bool(key: &str, value: &serde_json::Value) -> Result<bool, ConfigError> {
1568    value.as_bool().ok_or_else(|| ConfigError::TypeMismatch {
1569        key: key.to_string(),
1570        reason: "expected a boolean".to_string(),
1571    })
1572}
1573
1574impl ConfigManager {
1575    /// Bulk-update LLM config from a map. Matches Python `config.set_llm_config()`.
1576    pub fn set_llm_config(
1577        &self,
1578        values: &HashMap<String, serde_json::Value>,
1579    ) -> Result<(), ConfigError> {
1580        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1581        for (key, value) in values {
1582            match key.as_str() {
1583                "llm_provider" => s.llm_provider = as_string(key, value)?,
1584                "llm_model" => s.llm_model = as_string(key, value)?,
1585                "llm_api_key" => s.llm_api_key = as_string(key, value)?,
1586                "llm_endpoint" => s.llm_endpoint = as_string(key, value)?,
1587                "llm_api_version" => s.llm_api_version = as_string(key, value)?,
1588                "llm_temperature" => s.llm_temperature = as_f64(key, value)?,
1589                "llm_max_completion_tokens" => s.llm_max_completion_tokens = as_u32(key, value)?,
1590                "llm_streaming" => s.llm_streaming = as_bool(key, value)?,
1591                "llm_max_retries" => s.llm_max_retries = as_u32(key, value)?,
1592                "llm_max_parallel_requests" => {
1593                    s.llm_max_parallel_requests = as_u32(key, value)?;
1594                }
1595                "llm_mock" => s.llm_mock = as_bool(key, value)?,
1596                "llm_cassette" => s.llm_cassette = as_string(key, value)?,
1597                "llm_record_path" => s.llm_record_path = as_string(key, value)?,
1598                other => return Err(ConfigError::UnknownKey(other.to_string())),
1599            }
1600        }
1601        drop(s);
1602        self.bump_version();
1603        Ok(())
1604    }
1605
1606    /// Bulk-update embedding config from a map. Matches Python `config.set_embedding_config()`.
1607    pub fn set_embedding_config(
1608        &self,
1609        values: &HashMap<String, serde_json::Value>,
1610    ) -> Result<(), ConfigError> {
1611        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1612        for (key, value) in values {
1613            match key.as_str() {
1614                "embedding_provider" => s.embedding_provider = as_string(key, value)?,
1615                "embedding_model" | "embedding_model_name" => {
1616                    s.embedding_model_name = as_string(key, value)?;
1617                }
1618                "embedding_dimensions" => s.embedding_dimensions = as_u32(key, value)?,
1619                "embedding_endpoint" => s.embedding_endpoint = as_string(key, value)?,
1620                "embedding_api_key" => s.embedding_api_key = as_string(key, value)?,
1621                "embedding_model_path" => s.embedding_model_path = as_string(key, value)?,
1622                "embedding_tokenizer_path" => {
1623                    s.embedding_tokenizer_path = as_string(key, value)?;
1624                }
1625                "embedding_api_version" => s.embedding_api_version = as_string(key, value)?,
1626                other => return Err(ConfigError::UnknownKey(other.to_string())),
1627            }
1628        }
1629        drop(s);
1630        self.bump_version();
1631        Ok(())
1632    }
1633
1634    /// Bulk-update vector DB config from a map. Matches Python `config.set_vector_db_config()`.
1635    pub fn set_vector_db_config(
1636        &self,
1637        values: &HashMap<String, serde_json::Value>,
1638    ) -> Result<(), ConfigError> {
1639        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1640        for (key, value) in values {
1641            match key.as_str() {
1642                "vector_db_provider" => s.vector_db_provider = as_string(key, value)?,
1643                "vector_db_url" => s.vector_db_url = as_string(key, value)?,
1644                "vector_db_key" => s.vector_db_key = as_string(key, value)?,
1645                "vector_db_host" => s.vector_db_host = as_string(key, value)?,
1646                "vector_db_port" => s.vector_db_port = as_u16(key, value)?,
1647                "vector_db_name" => s.vector_db_name = as_string(key, value)?,
1648                other => return Err(ConfigError::UnknownKey(other.to_string())),
1649            }
1650        }
1651        drop(s);
1652        self.bump_version();
1653        Ok(())
1654    }
1655
1656    /// Bulk-update graph DB config from a map. Matches Python `config.set_graph_db_config()`.
1657    pub fn set_graph_db_config(
1658        &self,
1659        values: &HashMap<String, serde_json::Value>,
1660    ) -> Result<(), ConfigError> {
1661        let mut s = self.inner.write().expect("lock poison is unrecoverable"); // lock poison is unrecoverable
1662        for (key, value) in values {
1663            match key.as_str() {
1664                "graph_database_provider" => s.graph_database_provider = as_string(key, value)?,
1665                "graph_model" => s.graph_model = as_string(key, value)?,
1666                "graph_file_path" => s.graph_file_path = as_string(key, value)?,
1667                other => return Err(ConfigError::UnknownKey(other.to_string())),
1668            }
1669        }
1670        drop(s);
1671        self.bump_version();
1672        Ok(())
1673    }
1674
1675    /// Generic setter matching Python's `config.set(key, value)`.
1676    ///
1677    /// Dispatches to the appropriate typed setter based on the key name.
1678    /// Returns `ConfigError::UnknownKey` for unrecognized keys and
1679    /// `ConfigError::TypeMismatch` when the JSON value type doesn't match.
1680    pub fn set(&self, key: &str, value: serde_json::Value) -> Result<(), ConfigError> {
1681        match key {
1682            // LLM
1683            "llm_provider" => self.set_llm_provider(as_string(key, &value)?.as_str()),
1684            "llm_model" => self.set_llm_model(as_string(key, &value)?.as_str()),
1685            "llm_api_key" => self.set_llm_api_key(as_string(key, &value)?.as_str()),
1686            "llm_endpoint" => self.set_llm_endpoint(as_string(key, &value)?.as_str()),
1687            // LLM tuning
1688            "llm_api_version" => self.set_llm_api_version(as_string(key, &value)?.as_str()),
1689            "llm_temperature" => self.set_llm_temperature(as_f64(key, &value)?),
1690            "llm_streaming" => self.set_llm_streaming(as_bool(key, &value)?),
1691            "llm_max_completion_tokens" => {
1692                self.set_llm_max_completion_tokens(as_u32(key, &value)?);
1693            }
1694            "llm_max_retries" => self.set_llm_max_retries(as_u32(key, &value)?),
1695            "llm_max_parallel_requests" => {
1696                self.set_llm_max_parallel_requests(as_u32(key, &value)?);
1697            }
1698            "llm_mock" => self.set_llm_mock(as_bool(key, &value)?),
1699            "llm_cassette" => self.set_llm_cassette(as_string(key, &value)?.as_str()),
1700            "llm_record_path" => self.set_llm_record_path(as_string(key, &value)?.as_str()),
1701            // Embedding
1702            "embedding_provider" => {
1703                self.set_embedding_provider(as_string(key, &value)?.as_str());
1704            }
1705            "embedding_model" | "embedding_model_name" => {
1706                self.set_embedding_model(as_string(key, &value)?.as_str());
1707            }
1708            "embedding_dimensions" => self.set_embedding_dimensions(as_u32(key, &value)?),
1709            "embedding_endpoint" => {
1710                self.set_embedding_endpoint(as_string(key, &value)?.as_str());
1711            }
1712            "embedding_api_key" => self.set_embedding_api_key(as_string(key, &value)?.as_str()),
1713            "embedding_model_path" => {
1714                self.set_embedding_model_path(as_string(key, &value)?.as_str());
1715            }
1716            "embedding_tokenizer_path" => {
1717                self.set_embedding_tokenizer_path(as_string(key, &value)?.as_str());
1718            }
1719            // Vector DB
1720            "vector_db_provider" => {
1721                self.set_vector_db_provider(as_string(key, &value)?.as_str());
1722            }
1723            "vector_db_url" => self.set_vector_db_url(as_string(key, &value)?.as_str()),
1724            "vector_db_key" => self.set_vector_db_key(as_string(key, &value)?.as_str()),
1725            "vector_db_host" => self.set_vector_db_host(as_string(key, &value)?.as_str()),
1726            "vector_db_port" => self.set_vector_db_port(as_u16(key, &value)?),
1727            "vector_db_name" => self.set_vector_db_name(as_string(key, &value)?.as_str()),
1728            // Graph DB
1729            "graph_database_provider" => {
1730                self.set_graph_database_provider(as_string(key, &value)?.as_str());
1731            }
1732            "graph_model" => self.set_graph_model(as_string(key, &value)?.as_str()),
1733            "graph_file_path" => self.set_graph_file_path(as_string(key, &value)?.as_str()),
1734            // Chunking
1735            "chunk_strategy" => self.set_chunk_strategy(as_string(key, &value)?.as_str()),
1736            "chunk_engine" => self.set_chunk_engine(as_string(key, &value)?.as_str()),
1737            "chunk_size" => self.set_chunk_size(as_u32(key, &value)?),
1738            "chunk_overlap" => self.set_chunk_overlap(as_u32(key, &value)?),
1739            // System paths
1740            "system_root_directory" => {
1741                self.set_system_root_directory(as_string(key, &value)?.as_str());
1742            }
1743            "data_root_directory" => {
1744                self.set_data_root_directory(as_string(key, &value)?.as_str());
1745            }
1746            "cache_root_directory" => {
1747                self.set_cache_root_directory(as_string(key, &value)?.as_str());
1748            }
1749            "logs_root_directory" => {
1750                self.set_logs_root_directory(as_string(key, &value)?.as_str());
1751            }
1752            "monitoring_tool" => self.set_monitoring_tool(as_string(key, &value)?.as_str()),
1753            // Ontology
1754            "ontology_file_path" => {
1755                self.set_ontology_file_path(as_string(key, &value)?.as_str());
1756            }
1757            "ontology_resolver" => {
1758                self.set_ontology_resolver(as_string(key, &value)?.as_str());
1759            }
1760            "ontology_matching_strategy" => {
1761                self.set_ontology_matching_strategy(as_string(key, &value)?.as_str());
1762            }
1763            // Embedding extras
1764            "embedding_api_version" => {
1765                self.set_embedding_api_version(as_string(key, &value)?.as_str());
1766            }
1767            "transcription_model" => {
1768                self.set_transcription_model(as_string(key, &value)?.as_str());
1769            }
1770            // LLM fallback
1771            "llm_fallback_model" => {
1772                self.set_llm_fallback_model(as_string(key, &value)?.as_str());
1773            }
1774            "llm_fallback_provider" => {
1775                self.set_llm_fallback_provider(as_string(key, &value)?.as_str());
1776            }
1777            "llm_fallback_endpoint" => {
1778                self.set_llm_fallback_endpoint(as_string(key, &value)?.as_str());
1779            }
1780            "llm_fallback_api_key" => {
1781                self.set_llm_fallback_api_key(as_string(key, &value)?.as_str());
1782            }
1783            // Relational DB
1784            "relational_db_url" => {
1785                self.set_relational_db_url(as_string(key, &value)?.as_str());
1786            }
1787            "migration_db_url" => {
1788                self.set_migration_db_config(as_string(key, &value)?.as_str());
1789            }
1790            // ML models
1791            "classification_model" => {
1792                self.set_classification_model(as_string(key, &value)?.as_str());
1793            }
1794            "summarization_model" => {
1795                self.set_summarization_model(as_string(key, &value)?.as_str());
1796            }
1797            _ => return Err(ConfigError::UnknownKey(key.to_string())),
1798        }
1799        Ok(())
1800    }
1801}
1802
1803#[cfg(test)]
1804#[allow(
1805    clippy::unwrap_used,
1806    clippy::expect_used,
1807    reason = "test code — panics are acceptable failures"
1808)]
1809mod tests {
1810    use super::*;
1811
1812    // Each test sets env vars and must clean up after itself.  `serial` prevents
1813    // parallel tests from seeing each other's env mutations.
1814
1815    #[test]
1816    #[serial_test::serial]
1817    fn overlay_picks_up_ontology_file_path() {
1818        // SAFETY: test is serial — no other thread reads/writes env concurrently.
1819        unsafe { std::env::set_var("ONTOLOGY_FILE_PATH", "/tmp/test.owl") };
1820        let mut s = Settings::default();
1821        s.overlay_from_env();
1822        unsafe { std::env::remove_var("ONTOLOGY_FILE_PATH") };
1823
1824        assert_eq!(s.ontology_file_path, "/tmp/test.owl");
1825        // resolver / strategy should stay at defaults when not set
1826        assert_eq!(s.ontology_resolver, "rdflib");
1827        assert_eq!(s.ontology_matching_strategy, "fuzzy");
1828    }
1829
1830    #[test]
1831    #[serial_test::serial]
1832    fn overlay_picks_up_ontology_resolver() {
1833        // SAFETY: test is serial — no other thread reads/writes env concurrently.
1834        unsafe { std::env::set_var("ONTOLOGY_RESOLVER", "custom") };
1835        let mut s = Settings::default();
1836        s.overlay_from_env();
1837        unsafe { std::env::remove_var("ONTOLOGY_RESOLVER") };
1838
1839        assert_eq!(s.ontology_resolver, "custom");
1840    }
1841
1842    #[test]
1843    #[serial_test::serial]
1844    fn overlay_picks_up_ontology_matching_strategy() {
1845        // SAFETY: test is serial — no other thread reads/writes env concurrently.
1846        unsafe { std::env::set_var("ONTOLOGY_MATCHING_STRATEGY", "exact") };
1847        let mut s = Settings::default();
1848        s.overlay_from_env();
1849        unsafe { std::env::remove_var("ONTOLOGY_MATCHING_STRATEGY") };
1850
1851        assert_eq!(s.ontology_matching_strategy, "exact");
1852    }
1853
1854    #[test]
1855    #[serial_test::serial]
1856    fn overlay_ignores_empty_ontology_file_path() {
1857        // SAFETY: test is serial — no other thread reads/writes env concurrently.
1858        unsafe { std::env::set_var("ONTOLOGY_FILE_PATH", "") };
1859        let mut s = Settings::default();
1860        s.overlay_from_env();
1861        unsafe { std::env::remove_var("ONTOLOGY_FILE_PATH") };
1862
1863        // Empty string must not override the default (the str_var helper filters
1864        // out empty values, so ontology_file_path remains its default empty string).
1865        assert_eq!(s.ontology_file_path, "");
1866    }
1867
1868    #[test]
1869    #[serial_test::serial]
1870    fn overlay_picks_up_cache_backend() {
1871        // SAFETY: test is serial — no other thread reads/writes env concurrently.
1872        unsafe { std::env::set_var("CACHE_BACKEND", "redis") };
1873        let mut s = Settings::default();
1874        s.overlay_from_env();
1875        unsafe { std::env::remove_var("CACHE_BACKEND") };
1876
1877        assert_eq!(s.cache_backend, "redis");
1878    }
1879
1880    #[test]
1881    #[serial_test::serial]
1882    fn overlay_llm_max_completion_tokens_primary() {
1883        // Primary env var takes precedence over the legacy alias.
1884        // SAFETY: test is serial — no other thread reads/writes env concurrently.
1885        unsafe { std::env::set_var("LLM_MAX_COMPLETION_TOKENS", "4096") };
1886        unsafe { std::env::set_var("LLM_MAX_TOKENS", "8192") };
1887        let mut s = Settings::default();
1888        s.overlay_from_env();
1889        unsafe { std::env::remove_var("LLM_MAX_COMPLETION_TOKENS") };
1890        unsafe { std::env::remove_var("LLM_MAX_TOKENS") };
1891
1892        assert_eq!(s.llm_max_completion_tokens, 4096);
1893    }
1894
1895    #[test]
1896    #[serial_test::serial]
1897    fn overlay_llm_max_completion_tokens_alias_fallback() {
1898        // When the primary is unset, the legacy alias is used.
1899        // SAFETY: test is serial — no other thread reads/writes env concurrently.
1900        unsafe { std::env::remove_var("LLM_MAX_COMPLETION_TOKENS") };
1901        unsafe { std::env::set_var("LLM_MAX_TOKENS", "2048") };
1902        let mut s = Settings::default();
1903        s.overlay_from_env();
1904        unsafe { std::env::remove_var("LLM_MAX_TOKENS") };
1905
1906        assert_eq!(s.llm_max_completion_tokens, 2048);
1907    }
1908
1909    #[test]
1910    #[serial_test::serial]
1911    fn overlay_llm_streaming_bool_parsing() {
1912        // SAFETY: test is serial — no other thread reads/writes env concurrently.
1913        for (input, expected) in [
1914            ("true", true),
1915            ("True", true),
1916            ("TRUE", true),
1917            ("1", true),
1918            ("yes", true),
1919            ("false", false),
1920            ("0", false),
1921            ("no", false),
1922        ] {
1923            unsafe { std::env::set_var("LLM_STREAMING", input) };
1924            let mut s = Settings::default();
1925            s.overlay_from_env();
1926            unsafe { std::env::remove_var("LLM_STREAMING") };
1927
1928            assert_eq!(
1929                s.llm_streaming, expected,
1930                "LLM_STREAMING={input} should parse to {expected}"
1931            );
1932        }
1933    }
1934
1935    // -- ConfigManager tests --------------------------------------------------
1936
1937    #[test]
1938    fn config_manager_version_starts_at_zero() {
1939        let cm = ConfigManager::new(Settings::default());
1940        assert_eq!(cm.version(), 0);
1941    }
1942
1943    #[test]
1944    fn config_manager_setter_bumps_version() {
1945        let cm = ConfigManager::new(Settings::default());
1946        cm.set_llm_model("gpt-4o");
1947        assert_eq!(cm.version(), 1);
1948        assert_eq!(cm.read().llm_model, "gpt-4o");
1949
1950        cm.set_llm_api_key("sk-test");
1951        assert_eq!(cm.version(), 2);
1952        assert_eq!(cm.read().llm_api_key, "sk-test");
1953    }
1954
1955    #[test]
1956    fn config_manager_clone_shares_state() {
1957        let cm1 = ConfigManager::new(Settings::default());
1958        let cm2 = cm1.clone();
1959
1960        cm1.set_llm_model("shared-model");
1961        assert_eq!(cm2.read().llm_model, "shared-model");
1962        assert_eq!(cm2.version(), 1);
1963    }
1964
1965    #[test]
1966    fn config_manager_cascading_system_root() {
1967        let settings = Settings {
1968            system_root_directory: "/old/root".to_string(),
1969            graph_file_path: "/old/root/graph".to_string(),
1970            vector_db_url: "/old/root/vectors".to_string(),
1971            ..Default::default()
1972        };
1973
1974        let cm = ConfigManager::new(settings);
1975        cm.set_system_root_directory("/new/root");
1976
1977        let s = cm.read();
1978        assert_eq!(s.system_root_directory, "/new/root");
1979        assert_eq!(s.graph_file_path, "/new/root/graph");
1980        assert_eq!(s.vector_db_url, "/new/root/vectors");
1981    }
1982
1983    #[test]
1984    fn config_manager_cascading_empty_graph_and_vector() {
1985        // When graph_file_path and vector_db_url are empty, cascading should
1986        // set them to defaults under the new system root.
1987        let cm = ConfigManager::new(Settings::default());
1988        cm.set_system_root_directory("/data/cognee");
1989
1990        let s = cm.read();
1991        assert_eq!(s.graph_file_path, "/data/cognee/graph");
1992        assert_eq!(s.vector_db_url, "/data/cognee/vectors");
1993    }
1994
1995    #[test]
1996    fn config_manager_no_cascade_when_custom_paths() {
1997        let settings = Settings {
1998            system_root_directory: "/old".to_string(),
1999            graph_file_path: "/custom/graph".to_string(), // not under /old
2000            vector_db_url: "/custom/vectors".to_string(), // not under /old
2001            ..Default::default()
2002        };
2003
2004        let cm = ConfigManager::new(settings);
2005        cm.set_system_root_directory("/new");
2006
2007        let s = cm.read();
2008        // Custom paths should NOT be cascaded
2009        assert_eq!(s.graph_file_path, "/custom/graph");
2010        assert_eq!(s.vector_db_url, "/custom/vectors");
2011    }
2012
2013    #[test]
2014    fn config_manager_generic_set_string() {
2015        let cm = ConfigManager::new(Settings::default());
2016        cm.set("llm_model", serde_json::Value::String("test-model".into()))
2017            .expect("set should succeed");
2018        assert_eq!(cm.read().llm_model, "test-model");
2019    }
2020
2021    #[test]
2022    fn config_manager_generic_set_u32() {
2023        let cm = ConfigManager::new(Settings::default());
2024        cm.set("chunk_size", serde_json::json!(2048))
2025            .expect("set should succeed");
2026        assert_eq!(cm.read().chunk_size, 2048);
2027    }
2028
2029    #[test]
2030    fn config_manager_generic_set_unknown_key() {
2031        let cm = ConfigManager::new(Settings::default());
2032        let result = cm.set("nonexistent_key", serde_json::json!("value"));
2033        assert!(result.is_err());
2034        match result.unwrap_err() {
2035            ConfigError::UnknownKey(k) => assert_eq!(k, "nonexistent_key"),
2036            other => panic!("expected UnknownKey, got: {other}"),
2037        }
2038    }
2039
2040    #[test]
2041    #[serial_test::serial]
2042    fn overlay_enable_backend_access_control() {
2043        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2044        unsafe { std::env::set_var("ENABLE_BACKEND_ACCESS_CONTROL", "true") };
2045        let mut s = Settings::default();
2046        s.overlay_from_env();
2047        unsafe { std::env::remove_var("ENABLE_BACKEND_ACCESS_CONTROL") };
2048
2049        assert!(s.enable_access_control);
2050
2051        // Also verify "1" works
2052        unsafe { std::env::set_var("ENABLE_BACKEND_ACCESS_CONTROL", "1") };
2053        let mut s2 = Settings::default();
2054        s2.overlay_from_env();
2055        unsafe { std::env::remove_var("ENABLE_BACKEND_ACCESS_CONTROL") };
2056
2057        assert!(s2.enable_access_control);
2058    }
2059
2060    #[test]
2061    fn config_manager_generic_set_type_mismatch() {
2062        let cm = ConfigManager::new(Settings::default());
2063        let result = cm.set("chunk_size", serde_json::json!("not a number"));
2064        assert!(result.is_err());
2065        match result.unwrap_err() {
2066            ConfigError::TypeMismatch { key, .. } => assert_eq!(key, "chunk_size"),
2067            other => panic!("expected TypeMismatch, got: {other}"),
2068        }
2069    }
2070
2071    #[test]
2072    fn config_manager_bulk_llm_config() {
2073        let cm = ConfigManager::new(Settings::default());
2074        let mut map = HashMap::new();
2075        map.insert("llm_model".into(), serde_json::json!("gpt-4o"));
2076        map.insert("llm_provider".into(), serde_json::json!("openai"));
2077        cm.set_llm_config(&map).expect("bulk set should succeed");
2078
2079        let s = cm.read();
2080        assert_eq!(s.llm_model, "gpt-4o");
2081        assert_eq!(s.llm_provider, "openai");
2082    }
2083
2084    #[test]
2085    fn config_manager_bulk_embedding_config() {
2086        let cm = ConfigManager::new(Settings::default());
2087        let mut map = HashMap::new();
2088        map.insert("embedding_provider".into(), serde_json::json!("openai"));
2089        map.insert("embedding_dimensions".into(), serde_json::json!(1536));
2090        cm.set_embedding_config(&map)
2091            .expect("bulk set should succeed");
2092
2093        let s = cm.read();
2094        assert_eq!(s.embedding_provider, "openai");
2095        assert_eq!(s.embedding_dimensions, 1536);
2096    }
2097
2098    // -- Option B widening: granular setters ----------------------------------
2099
2100    #[test]
2101    fn config_manager_new_granular_setters_bump_version() {
2102        let cm = ConfigManager::new(Settings::default());
2103        let mut expected_version = 0u64;
2104
2105        cm.set_llm_api_version("2024-02-15");
2106        expected_version += 1;
2107        assert_eq!(cm.read().llm_api_version, "2024-02-15");
2108
2109        cm.set_llm_temperature(0.7);
2110        expected_version += 1;
2111        assert!((cm.read().llm_temperature - 0.7).abs() < f64::EPSILON);
2112
2113        cm.set_llm_streaming(true);
2114        expected_version += 1;
2115        assert!(cm.read().llm_streaming);
2116
2117        cm.set_llm_max_completion_tokens(2048);
2118        expected_version += 1;
2119        assert_eq!(cm.read().llm_max_completion_tokens, 2048);
2120
2121        cm.set_llm_max_retries(5);
2122        expected_version += 1;
2123        assert_eq!(cm.read().llm_max_retries, 5);
2124
2125        cm.set_llm_max_parallel_requests(8);
2126        expected_version += 1;
2127        assert_eq!(cm.read().llm_max_parallel_requests, 8);
2128
2129        cm.set_embedding_model_path("/models/m.onnx");
2130        expected_version += 1;
2131        assert_eq!(cm.read().embedding_model_path, "/models/m.onnx");
2132
2133        cm.set_embedding_tokenizer_path("/models/t.json");
2134        expected_version += 1;
2135        assert_eq!(cm.read().embedding_tokenizer_path, "/models/t.json");
2136
2137        cm.set_vector_db_host("localhost");
2138        expected_version += 1;
2139        assert_eq!(cm.read().vector_db_host, "localhost");
2140
2141        cm.set_vector_db_port(6333);
2142        expected_version += 1;
2143        assert_eq!(cm.read().vector_db_port, 6333);
2144
2145        cm.set_vector_db_name("my_collection");
2146        expected_version += 1;
2147        assert_eq!(cm.read().vector_db_name, "my_collection");
2148
2149        cm.set_graph_file_path("/data/graph");
2150        expected_version += 1;
2151        assert_eq!(cm.read().graph_file_path, "/data/graph");
2152
2153        cm.set_cache_root_directory("/tmp/cache");
2154        expected_version += 1;
2155        assert_eq!(cm.read().cache_root_directory, "/tmp/cache");
2156
2157        cm.set_logs_root_directory("/tmp/logs");
2158        expected_version += 1;
2159        assert_eq!(cm.read().logs_root_directory, "/tmp/logs");
2160
2161        cm.set_ontology_file_path("/onto.owl");
2162        expected_version += 1;
2163        assert_eq!(cm.read().ontology_file_path, "/onto.owl");
2164
2165        cm.set_ontology_resolver("custom");
2166        expected_version += 1;
2167        assert_eq!(cm.read().ontology_resolver, "custom");
2168
2169        cm.set_ontology_matching_strategy("exact");
2170        expected_version += 1;
2171        assert_eq!(cm.read().ontology_matching_strategy, "exact");
2172
2173        // Every granular setter must have bumped the version exactly once.
2174        assert_eq!(cm.version(), expected_version);
2175    }
2176
2177    #[test]
2178    fn config_manager_set_graph_file_path_does_not_cascade() {
2179        let settings = Settings {
2180            system_root_directory: "/root".to_string(),
2181            vector_db_url: "/root/vectors".to_string(),
2182            ..Default::default()
2183        };
2184        let cm = ConfigManager::new(settings);
2185        cm.set_graph_file_path("/elsewhere/graph");
2186
2187        let s = cm.read();
2188        assert_eq!(s.graph_file_path, "/elsewhere/graph");
2189        // Unlike set_system_root_directory, vector_db_url is untouched.
2190        assert_eq!(s.vector_db_url, "/root/vectors");
2191        assert_eq!(s.system_root_directory, "/root");
2192    }
2193
2194    // -- Option B widening: generic set() dispatch ----------------------------
2195
2196    #[test]
2197    fn config_manager_generic_set_new_keys() {
2198        let cm = ConfigManager::new(Settings::default());
2199
2200        cm.set("llm_temperature", serde_json::json!(0.5))
2201            .expect("llm_temperature should be settable");
2202        assert!((cm.read().llm_temperature - 0.5).abs() < f64::EPSILON);
2203
2204        cm.set("llm_streaming", serde_json::json!(true))
2205            .expect("llm_streaming should be settable");
2206        assert!(cm.read().llm_streaming);
2207
2208        cm.set("llm_max_retries", serde_json::json!(7))
2209            .expect("llm_max_retries should be settable");
2210        assert_eq!(cm.read().llm_max_retries, 7);
2211
2212        cm.set("vector_db_host", serde_json::json!("host"))
2213            .expect("vector_db_host should be settable");
2214        assert_eq!(cm.read().vector_db_host, "host");
2215
2216        cm.set("vector_db_port", serde_json::json!(6333))
2217            .expect("vector_db_port should be settable");
2218        assert_eq!(cm.read().vector_db_port, 6333);
2219
2220        cm.set("graph_file_path", serde_json::json!("/g"))
2221            .expect("graph_file_path should be settable");
2222        assert_eq!(cm.read().graph_file_path, "/g");
2223
2224        cm.set("cache_root_directory", serde_json::json!("/c"))
2225            .expect("cache_root_directory should be settable");
2226        assert_eq!(cm.read().cache_root_directory, "/c");
2227
2228        cm.set("logs_root_directory", serde_json::json!("/l"))
2229            .expect("logs_root_directory should be settable");
2230        assert_eq!(cm.read().logs_root_directory, "/l");
2231
2232        cm.set("ontology_file_path", serde_json::json!("/o.owl"))
2233            .expect("ontology_file_path should be settable");
2234        assert_eq!(cm.read().ontology_file_path, "/o.owl");
2235
2236        cm.set("embedding_model_path", serde_json::json!("/m.onnx"))
2237            .expect("embedding_model_path should be settable");
2238        assert_eq!(cm.read().embedding_model_path, "/m.onnx");
2239    }
2240
2241    #[test]
2242    fn config_manager_generic_set_u16_type_mismatch() {
2243        let cm = ConfigManager::new(Settings::default());
2244        let result = cm.set("vector_db_port", serde_json::json!("not a number"));
2245        match result.unwrap_err() {
2246            ConfigError::TypeMismatch { key, .. } => assert_eq!(key, "vector_db_port"),
2247            other => panic!("expected TypeMismatch, got: {other}"),
2248        }
2249    }
2250
2251    // -- Option B widening: bulk setter allowlists ----------------------------
2252
2253    #[test]
2254    fn config_manager_bulk_llm_config_new_keys() {
2255        let cm = ConfigManager::new(Settings::default());
2256        let mut map = HashMap::new();
2257        map.insert("llm_streaming".into(), serde_json::json!(true));
2258        map.insert("llm_max_retries".into(), serde_json::json!(9));
2259        map.insert("llm_max_parallel_requests".into(), serde_json::json!(3));
2260        cm.set_llm_config(&map).expect("bulk set should succeed");
2261
2262        let s = cm.read();
2263        assert!(s.llm_streaming);
2264        assert_eq!(s.llm_max_retries, 9);
2265        assert_eq!(s.llm_max_parallel_requests, 3);
2266    }
2267
2268    #[test]
2269    fn config_manager_bulk_vector_db_config_new_keys() {
2270        let cm = ConfigManager::new(Settings::default());
2271        let mut map = HashMap::new();
2272        map.insert("vector_db_host".into(), serde_json::json!("vhost"));
2273        map.insert("vector_db_port".into(), serde_json::json!(6333));
2274        map.insert("vector_db_name".into(), serde_json::json!("coll"));
2275        cm.set_vector_db_config(&map)
2276            .expect("bulk set should succeed");
2277
2278        let s = cm.read();
2279        assert_eq!(s.vector_db_host, "vhost");
2280        assert_eq!(s.vector_db_port, 6333);
2281        assert_eq!(s.vector_db_name, "coll");
2282    }
2283
2284    #[test]
2285    fn config_manager_bulk_embedding_config_new_keys() {
2286        let cm = ConfigManager::new(Settings::default());
2287        let mut map = HashMap::new();
2288        map.insert("embedding_model_path".into(), serde_json::json!("/m.onnx"));
2289        map.insert(
2290            "embedding_tokenizer_path".into(),
2291            serde_json::json!("/t.json"),
2292        );
2293        cm.set_embedding_config(&map)
2294            .expect("bulk set should succeed");
2295
2296        let s = cm.read();
2297        assert_eq!(s.embedding_model_path, "/m.onnx");
2298        assert_eq!(s.embedding_tokenizer_path, "/t.json");
2299    }
2300
2301    #[test]
2302    fn config_manager_bulk_llm_config_rejects_out_of_subset_key() {
2303        // A vector key fed to set_llm_config must be rejected as UnknownKey.
2304        let cm = ConfigManager::new(Settings::default());
2305        let mut map = HashMap::new();
2306        map.insert("vector_db_url".into(), serde_json::json!("/v"));
2307        match cm.set_llm_config(&map).unwrap_err() {
2308            ConfigError::UnknownKey(k) => assert_eq!(k, "vector_db_url"),
2309            other => panic!("expected UnknownKey, got: {other}"),
2310        }
2311    }
2312
2313    #[test]
2314    fn config_manager_embedding_fields_default() {
2315        let s = Settings::default();
2316        // Default provider: OpenAI everywhere except Android (local ONNX/edge).
2317        #[cfg(not(target_os = "android"))]
2318        {
2319            assert_eq!(s.embedding_provider, "openai");
2320            assert_eq!(s.embedding_model_name, "text-embedding-3-small");
2321            assert_eq!(s.embedding_dimensions, 1536);
2322        }
2323        #[cfg(target_os = "android")]
2324        {
2325            assert_eq!(s.embedding_provider, "onnx");
2326            assert_eq!(s.embedding_dimensions, 384);
2327        }
2328        // No embedding-specific endpoint/key by default — they fall back to the
2329        // LLM provider's at engine-build time.
2330        assert_eq!(s.embedding_endpoint, "");
2331        assert_eq!(s.embedding_api_key, "");
2332    }
2333
2334    #[test]
2335    #[serial_test::serial]
2336    fn overlay_picks_up_embedding_provider() {
2337        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2338        unsafe { std::env::set_var("EMBEDDING_PROVIDER", "openai") };
2339        let mut s = Settings::default();
2340        s.overlay_from_env();
2341        unsafe { std::env::remove_var("EMBEDDING_PROVIDER") };
2342
2343        assert_eq!(s.embedding_provider, "openai");
2344    }
2345
2346    #[test]
2347    #[serial_test::serial]
2348    fn overlay_picks_up_log_level() {
2349        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2350        unsafe { std::env::set_var("LOG_LEVEL", "debug") };
2351        let mut s = Settings::default();
2352        s.overlay_from_env();
2353        unsafe { std::env::remove_var("LOG_LEVEL") };
2354
2355        assert_eq!(s.log_level, "debug");
2356    }
2357
2358    #[test]
2359    #[serial_test::serial]
2360    fn overlay_picks_up_cognee_logs_dir() {
2361        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2362        unsafe { std::env::set_var("COGNEE_LOGS_DIR", "/tmp/logs") };
2363        let mut s = Settings::default();
2364        s.overlay_from_env();
2365        unsafe { std::env::remove_var("COGNEE_LOGS_DIR") };
2366
2367        assert_eq!(s.logs_root_directory, "/tmp/logs");
2368    }
2369
2370    #[test]
2371    #[serial_test::serial]
2372    fn overlay_picks_up_cache_root_directory() {
2373        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2374        unsafe { std::env::set_var("CACHE_ROOT_DIRECTORY", "/tmp/cache") };
2375        let mut s = Settings::default();
2376        s.overlay_from_env();
2377        unsafe { std::env::remove_var("CACHE_ROOT_DIRECTORY") };
2378
2379        assert_eq!(s.cache_root_directory, "/tmp/cache");
2380    }
2381
2382    #[test]
2383    #[serial_test::serial]
2384    fn overlay_picks_up_enable_last_accessed() {
2385        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2386        unsafe { std::env::set_var("ENABLE_LAST_ACCESSED", "yes") };
2387        let mut s = Settings::default();
2388        s.overlay_from_env();
2389        unsafe { std::env::remove_var("ENABLE_LAST_ACCESSED") };
2390
2391        assert!(s.enable_last_accessed);
2392    }
2393
2394    #[test]
2395    #[serial_test::serial]
2396    fn overlay_picks_up_otel_service_name() {
2397        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2398        unsafe { std::env::set_var("OTEL_SERVICE_NAME", "my-service") };
2399        let mut s = Settings::default();
2400        s.overlay_from_env();
2401        unsafe { std::env::remove_var("OTEL_SERVICE_NAME") };
2402
2403        assert_eq!(s.otel_service_name, "my-service");
2404    }
2405
2406    #[test]
2407    #[serial_test::serial]
2408    fn overlay_picks_up_otel_exporter_otlp_endpoint() {
2409        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2410        unsafe { std::env::set_var("OTEL_EXPORTER_OTLP_ENDPOINT", "http://collector:4317") };
2411        let mut s = Settings::default();
2412        s.overlay_from_env();
2413        unsafe { std::env::remove_var("OTEL_EXPORTER_OTLP_ENDPOINT") };
2414
2415        assert_eq!(s.otel_exporter_otlp_endpoint, "http://collector:4317");
2416    }
2417
2418    #[test]
2419    #[serial_test::serial]
2420    fn overlay_picks_up_otel_exporter_otlp_headers() {
2421        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2422        unsafe {
2423            std::env::set_var(
2424                "OTEL_EXPORTER_OTLP_HEADERS",
2425                "authorization=Bearer abc,x-trace=on",
2426            )
2427        };
2428        let mut s = Settings::default();
2429        s.overlay_from_env();
2430        unsafe { std::env::remove_var("OTEL_EXPORTER_OTLP_HEADERS") };
2431
2432        assert_eq!(
2433            s.otel_exporter_otlp_headers,
2434            "authorization=Bearer abc,x-trace=on"
2435        );
2436    }
2437
2438    #[test]
2439    #[serial_test::serial]
2440    fn overlay_picks_up_otel_exporter_otlp_protocol() {
2441        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2442        unsafe { std::env::set_var("OTEL_EXPORTER_OTLP_PROTOCOL", "http/protobuf") };
2443        let mut s = Settings::default();
2444        s.overlay_from_env();
2445        unsafe { std::env::remove_var("OTEL_EXPORTER_OTLP_PROTOCOL") };
2446
2447        assert_eq!(s.otel_exporter_otlp_protocol, "http/protobuf");
2448    }
2449
2450    #[test]
2451    #[serial_test::serial]
2452    fn overlay_picks_up_otel_span_processor() {
2453        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2454        unsafe { std::env::set_var("OTEL_SPAN_PROCESSOR", "simple") };
2455        let mut s = Settings::default();
2456        s.overlay_from_env();
2457        unsafe { std::env::remove_var("OTEL_SPAN_PROCESSOR") };
2458
2459        assert_eq!(s.otel_span_processor, "simple");
2460    }
2461
2462    #[test]
2463    #[serial_test::serial]
2464    fn overlay_picks_up_otel_traces_sampler() {
2465        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2466        unsafe { std::env::set_var("OTEL_TRACES_SAMPLER", "parentbased_traceidratio") };
2467        unsafe { std::env::set_var("OTEL_TRACES_SAMPLER_ARG", "0.25") };
2468        let mut s = Settings::default();
2469        s.overlay_from_env();
2470        unsafe { std::env::remove_var("OTEL_TRACES_SAMPLER") };
2471        unsafe { std::env::remove_var("OTEL_TRACES_SAMPLER_ARG") };
2472
2473        assert_eq!(s.otel_traces_sampler, "parentbased_traceidratio");
2474        assert_eq!(s.otel_traces_sampler_arg, "0.25");
2475    }
2476
2477    #[test]
2478    #[serial_test::serial]
2479    fn overlay_picks_up_rate_limit_requests() {
2480        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2481        unsafe { std::env::set_var("LLM_RATE_LIMIT_REQUESTS", "120") };
2482        unsafe { std::env::set_var("EMBEDDING_RATE_LIMIT_REQUESTS", "30") };
2483        let mut s = Settings::default();
2484        s.overlay_from_env();
2485        unsafe { std::env::remove_var("LLM_RATE_LIMIT_REQUESTS") };
2486        unsafe { std::env::remove_var("EMBEDDING_RATE_LIMIT_REQUESTS") };
2487
2488        assert_eq!(s.llm_rate_limit_requests, 120);
2489        assert_eq!(s.embedding_rate_limit_requests, 30);
2490    }
2491
2492    #[test]
2493    #[serial_test::serial]
2494    fn overlay_picks_up_storage_backend() {
2495        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2496        unsafe { std::env::set_var("STORAGE_BACKEND", "s3") };
2497        unsafe { std::env::set_var("STORAGE_BUCKET_NAME", "my-bucket") };
2498        let mut s = Settings::default();
2499        s.overlay_from_env();
2500        unsafe { std::env::remove_var("STORAGE_BACKEND") };
2501        unsafe { std::env::remove_var("STORAGE_BUCKET_NAME") };
2502
2503        assert_eq!(s.storage_backend, "s3");
2504        assert_eq!(s.storage_bucket_name, "my-bucket");
2505    }
2506
2507    #[test]
2508    fn default_values_are_correct() {
2509        let s = Settings::default();
2510        assert_eq!(s.cache_backend, "fs");
2511        assert_eq!(s.cache_host, "localhost");
2512        assert_eq!(s.cache_port, 6379);
2513        assert_eq!(s.session_ttl_seconds, 604800);
2514        assert!(s.enable_caching);
2515        assert!(!s.auto_feedback);
2516        assert!(!s.enable_access_control);
2517        assert_eq!(s.log_level, "info");
2518        assert!(!s.llm_rate_limit_enabled);
2519        assert_eq!(s.llm_rate_limit_requests, 60);
2520        assert_eq!(s.llm_rate_limit_interval, 60);
2521        assert!(!s.embedding_rate_limit_enabled);
2522        assert_eq!(s.embedding_rate_limit_requests, 60);
2523        assert_eq!(s.embedding_rate_limit_interval, 60);
2524        assert_eq!(s.storage_backend, "local");
2525        assert!(!s.cognee_tracing_enabled);
2526        assert_eq!(s.otel_service_name, "cognee");
2527        assert_eq!(s.otel_exporter_otlp_endpoint, "");
2528        assert_eq!(s.otel_exporter_otlp_headers, "");
2529        assert_eq!(s.otel_exporter_otlp_protocol, "grpc");
2530        assert_eq!(s.otel_span_processor, "batch");
2531        assert_eq!(s.otel_traces_sampler, "");
2532        assert_eq!(s.otel_traces_sampler_arg, "");
2533        assert!(!s.enable_last_accessed);
2534        #[cfg(not(target_os = "android"))]
2535        assert_eq!(s.embedding_provider, "openai");
2536        #[cfg(target_os = "android")]
2537        assert_eq!(s.embedding_provider, "onnx");
2538    }
2539
2540    #[test]
2541    #[serial_test::serial]
2542    fn overlay_picks_up_embedding_endpoint() {
2543        // SAFETY: test is serial — no other thread reads/writes env concurrently.
2544        unsafe { std::env::set_var("EMBEDDING_ENDPOINT", "https://api.example.com/embed") };
2545        let mut s = Settings::default();
2546        s.overlay_from_env();
2547        unsafe { std::env::remove_var("EMBEDDING_ENDPOINT") };
2548
2549        assert_eq!(s.embedding_endpoint, "https://api.example.com/embed");
2550    }
2551
2552    #[test]
2553    fn telemetry_snapshot_only_emits_allowlisted_keys() {
2554        let cfg = Settings::default();
2555        let snap = cfg.telemetry_snapshot();
2556        let keys: std::collections::BTreeSet<&str> = snap.keys().map(String::as_str).collect();
2557        let expected: std::collections::BTreeSet<&str> = [
2558            "sdk_runtime",
2559            "vector_db_provider",
2560            "graph_db_provider",
2561            "relational_db_provider",
2562            "llm_provider",
2563            "llm_model",
2564            "embedding_provider",
2565            "embedding_model",
2566            "embedding_dimensions",
2567            "chunk_strategy",
2568        ]
2569        .iter()
2570        .copied()
2571        .collect();
2572        assert_eq!(
2573            keys, expected,
2574            "telemetry_snapshot must not leak fields outside the allowlist"
2575        );
2576    }
2577
2578    #[test]
2579    fn telemetry_snapshot_redacts_credentials_and_urls() {
2580        let cfg = Settings {
2581            llm_api_key: "sk-secret".into(),
2582            embedding_api_key: "sk-also-secret".into(),
2583            vector_db_password: "vector-pass".into(),
2584            db_password: "db-pass".into(),
2585            relational_db_url: "postgres://user:pass@host/db".into(),
2586            embedding_endpoint: "https://internal.example/v1/embed".into(),
2587            ..Settings::default()
2588        };
2589
2590        let snap = cfg.telemetry_snapshot();
2591        let json =
2592            serde_json::to_string(&snap).expect("serde_json::Map<String,Value> always serializes");
2593        for forbidden in [
2594            "sk-secret",
2595            "sk-also-secret",
2596            "vector-pass",
2597            "db-pass",
2598            "postgres://",
2599            "internal.example",
2600        ] {
2601            assert!(
2602                !json.contains(forbidden),
2603                "telemetry_snapshot leaked credential/URL substring: {forbidden}"
2604            );
2605        }
2606    }
2607
2608    #[test]
2609    fn telemetry_snapshot_carries_sdk_runtime_rust() {
2610        let cfg = Settings::default();
2611        let snap = cfg.telemetry_snapshot();
2612        assert_eq!(
2613            snap.get("sdk_runtime"),
2614            Some(&serde_json::Value::String("rust".into()))
2615        );
2616    }
2617
2618    #[test]
2619    fn test_config_defaults_match_expected_values() {
2620        let settings = Settings::default();
2621        assert_eq!(settings.graph_database_provider, "ladybug");
2622        assert_eq!(settings.logs_root_directory, "./logs");
2623    }
2624
2625    #[test]
2626    fn test_get_settings_masks_secrets() {
2627        let cfg = ConfigManager::new(Settings::default());
2628        cfg.set_llm_api_key("my-secret-key");
2629        let settings = cfg.get_settings();
2630        let api_key = settings
2631            .get("llm_api_key")
2632            .and_then(|v| v.as_str())
2633            .unwrap_or("");
2634        assert_ne!(api_key, "my-secret-key", "API key must be masked");
2635        // A short key with no recognizable pattern passes through unchanged --
2636        // "my-secret-key" is not an OpenAI/bearer/password-prefixed value, so
2637        // redact() leaves it alone. What matters is the field is present.
2638        assert!(!api_key.is_empty(), "api_key field must be non-empty");
2639    }
2640
2641    #[test]
2642    fn test_get_settings_masks_url_credentials() {
2643        let cfg = ConfigManager::new(Settings::default());
2644        cfg.set_relational_db_url("postgres://admin:s3cret@db.example.com:5432/cognee");
2645        let settings = cfg.get_settings();
2646        let url = settings
2647            .get("relational_db_url")
2648            .and_then(|v| v.as_str())
2649            .unwrap_or("");
2650        assert!(
2651            !url.contains("s3cret") && !url.contains("admin"),
2652            "URL credentials must be masked, got: {url}"
2653        );
2654        assert!(
2655            url.contains("db.example.com") && url.contains("<redacted>"),
2656            "host must remain and userinfo redacted, got: {url}"
2657        );
2658        // A credential-free URL passes through unchanged.
2659        let cfg2 = ConfigManager::new(Settings::default());
2660        cfg2.set_relational_db_url("sqlite:///tmp/test.db");
2661        let s2 = cfg2.get_settings();
2662        assert_eq!(
2663            s2.get("relational_db_url").and_then(|v| v.as_str()),
2664            Some("sqlite:///tmp/test.db")
2665        );
2666    }
2667
2668    #[test]
2669    fn test_set_relational_db_config_bulk() {
2670        let cfg = ConfigManager::new(Settings::default());
2671        cfg.set_relational_db_config(
2672            Some("sqlite:///tmp/test.db"),
2673            Some("sqlite"),
2674            None,
2675            None,
2676            None,
2677            None,
2678            None,
2679        );
2680        let s = cfg.read();
2681        assert_eq!(s.relational_db_url, "sqlite:///tmp/test.db");
2682        assert_eq!(s.db_provider, "sqlite");
2683    }
2684
2685    #[test]
2686    fn test_llm_fallback_setters() {
2687        let cfg = ConfigManager::new(Settings::default());
2688        cfg.set_llm_fallback_model("gpt-4o-mini");
2689        cfg.set_llm_fallback_provider("openai");
2690        cfg.set_llm_fallback_endpoint("https://fallback.example.com/v1");
2691        cfg.set_llm_fallback_api_key("fallback-key");
2692        let s = cfg.read();
2693        assert_eq!(s.llm_fallback_model, "gpt-4o-mini");
2694        assert_eq!(s.llm_fallback_provider, "openai");
2695        assert_eq!(s.llm_fallback_endpoint, "https://fallback.example.com/v1");
2696        assert_eq!(s.llm_fallback_api_key, "fallback-key");
2697    }
2698
2699    #[test]
2700    fn test_embedding_api_version_setter() {
2701        let cfg = ConfigManager::new(Settings::default());
2702        cfg.set_embedding_api_version("2024-02-15");
2703        assert_eq!(cfg.read().embedding_api_version, "2024-02-15");
2704    }
2705
2706    #[test]
2707    fn test_transcription_model_setter() {
2708        let cfg = ConfigManager::new(Settings::default());
2709        cfg.set_transcription_model("whisper-1");
2710        assert_eq!(cfg.read().transcription_model, "whisper-1");
2711    }
2712
2713    #[test]
2714    fn test_migration_db_config_setter() {
2715        let cfg = ConfigManager::new(Settings::default());
2716        cfg.set_migration_db_config("postgres://localhost/migrations");
2717        assert_eq!(
2718            cfg.read().migration_db_url,
2719            "postgres://localhost/migrations"
2720        );
2721    }
2722}