Skip to main content

ucotron_config/
lib.rs

1//! # Ucotron Config
2//!
3//! Configuration system for the Ucotron cognitive memory framework.
4//!
5//! Provides TOML-based configuration parsing and validation for the server,
6//! storage backends, model pipelines, consolidation settings, namespaces, and auth.
7//!
8//! # Configuration Schema
9//!
10//! The configuration file (`ucotron.toml`) supports the following sections:
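//! - `[server]` — HTTP server settings (host, port, workers, log_level, log_format)
//! - `[storage]` — Storage backend selection and settings
//! - `[models]` — Embedding, NER, and LLM model configuration
//! - `[consolidation]` — Background consolidation worker settings
//! - `[namespaces]` — Multi-tenancy namespace configuration
//! - `[auth]` — Authentication settings (API key, JWT)
//! - `[mcp]` — MCP (Model Context Protocol) server settings
//! - `[telemetry]` — OpenTelemetry tracing and metrics export
//! - `[instance]` — Multi-instance deployment settings
//! - `[gdpr]` — GDPR compliance and data retention settings
//! - `[audit]` — Audit logging settings
//! - `[mindset]` — Mindset auto-detection settings
//! - `[connectors]` — Connector scheduling settings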
19//!
20//! # Environment Variable Overrides
21//!
22//! Every config field can be overridden via environment variables using the
23//! `UCOTRON_` prefix and `_` as section separator:
24//! - `UCOTRON_SERVER_HOST` → `server.host`
25//! - `UCOTRON_SERVER_PORT` → `server.port`
26//! - `UCOTRON_SERVER_WORKERS` → `server.workers`
27//! - `UCOTRON_SERVER_LOG_LEVEL` → `server.log_level`
28//! - `UCOTRON_SERVER_LOG_FORMAT` → `server.log_format`
29//! - `UCOTRON_STORAGE_MODE` → `storage.mode`
30//! - `UCOTRON_MODELS_DIR` → `models.models_dir`
31//! - `UCOTRON_CONSOLIDATION_TRIGGER_INTERVAL` → `consolidation.trigger_interval`
32//! - `UCOTRON_AUTH_API_KEY` → `auth.api_key`
33//! - etc.
34
35use serde::{Deserialize, Serialize};
36
/// Top-level Ucotron configuration.
///
/// Parsed from `ucotron.toml` or constructed programmatically.
/// Environment variables with the `UCOTRON_` prefix override TOML values.
///
/// Every section is marked `#[serde(default)]`, so a section that is missing
/// from the input is filled from that section type's `Default` implementation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct UcotronConfig {
    /// HTTP server settings (`[server]`).
    #[serde(default)]
    pub server: ServerConfig,
    /// Storage backend configuration (`[storage]`).
    #[serde(default)]
    pub storage: StorageConfig,
    /// ML model configuration (`[models]`).
    #[serde(default)]
    pub models: ModelsConfig,
    /// Background consolidation settings (`[consolidation]`).
    #[serde(default)]
    pub consolidation: ConsolidationConfig,
    /// MCP server settings (`[mcp]`).
    #[serde(default)]
    pub mcp: McpConfig,
    /// Multi-tenancy namespace configuration (`[namespaces]`).
    #[serde(default)]
    pub namespaces: NamespacesConfig,
    /// Authentication settings (`[auth]`).
    #[serde(default)]
    pub auth: AuthConfig,
    /// Multi-instance configuration (`[instance]`).
    #[serde(default)]
    pub instance: InstanceConfig,
    /// GDPR compliance configuration (`[gdpr]`).
    #[serde(default)]
    pub gdpr: GdprConfig,
    /// Audit logging configuration (`[audit]`).
    #[serde(default)]
    pub audit: AuditConfig,
    /// OpenTelemetry configuration (`[telemetry]`).
    #[serde(default)]
    pub telemetry: TelemetryConfig,
    /// Mindset auto-detection configuration (`[mindset]`).
    #[serde(default)]
    pub mindset: MindsetDetectorConfig,
    /// Connector scheduling configuration (`[connectors]`).
    #[serde(default)]
    pub connectors: ConnectorsConfig,
}
83
/// HTTP server configuration.
///
/// Each field has its own serde default function, so any subset of keys may
/// be omitted from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerConfig {
    /// Bind address (default: "0.0.0.0").
    #[serde(default = "default_host")]
    pub host: String,
    /// HTTP port (default: 8420).
    #[serde(default = "default_port")]
    pub port: u16,
    /// Number of worker threads (default: 4).
    #[serde(default = "default_workers")]
    pub workers: usize,
    /// Log level (default: "info").
    #[serde(default = "default_log_level")]
    pub log_level: String,
    /// Log format: "text" (default) or "json" for structured JSON logging with trace IDs.
    #[serde(default = "default_log_format")]
    pub log_format: String,
}
103
104impl Default for ServerConfig {
105    fn default() -> Self {
106        Self {
107            host: default_host(),
108            port: default_port(),
109            workers: default_workers(),
110            log_level: default_log_level(),
111            log_format: default_log_format(),
112        }
113    }
114}
115
/// Serde default for `server.host`.
fn default_host() -> String {
    String::from("0.0.0.0")
}
/// Serde default for `server.port`.
fn default_port() -> u16 {
    const PORT: u16 = 8420;
    PORT
}
/// Serde default for `server.workers`.
fn default_workers() -> usize {
    const WORKERS: usize = 4;
    WORKERS
}
/// Serde default for `server.log_level`.
fn default_log_level() -> String {
    String::from("info")
}
/// Serde default for `server.log_format`.
fn default_log_format() -> String {
    String::from("text")
}
131
/// Storage backend configuration.
///
/// Holds the storage mode, the optional shared data directory, the media
/// directory, and the per-backend settings for vector and graph storage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageConfig {
    /// Storage mode: "embedded" (default) or "external".
    ///
    /// NOTE(review): the accessors on this type also test for the value
    /// `"shared"`, which is not listed here — confirm the full set of modes.
    #[serde(default = "default_storage_mode")]
    pub mode: String,
    /// Shared data directory for multi-instance mode.
    /// When `mode = "shared"`, all instances must point to the same directory.
    /// If set (and `mode = "shared"`), overrides `vector.data_dir` and `graph.data_dir`.
    ///
    /// NOTE(review): `effective_vector_data_dir` and `effective_graph_data_dir`
    /// both return this path unchanged; any per-backend sub-directory layout
    /// would have to be applied by the caller — confirm.
    #[serde(default)]
    pub shared_data_dir: Option<String>,
    /// Directory for persisting uploaded media files (images, audio, video).
    /// Defaults to "data/media". Files are stored as `{node_id}.{ext}`.
    #[serde(default = "default_media_dir")]
    pub media_dir: String,
    /// Vector backend configuration.
    #[serde(default)]
    pub vector: VectorBackendConfig,
    /// Graph backend configuration.
    #[serde(default)]
    pub graph: GraphBackendConfig,
}
155
156impl Default for StorageConfig {
157    fn default() -> Self {
158        Self {
159            mode: default_storage_mode(),
160            shared_data_dir: None,
161            media_dir: default_media_dir(),
162            vector: VectorBackendConfig::default(),
163            graph: GraphBackendConfig::default(),
164        }
165    }
166}
167
168impl StorageConfig {
169    /// Returns the effective data directory for vector storage.
170    /// In shared mode with `shared_data_dir` set, returns the shared path.
171    /// Otherwise returns the backend's own `data_dir`.
172    pub fn effective_vector_data_dir(&self) -> &str {
173        if self.mode == "shared" {
174            if let Some(ref dir) = self.shared_data_dir {
175                return dir;
176            }
177        }
178        &self.vector.data_dir
179    }
180
181    /// Returns the effective data directory for graph storage.
182    /// In shared mode with `shared_data_dir` set, returns the shared path.
183    /// Otherwise returns the backend's own `data_dir`.
184    pub fn effective_graph_data_dir(&self) -> &str {
185        if self.mode == "shared" {
186            if let Some(ref dir) = self.shared_data_dir {
187                return dir;
188            }
189        }
190        &self.graph.data_dir
191    }
192
193    /// Returns the media storage directory path.
194    pub fn effective_media_dir(&self) -> &str {
195        &self.media_dir
196    }
197}
198
/// Serde default for `storage.mode`.
fn default_storage_mode() -> String {
    String::from("embedded")
}
202
/// Vector backend settings.
///
/// `backend`, `data_dir` and `max_db_size` use the same default functions as
/// the corresponding fields of `GraphBackendConfig`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorBackendConfig {
    /// Backend type: "helix" (default), "qdrant", "custom".
    #[serde(default = "default_backend")]
    pub backend: String,
    /// Data directory for embedded backends (default: "data").
    #[serde(default = "default_data_dir")]
    pub data_dir: String,
    /// Maximum database size in bytes (for LMDB map_size). Default: 10 GiB.
    #[serde(default = "default_max_db_size")]
    pub max_db_size: u64,
    /// External service URL (for qdrant, etc.).
    pub url: Option<String>,
    /// HNSW configuration (only used when vector index is HNSW).
    #[serde(default)]
    pub hnsw: HnswConfig,
}
221
/// HNSW vector index parameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HnswConfig {
    /// HNSW `ef_construction` build-time parameter (default: 200).
    ///
    /// NOTE(review): this comment previously described "bi-directional links
    /// per node (default: 24)", which matches neither the field name nor the
    /// default of 200 — presumably left over from a separate `m` parameter.
    /// Confirm the exact meaning against the index implementation.
    #[serde(default = "default_hnsw_ef_construction")]
    pub ef_construction: usize,
    /// Search parameter: number of candidates to evaluate during search (default: 200).
    #[serde(default = "default_hnsw_ef_search")]
    pub ef_search: usize,
    /// Enable HNSW index (default: true). When false, falls back to brute-force SIMD.
    #[serde(default = "default_hnsw_enabled")]
    pub enabled: bool,
}
236
237impl Default for HnswConfig {
238    fn default() -> Self {
239        Self {
240            ef_construction: default_hnsw_ef_construction(),
241            ef_search: default_hnsw_ef_search(),
242            enabled: default_hnsw_enabled(),
243        }
244    }
245}
246
/// Serde default for `hnsw.ef_construction`.
fn default_hnsw_ef_construction() -> usize {
    const EF_CONSTRUCTION: usize = 200;
    EF_CONSTRUCTION
}
/// Serde default for `hnsw.ef_search`.
fn default_hnsw_ef_search() -> usize {
    const EF_SEARCH: usize = 200;
    EF_SEARCH
}
/// Serde default for `hnsw.enabled`.
fn default_hnsw_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
256
257impl Default for VectorBackendConfig {
258    fn default() -> Self {
259        Self {
260            backend: default_backend(),
261            data_dir: default_data_dir(),
262            max_db_size: default_max_db_size(),
263            url: None,
264            hnsw: HnswConfig::default(),
265        }
266    }
267}
268
/// Graph backend settings.
///
/// `backend`, `data_dir` and `max_db_size` use the same default functions as
/// the corresponding fields of `VectorBackendConfig`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphBackendConfig {
    /// Backend type: "helix" (default), "falkordb", "custom".
    #[serde(default = "default_backend")]
    pub backend: String,
    /// Data directory for embedded backends (default: "data").
    #[serde(default = "default_data_dir")]
    pub data_dir: String,
    /// Maximum database size in bytes (default: 10 GiB).
    #[serde(default = "default_max_db_size")]
    pub max_db_size: u64,
    /// Batch size for bulk operations (default: 10000).
    #[serde(default = "default_batch_size")]
    pub batch_size: usize,
    /// External service URL (for falkordb, etc.).
    pub url: Option<String>,
}
287
288impl Default for GraphBackendConfig {
289    fn default() -> Self {
290        Self {
291            backend: default_backend(),
292            data_dir: default_data_dir(),
293            max_db_size: default_max_db_size(),
294            batch_size: default_batch_size(),
295            url: None,
296        }
297    }
298}
299
/// Serde default for the `backend` field of both vector and graph backends.
fn default_backend() -> String {
    String::from("helix")
}
/// Serde default for the `data_dir` field of both vector and graph backends.
fn default_data_dir() -> String {
    String::from("data")
}
/// Serde default for `storage.media_dir`.
fn default_media_dir() -> String {
    String::from("data/media")
}
/// Serde default for `max_db_size`: 10 GiB expressed in bytes.
fn default_max_db_size() -> u64 {
    const GIB: u64 = 1024 * 1024 * 1024;
    10 * GIB
}
/// Serde default for `graph.batch_size`.
fn default_batch_size() -> usize {
    const BATCH_SIZE: usize = 10_000;
    BATCH_SIZE
}
315
/// ML model configuration.
///
/// Selects the embedding, NER, LLM and CLIP models, the OCR settings, and the
/// optional fine-tuned relation-extraction model served through Fireworks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelsConfig {
    /// Embedding model name (default: "all-MiniLM-L6-v2").
    #[serde(default = "default_embedding_model")]
    pub embedding_model: String,
    /// NER model name (default: "gliner-multi-v2.1").
    #[serde(default = "default_ner_model")]
    pub ner_model: String,
    /// LLM model for relation extraction (default: "Qwen3-4B-GGUF").
    #[serde(default = "default_llm_model")]
    pub llm_model: String,
    /// LLM backend: "candle" or "llama_cpp" (default: "candle").
    #[serde(default = "default_llm_backend")]
    pub llm_backend: String,
    /// CLIP model name for image embedding (default: "clip-vit-base-patch32").
    #[serde(default = "default_clip_model")]
    pub clip_model: String,
    /// Directory for storing model files (default: "models").
    #[serde(default = "default_models_dir")]
    pub models_dir: String,
    /// Enable document OCR pipeline (default: true).
    #[serde(default = "default_enable_ocr")]
    pub enable_ocr: bool,
    /// Language for Tesseract OCR (default: "eng").
    #[serde(default = "default_ocr_language")]
    pub ocr_language: String,
    /// Path to the tesseract binary (default: "tesseract", relies on PATH).
    #[serde(default = "default_tesseract_path")]
    pub tesseract_path: String,
    /// Fine-tuned relation extraction model name on Fireworks (e.g., "accounts/ucotron/models/re-qwen2-5-7b").
    /// When set and non-empty, the extraction pipeline will use this model via Fireworks API
    /// instead of co-occurrence. Falls back to co-occurrence on API errors.
    /// Defaults to the empty string, i.e. disabled.
    #[serde(default)]
    pub fine_tuned_re_model: String,
    /// Fireworks inference API endpoint (default: "https://api.fireworks.ai/inference/v1").
    #[serde(default = "default_fine_tuned_re_endpoint")]
    pub fine_tuned_re_endpoint: String,
    /// Name of the environment variable holding the Fireworks API key (default: "FIREWORKS_API_KEY").
    /// The actual key is read from this env var at runtime — never stored in config files.
    #[serde(default = "default_fine_tuned_re_api_key_env")]
    pub fine_tuned_re_api_key_env: String,
}
359
360impl Default for ModelsConfig {
361    fn default() -> Self {
362        Self {
363            embedding_model: default_embedding_model(),
364            ner_model: default_ner_model(),
365            llm_model: default_llm_model(),
366            llm_backend: default_llm_backend(),
367            clip_model: default_clip_model(),
368            models_dir: default_models_dir(),
369            enable_ocr: default_enable_ocr(),
370            ocr_language: default_ocr_language(),
371            tesseract_path: default_tesseract_path(),
372            fine_tuned_re_model: String::new(),
373            fine_tuned_re_endpoint: default_fine_tuned_re_endpoint(),
374            fine_tuned_re_api_key_env: default_fine_tuned_re_api_key_env(),
375        }
376    }
377}
378
/// Serde default for `models.embedding_model`.
fn default_embedding_model() -> String {
    String::from("all-MiniLM-L6-v2")
}
/// Serde default for `models.ner_model`.
fn default_ner_model() -> String {
    String::from("gliner-multi-v2.1")
}
/// Serde default for `models.llm_model`.
fn default_llm_model() -> String {
    String::from("Qwen3-4B-GGUF")
}
/// Serde default for `models.llm_backend`.
fn default_llm_backend() -> String {
    String::from("candle")
}
/// Serde default for `models.clip_model`.
fn default_clip_model() -> String {
    String::from("clip-vit-base-patch32")
}
/// Serde default for `models.models_dir`.
fn default_models_dir() -> String {
    String::from("models")
}
/// Serde default for `models.enable_ocr`.
fn default_enable_ocr() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
/// Serde default for `models.ocr_language`.
fn default_ocr_language() -> String {
    String::from("eng")
}
/// Serde default for `models.tesseract_path`.
fn default_tesseract_path() -> String {
    String::from("tesseract")
}
/// Serde default for `models.fine_tuned_re_endpoint`.
fn default_fine_tuned_re_endpoint() -> String {
    String::from("https://api.fireworks.ai/inference/v1")
}
/// Serde default for `models.fine_tuned_re_api_key_env`.
fn default_fine_tuned_re_api_key_env() -> String {
    String::from("FIREWORKS_API_KEY")
}
412
/// Background consolidation worker configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsolidationConfig {
    /// Number of messages between consolidation runs (default: 100).
    #[serde(default = "default_trigger_interval")]
    pub trigger_interval: usize,
    /// Enable memory decay for old nodes (default: true).
    #[serde(default = "default_enable_decay")]
    pub enable_decay: bool,
    /// Decay half-life in seconds (default: 2592000, i.e. 30 days).
    #[serde(default = "default_decay_halflife")]
    pub decay_halflife_secs: u64,
}
426
427impl Default for ConsolidationConfig {
428    fn default() -> Self {
429        Self {
430            trigger_interval: default_trigger_interval(),
431            enable_decay: default_enable_decay(),
432            decay_halflife_secs: default_decay_halflife(),
433        }
434    }
435}
436
/// Serde default for `consolidation.trigger_interval`.
fn default_trigger_interval() -> usize {
    const MESSAGES_BETWEEN_RUNS: usize = 100;
    MESSAGES_BETWEEN_RUNS
}
/// Serde default for `consolidation.enable_decay`.
fn default_enable_decay() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
/// Serde default for `consolidation.decay_halflife_secs`: 30 days in seconds.
fn default_decay_halflife() -> u64 {
    const SECS_PER_DAY: u64 = 24 * 3600;
    30 * SECS_PER_DAY
}
446
/// MCP (Model Context Protocol) server configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct McpConfig {
    /// Enable MCP server (default: true).
    #[serde(default = "default_mcp_enabled")]
    pub enabled: bool,
    /// Transport mode: "stdio" or "sse" (default: "stdio").
    #[serde(default = "default_mcp_transport")]
    pub transport: String,
    /// SSE port (only used when transport = "sse", default: 8421).
    #[serde(default = "default_mcp_port")]
    pub port: u16,
}
460
461impl Default for McpConfig {
462    fn default() -> Self {
463        Self {
464            enabled: default_mcp_enabled(),
465            transport: default_mcp_transport(),
466            port: default_mcp_port(),
467        }
468    }
469}
470
/// Serde default for `mcp.enabled`.
fn default_mcp_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
/// Serde default for `mcp.transport`.
fn default_mcp_transport() -> String {
    String::from("stdio")
}
/// Serde default for `mcp.port`.
fn default_mcp_port() -> u16 {
    const SSE_PORT: u16 = 8421;
    SSE_PORT
}
480
/// Multi-tenancy namespace configuration.
///
/// Namespaces isolate memory data between different tenants,
/// projects, users, agents, or threads.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NamespacesConfig {
    /// Default namespace when no `X-Ucotron-Namespace` header is provided
    /// (default: "default").
    #[serde(default = "default_namespace")]
    pub default_namespace: String,
    /// If non-empty, only these namespaces are allowed.
    /// Empty (the default) means any namespace is allowed.
    #[serde(default)]
    pub allowed_namespaces: Vec<String>,
    /// Maximum number of namespaces allowed (0 = unlimited, the default).
    #[serde(default)]
    pub max_namespaces: usize,
}
498
499impl Default for NamespacesConfig {
500    fn default() -> Self {
501        Self {
502            default_namespace: default_namespace(),
503            allowed_namespaces: Vec::new(),
504            max_namespaces: 0,
505        }
506    }
507}
508
/// Serde default for `namespaces.default_namespace`.
fn default_namespace() -> String {
    String::from("default")
}
512
/// Authentication configuration.
///
/// Optional authentication for the REST API and MCP server.
/// When `enabled` is false (default), all requests are accepted.
///
/// RBAC roles (ordered by privilege):
/// - `admin`: full access including API key management and admin endpoints
/// - `writer`: read + write (ingest, learn, update, delete, GDPR)
/// - `reader`: read-only (search, augment, get, list, export)
/// - `viewer`: health + metrics only
///
/// NOTE(review): `Debug` is derived, so debug-formatting this struct prints
/// `api_key`, `jwt_secret` and every entry of `api_keys` verbatim — avoid
/// logging it.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AuthConfig {
    /// Enable authentication (default: false).
    #[serde(default)]
    pub enabled: bool,
    /// Legacy single API key for simple auth (checked via `Authorization: Bearer <key>` header).
    /// Set via TOML or `UCOTRON_AUTH_API_KEY` env var.
    /// A token matching this key is granted the `admin` role.
    #[serde(default)]
    pub api_key: Option<String>,
    /// JWT secret for token-based auth (future use).
    #[serde(default)]
    pub jwt_secret: Option<String>,
    /// JWT issuer (future use).
    #[serde(default)]
    pub jwt_issuer: Option<String>,
    /// Named API keys with role-based access control.
    /// Each key has a role and optional namespace scope.
    #[serde(default)]
    pub api_keys: Vec<ApiKeyEntry>,
}
544
/// A named API key with role and optional namespace scope.
///
/// `name` and `key` have no serde default and must be present in the input;
/// the remaining fields are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiKeyEntry {
    /// Human-readable name for this key (e.g., "backend-service", "analytics-reader").
    pub name: String,
    /// The secret key value (checked via `Authorization: Bearer <key>` header).
    pub key: String,
    /// Role assigned to this key: "admin", "writer", "reader", or "viewer"
    /// (default: "reader").
    #[serde(default = "default_api_key_role")]
    pub role: String,
    /// Optional namespace scope. If set, this key can only access the specified namespace.
    /// If empty/unset, the key can access all namespaces.
    #[serde(default)]
    pub namespace: Option<String>,
    /// Whether this key is active (default: true). Set to false to revoke without deleting.
    #[serde(default = "default_true")]
    pub active: bool,
}
563
/// Serde default for the `role` of an `ApiKeyEntry`.
fn default_api_key_role() -> String {
    String::from("reader")
}

/// Shared serde default for boolean fields that are on unless disabled.
fn default_true() -> bool {
    const ON: bool = true;
    ON
}
571
/// RBAC role. Variants are declared from least to most privileged, so the
/// derived ordering doubles as the privilege ordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum AuthRole {
    Viewer = 0,
    Reader = 1,
    Writer = 2,
    Admin = 3,
}

impl AuthRole {
    /// Parse a role name into an `AuthRole`.
    ///
    /// Matching is exact and case-sensitive; any other string yields `None`.
    pub fn parse_role(s: &str) -> Option<Self> {
        let known = [
            AuthRole::Admin,
            AuthRole::Writer,
            AuthRole::Reader,
            AuthRole::Viewer,
        ];
        known.iter().copied().find(|role| role.as_str() == s)
    }

    /// Whether this role is at least as privileged as `required`.
    pub fn has_privilege(&self, required: AuthRole) -> bool {
        *self >= required
    }

    /// Lower-case name of the role, as accepted by `parse_role`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            AuthRole::Viewer => "viewer",
            AuthRole::Reader => "reader",
            AuthRole::Writer => "writer",
            AuthRole::Admin => "admin",
        }
    }
}
607
608impl AuthConfig {
609    /// Look up an API key and return its role and namespace scope.
610    /// Checks named `api_keys` first, then falls back to legacy `api_key` (admin role).
611    pub fn authenticate(&self, bearer_token: &str) -> Option<(AuthRole, Option<String>)> {
612        // Check named API keys first.
613        for entry in &self.api_keys {
614            if entry.active && entry.key == bearer_token {
615                if let Some(role) = AuthRole::parse_role(&entry.role) {
616                    return Some((role, entry.namespace.clone()));
617                }
618            }
619        }
620        // Fall back to legacy single API key (grants admin).
621        if let Some(ref legacy_key) = self.api_key {
622            if legacy_key == bearer_token {
623                return Some((AuthRole::Admin, None));
624            }
625        }
626        None
627    }
628}
629
/// Multi-instance configuration.
///
/// Controls how this server instance participates in a multi-instance deployment.
/// In single-instance mode (default), all settings can be left at defaults.
///
/// For multi-instance deployments:
/// - Each instance needs a unique `instance_id`
/// - `role` determines whether this instance can write (`writer`), only read (`reader`), or auto-detect
/// - `id_range_start` and `id_range_size` partition the node ID space to avoid collisions
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InstanceConfig {
    /// Unique identifier for this server instance.
    /// Defaults to the literal "auto", which `resolved_instance_id` expands
    /// to `<hostname>-<pid>`.
    #[serde(default = "default_instance_id")]
    pub instance_id: String,
    /// Instance role: "auto" (default), "writer", or "reader".
    /// - "auto": single-instance mode, acts as both reader and writer
    /// - "writer": can perform writes (ingestion, learn, update, delete)
    /// - "reader": read-only (search, augment, get operations only)
    #[serde(default = "default_instance_role")]
    pub role: String,
    /// Starting node ID for this instance's ID allocation range.
    /// Each instance should have a non-overlapping range to avoid ID collisions.
    /// Default: 1_000_000 (same as single-instance).
    #[serde(default = "default_id_range_start")]
    pub id_range_start: u64,
    /// Size of this instance's node ID allocation range.
    /// Default: 1_000_000_000 (1 billion IDs per instance).
    #[serde(default = "default_id_range_size")]
    pub id_range_size: u64,
}
661
662impl Default for InstanceConfig {
663    fn default() -> Self {
664        Self {
665            instance_id: default_instance_id(),
666            role: default_instance_role(),
667            id_range_start: default_id_range_start(),
668            id_range_size: default_id_range_size(),
669        }
670    }
671}
672
/// Serde default for `instance.instance_id`.
fn default_instance_id() -> String {
    String::from("auto")
}
/// Serde default for `instance.role`.
fn default_instance_role() -> String {
    String::from("auto")
}
/// Serde default for `instance.id_range_start`.
fn default_id_range_start() -> u64 {
    const RANGE_START: u64 = 1_000_000;
    RANGE_START
}
/// Serde default for `instance.id_range_size`.
fn default_id_range_size() -> u64 {
    const RANGE_SIZE: u64 = 1_000_000_000;
    RANGE_SIZE
}
685
686impl InstanceConfig {
687    /// Resolve the instance_id. If set to "auto", generate from hostname + PID.
688    pub fn resolved_instance_id(&self) -> String {
689        if self.instance_id == "auto" {
690            let hostname = hostname::get()
691                .ok()
692                .and_then(|h| h.into_string().ok())
693                .unwrap_or_else(|| "unknown".to_string());
694            let pid = std::process::id();
695            format!("{}-{}", hostname, pid)
696        } else {
697            self.instance_id.clone()
698        }
699    }
700
701    /// Whether this instance can perform write operations.
702    pub fn can_write(&self) -> bool {
703        matches!(self.role.as_str(), "auto" | "writer")
704    }
705
706    /// Whether this instance is a dedicated reader (no writes).
707    pub fn is_reader_only(&self) -> bool {
708        self.role == "reader"
709    }
710}
711
/// GDPR compliance configuration.
///
/// Controls data retention policies and right-to-be-forgotten behavior.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GdprConfig {
    /// Enable GDPR endpoints (default: true).
    #[serde(default = "default_gdpr_enabled")]
    pub enabled: bool,
    /// Default data retention TTL in seconds (0 = no automatic expiry, the default).
    #[serde(default)]
    pub default_retention_ttl_secs: u64,
    /// Per-namespace retention policies: list of {namespace, ttl_secs}.
    /// Empty by default.
    #[serde(default)]
    pub retention_policies: Vec<GdprRetentionPolicyConfig>,
}
727
/// A single retention policy entry in configuration.
///
/// Neither field has a serde default, so both must be present in each entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GdprRetentionPolicyConfig {
    /// Namespace this policy applies to ("*" = all namespaces).
    pub namespace: String,
    /// Time-to-live in seconds (0 = no expiry).
    pub ttl_secs: u64,
}
736
737impl Default for GdprConfig {
738    fn default() -> Self {
739        Self {
740            enabled: default_gdpr_enabled(),
741            default_retention_ttl_secs: 0,
742            retention_policies: Vec::new(),
743        }
744    }
745}
746
/// Serde default for `gdpr.enabled`.
fn default_gdpr_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
750
/// Audit logging configuration.
///
/// Controls the immutable audit trail that records all API operations.
/// Audit entries are stored in an append-only in-memory log and persisted
/// as special graph nodes for durability.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditConfig {
    /// Enable audit logging (default: true).
    #[serde(default = "default_audit_enabled")]
    pub enabled: bool,
    /// Retention period for audit entries in seconds.
    /// Entries older than this are eligible for pruning.
    /// 0 = keep forever. Default: 7776000 (90 days).
    #[serde(default = "default_audit_retention_secs")]
    pub retention_secs: u64,
    /// Maximum number of audit entries kept in memory.
    /// Oldest entries are evicted when this limit is exceeded.
    /// Default: 100000.
    #[serde(default = "default_audit_max_entries")]
    pub max_entries: usize,
}
772
773impl Default for AuditConfig {
774    fn default() -> Self {
775        Self {
776            enabled: default_audit_enabled(),
777            retention_secs: default_audit_retention_secs(),
778            max_entries: default_audit_max_entries(),
779        }
780    }
781}
782
/// Serde default for `audit.enabled`.
fn default_audit_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}

/// Serde default for `audit.retention_secs`: 90 days in seconds.
fn default_audit_retention_secs() -> u64 {
    const RETENTION_SECS: u64 = 7_776_000;
    RETENTION_SECS
}

/// Serde default for `audit.max_entries`.
fn default_audit_max_entries() -> usize {
    const MAX_ENTRIES: usize = 100_000;
    MAX_ENTRIES
}
794
/// OpenTelemetry configuration.
///
/// Controls OTLP trace/metric/log export to an OpenTelemetry collector.
/// Disabled by default — enable and point to a collector (e.g., Jaeger, Grafana Tempo).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelemetryConfig {
    /// Enable OTLP telemetry export (default: false).
    #[serde(default)]
    pub enabled: bool,
    /// OTLP gRPC collector endpoint (default: "http://localhost:4317").
    #[serde(default = "default_telemetry_otlp_endpoint")]
    pub otlp_endpoint: String,
    /// Service name reported in OTLP traces (default: "ucotron").
    #[serde(default = "default_telemetry_service_name")]
    pub service_name: String,
    /// Trace sampling ratio, 0.0 to 1.0 (default: 1.0 = sample everything).
    /// NOTE(review): no range check is visible on this type — presumably
    /// validated elsewhere; confirm.
    #[serde(default = "default_telemetry_sample_rate")]
    pub sample_rate: f64,
    /// Export traces via OTLP (default: true).
    #[serde(default = "default_true")]
    pub export_traces: bool,
    /// Export metrics via OTLP (default: true).
    #[serde(default = "default_true")]
    pub export_metrics: bool,
    /// Export logs via OTLP (default: false).
    #[serde(default)]
    pub export_logs: bool,
}
823
824impl Default for TelemetryConfig {
825    fn default() -> Self {
826        Self {
827            enabled: false,
828            otlp_endpoint: default_telemetry_otlp_endpoint(),
829            service_name: default_telemetry_service_name(),
830            sample_rate: default_telemetry_sample_rate(),
831            export_traces: true,
832            export_metrics: true,
833            export_logs: false,
834        }
835    }
836}
837
/// Serde default for `telemetry.otlp_endpoint`.
fn default_telemetry_otlp_endpoint() -> String {
    String::from("http://localhost:4317")
}
/// Serde default for `telemetry.service_name`.
fn default_telemetry_service_name() -> String {
    String::from("ucotron")
}
/// Serde default for `telemetry.sample_rate`.
fn default_telemetry_sample_rate() -> f64 {
    const SAMPLE_EVERYTHING: f64 = 1.0;
    SAMPLE_EVERYTHING
}
847
/// Mindset auto-detection configuration.
///
/// Controls automatic detection of cognitive mindset (Convergent, Divergent,
/// Algorithmic, Spatial) from query keywords. When enabled and no explicit
/// mindset is provided in the search request, the system scans for keyword
/// patterns.
///
/// ```toml
/// [mindset]
/// enabled = true
/// algorithmic_keywords = ["verify", "confirm", "check", "validate", "prove", "correct"]
/// divergent_keywords = ["what if", "explore", "brainstorm", "alternative", "imagine", "creative"]
/// convergent_keywords = ["summarize", "consensus", "agree", "common", "overview", "conclude"]
/// spatial_keywords = ["connected", "path", "route", "bridge", "relationship", "link", "network", "graph"]
/// ```
///
/// The values shown above are also the defaults for each keyword list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MindsetDetectorConfig {
    /// Enable automatic mindset detection from query keywords (default: true).
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Keywords that trigger Algorithmic mindset (verification, logical checking).
    #[serde(default = "default_algorithmic_keywords")]
    pub algorithmic_keywords: Vec<String>,
    /// Keywords that trigger Divergent mindset (exploration, brainstorming).
    #[serde(default = "default_divergent_keywords")]
    pub divergent_keywords: Vec<String>,
    /// Keywords that trigger Convergent mindset (synthesis, consensus).
    #[serde(default = "default_convergent_keywords")]
    pub convergent_keywords: Vec<String>,
    /// Keywords that trigger Spatial mindset (graph traversal, path-based reasoning).
    #[serde(default = "default_spatial_keywords")]
    pub spatial_keywords: Vec<String>,
}
880
881impl Default for MindsetDetectorConfig {
882    fn default() -> Self {
883        Self {
884            enabled: true,
885            algorithmic_keywords: default_algorithmic_keywords(),
886            divergent_keywords: default_divergent_keywords(),
887            convergent_keywords: default_convergent_keywords(),
888            spatial_keywords: default_spatial_keywords(),
889        }
890    }
891}
892
/// Built-in keyword list for `algorithmic_keywords`.
fn default_algorithmic_keywords() -> Vec<String> {
    ["verify", "confirm", "check", "validate", "prove", "correct"]
        .iter()
        .map(|&kw| String::from(kw))
        .collect()
}
903
/// Built-in keyword list for `divergent_keywords`.
fn default_divergent_keywords() -> Vec<String> {
    ["what if", "explore", "brainstorm", "alternative", "imagine", "creative"]
        .iter()
        .map(|&kw| String::from(kw))
        .collect()
}
914
/// Built-in keyword list for `convergent_keywords`.
fn default_convergent_keywords() -> Vec<String> {
    ["summarize", "consensus", "agree", "common", "overview", "conclude"]
        .iter()
        .map(|&kw| String::from(kw))
        .collect()
}
925
/// Built-in keyword list for `spatial_keywords`.
fn default_spatial_keywords() -> Vec<String> {
    [
        "connected",
        "path",
        "route",
        "bridge",
        "relationship",
        "link",
        "network",
        "graph",
    ]
    .iter()
    .map(|&kw| String::from(kw))
    .collect()
}
938
939/// Connector scheduling configuration.
940///
941/// Controls cron-based periodic sync for external data source connectors
942/// (Slack, GitHub, Notion, etc.). Individual connector schedules are
943/// configured as entries in the `[[connectors.schedules]]` array.
944///
945/// ```toml
946/// [connectors]
947/// enabled = true
948/// check_interval_secs = 60
949///
950/// [[connectors.schedules]]
951/// connector_id = "my-slack"
952/// cron_expression = "0 */6 * * * *"
953/// timeout_secs = 300
954/// max_retries = 3
955/// ```
956#[derive(Debug, Clone, Serialize, Deserialize)]
957pub struct ConnectorsConfig {
958    /// Enable the connector scheduler (default: false).
959    #[serde(default)]
960    pub enabled: bool,
961    /// How often the scheduler checks for due cron jobs (seconds, default: 60).
962    #[serde(default = "default_connector_check_interval")]
963    pub check_interval_secs: u64,
964    /// Connector schedule entries.
965    #[serde(default)]
966    pub schedules: Vec<ConnectorScheduleEntry>,
967}
968
969impl Default for ConnectorsConfig {
970    fn default() -> Self {
971        Self {
972            enabled: false,
973            check_interval_secs: default_connector_check_interval(),
974            schedules: Vec::new(),
975        }
976    }
977}
978
979/// A single connector schedule entry in the configuration file.
980#[derive(Debug, Clone, Serialize, Deserialize)]
981pub struct ConnectorScheduleEntry {
982    /// Connector instance ID (must match a registered connector).
983    pub connector_id: String,
984    /// Cron expression for periodic sync (e.g., "0 */6 * * * *").
985    /// Uses 6-field format: sec min hour day month weekday.
986    pub cron_expression: Option<String>,
987    /// Whether this schedule is active (default: true).
988    #[serde(default = "default_true")]
989    pub enabled: bool,
990    /// Timeout for a single sync operation in seconds (default: 300).
991    #[serde(default = "default_connector_timeout")]
992    pub timeout_secs: u64,
993    /// Number of retries on sync failure (default: 3).
994    #[serde(default = "default_connector_retries")]
995    pub max_retries: u32,
996}
997
/// Serde default for `connectors.check_interval_secs`.
fn default_connector_check_interval() -> u64 {
    const CHECK_INTERVAL_SECS: u64 = 60;
    CHECK_INTERVAL_SECS
}
1001
/// Serde default for a schedule entry's `timeout_secs`.
fn default_connector_timeout() -> u64 {
    const SYNC_TIMEOUT_SECS: u64 = 300;
    SYNC_TIMEOUT_SECS
}
1005
/// Serde default for a schedule entry's `max_retries`.
fn default_connector_retries() -> u32 {
    const SYNC_MAX_RETRIES: u32 = 3;
    SYNC_MAX_RETRIES
}
1009
1010impl UcotronConfig {
1011    /// Load configuration from a TOML file, then apply environment variable overrides.
1012    pub fn from_file(path: &str) -> anyhow::Result<Self> {
1013        let contents = std::fs::read_to_string(path)
1014            .map_err(|e| anyhow::anyhow!("Failed to read config file '{}': {}", path, e))?;
1015        Self::parse_toml(&contents)
1016    }
1017
1018    /// Parse configuration from a TOML string, apply env overrides, then validate.
1019    pub fn parse_toml(toml_str: &str) -> anyhow::Result<Self> {
1020        let mut config: UcotronConfig = toml::from_str(toml_str)
1021            .map_err(|e| anyhow::anyhow!("Failed to parse TOML config: {}", e))?;
1022        config.apply_env_overrides();
1023        config.validate()?;
1024        Ok(config)
1025    }
1026
1027    /// Apply environment variable overrides to the configuration.
1028    ///
1029    /// Variables use the `UCOTRON_` prefix with `_` as section separator:
1030    /// - `UCOTRON_SERVER_HOST` → `server.host`
1031    /// - `UCOTRON_SERVER_PORT` → `server.port`
1032    /// - `UCOTRON_SERVER_WORKERS` → `server.workers`
1033    /// - `UCOTRON_SERVER_LOG_LEVEL` → `server.log_level`
1034    /// - `UCOTRON_SERVER_LOG_FORMAT` → `server.log_format`
1035    /// - `UCOTRON_STORAGE_MODE` → `storage.mode`
1036    /// - `UCOTRON_STORAGE_VECTOR_BACKEND` → `storage.vector.backend`
1037    /// - `UCOTRON_STORAGE_VECTOR_DATA_DIR` → `storage.vector.data_dir`
1038    /// - `UCOTRON_STORAGE_GRAPH_BACKEND` → `storage.graph.backend`
1039    /// - `UCOTRON_STORAGE_GRAPH_DATA_DIR` → `storage.graph.data_dir`
1040    /// - `UCOTRON_STORAGE_GRAPH_BATCH_SIZE` → `storage.graph.batch_size`
1041    /// - `UCOTRON_MODELS_EMBEDDING_MODEL` → `models.embedding_model`
1042    /// - `UCOTRON_MODELS_NER_MODEL` → `models.ner_model`
1043    /// - `UCOTRON_MODELS_LLM_MODEL` → `models.llm_model`
1044    /// - `UCOTRON_MODELS_LLM_BACKEND` → `models.llm_backend`
1045    /// - `UCOTRON_MODELS_DIR` → `models.models_dir`
1046    /// - `UCOTRON_CONSOLIDATION_TRIGGER_INTERVAL` → `consolidation.trigger_interval`
1047    /// - `UCOTRON_CONSOLIDATION_ENABLE_DECAY` → `consolidation.enable_decay`
1048    /// - `UCOTRON_CONSOLIDATION_DECAY_HALFLIFE_SECS` → `consolidation.decay_halflife_secs`
1049    /// - `UCOTRON_MCP_ENABLED` → `mcp.enabled`
1050    /// - `UCOTRON_MCP_TRANSPORT` → `mcp.transport`
1051    /// - `UCOTRON_MCP_PORT` → `mcp.port`
1052    /// - `UCOTRON_NAMESPACES_DEFAULT` → `namespaces.default_namespace`
1053    /// - `UCOTRON_AUTH_ENABLED` → `auth.enabled`
1054    /// - `UCOTRON_AUTH_API_KEY` → `auth.api_key`
1055    /// - `UCOTRON_AUTH_JWT_SECRET` → `auth.jwt_secret`
1056    /// - `UCOTRON_TELEMETRY_ENABLED` → `telemetry.enabled`
1057    /// - `UCOTRON_TELEMETRY_OTLP_ENDPOINT` → `telemetry.otlp_endpoint`
1058    /// - `UCOTRON_TELEMETRY_SERVICE_NAME` → `telemetry.service_name`
1059    /// - `UCOTRON_TELEMETRY_SAMPLE_RATE` → `telemetry.sample_rate`
1060    pub fn apply_env_overrides(&mut self) {
1061        // Server overrides
1062        if let Ok(v) = std::env::var("UCOTRON_SERVER_HOST") {
1063            self.server.host = v;
1064        }
1065        if let Ok(v) = std::env::var("UCOTRON_SERVER_PORT") {
1066            if let Ok(port) = v.parse::<u16>() {
1067                self.server.port = port;
1068            }
1069        }
1070        if let Ok(v) = std::env::var("UCOTRON_SERVER_WORKERS") {
1071            if let Ok(w) = v.parse::<usize>() {
1072                self.server.workers = w;
1073            }
1074        }
1075        if let Ok(v) = std::env::var("UCOTRON_SERVER_LOG_LEVEL") {
1076            self.server.log_level = v;
1077        }
1078        if let Ok(v) = std::env::var("UCOTRON_SERVER_LOG_FORMAT") {
1079            self.server.log_format = v;
1080        }
1081
1082        // Storage overrides
1083        if let Ok(v) = std::env::var("UCOTRON_STORAGE_MODE") {
1084            self.storage.mode = v;
1085        }
1086        if let Ok(v) = std::env::var("UCOTRON_STORAGE_SHARED_DATA_DIR") {
1087            self.storage.shared_data_dir = Some(v);
1088        }
1089        if let Ok(v) = std::env::var("UCOTRON_STORAGE_MEDIA_DIR") {
1090            self.storage.media_dir = v;
1091        }
1092        if let Ok(v) = std::env::var("UCOTRON_STORAGE_VECTOR_BACKEND") {
1093            self.storage.vector.backend = v;
1094        }
1095        if let Ok(v) = std::env::var("UCOTRON_STORAGE_VECTOR_DATA_DIR") {
1096            self.storage.vector.data_dir = v;
1097        }
1098        if let Ok(v) = std::env::var("UCOTRON_STORAGE_GRAPH_BACKEND") {
1099            self.storage.graph.backend = v;
1100        }
1101        if let Ok(v) = std::env::var("UCOTRON_STORAGE_GRAPH_DATA_DIR") {
1102            self.storage.graph.data_dir = v;
1103        }
1104        if let Ok(v) = std::env::var("UCOTRON_STORAGE_GRAPH_BATCH_SIZE") {
1105            if let Ok(bs) = v.parse::<usize>() {
1106                self.storage.graph.batch_size = bs;
1107            }
1108        }
1109
1110        // Models overrides
1111        if let Ok(v) = std::env::var("UCOTRON_MODELS_EMBEDDING_MODEL") {
1112            self.models.embedding_model = v;
1113        }
1114        if let Ok(v) = std::env::var("UCOTRON_MODELS_NER_MODEL") {
1115            self.models.ner_model = v;
1116        }
1117        if let Ok(v) = std::env::var("UCOTRON_MODELS_LLM_MODEL") {
1118            self.models.llm_model = v;
1119        }
1120        if let Ok(v) = std::env::var("UCOTRON_MODELS_LLM_BACKEND") {
1121            self.models.llm_backend = v;
1122        }
1123        if let Ok(v) = std::env::var("UCOTRON_MODELS_DIR") {
1124            self.models.models_dir = v;
1125        }
1126        if let Ok(v) = std::env::var("UCOTRON_MODELS_ENABLE_OCR") {
1127            if let Ok(b) = v.parse::<bool>() {
1128                self.models.enable_ocr = b;
1129            }
1130        }
1131        if let Ok(v) = std::env::var("UCOTRON_MODELS_OCR_LANGUAGE") {
1132            self.models.ocr_language = v;
1133        }
1134        if let Ok(v) = std::env::var("UCOTRON_MODELS_TESSERACT_PATH") {
1135            self.models.tesseract_path = v;
1136        }
1137        if let Ok(v) = std::env::var("UCOTRON_MODELS_FINE_TUNED_RE_MODEL") {
1138            self.models.fine_tuned_re_model = v;
1139        }
1140        if let Ok(v) = std::env::var("UCOTRON_MODELS_FINE_TUNED_RE_ENDPOINT") {
1141            self.models.fine_tuned_re_endpoint = v;
1142        }
1143        if let Ok(v) = std::env::var("UCOTRON_MODELS_FINE_TUNED_RE_API_KEY_ENV") {
1144            self.models.fine_tuned_re_api_key_env = v;
1145        }
1146
1147        // Consolidation overrides
1148        if let Ok(v) = std::env::var("UCOTRON_CONSOLIDATION_TRIGGER_INTERVAL") {
1149            if let Ok(ti) = v.parse::<usize>() {
1150                self.consolidation.trigger_interval = ti;
1151            }
1152        }
1153        if let Ok(v) = std::env::var("UCOTRON_CONSOLIDATION_ENABLE_DECAY") {
1154            if let Ok(b) = v.parse::<bool>() {
1155                self.consolidation.enable_decay = b;
1156            }
1157        }
1158        if let Ok(v) = std::env::var("UCOTRON_CONSOLIDATION_DECAY_HALFLIFE_SECS") {
1159            if let Ok(s) = v.parse::<u64>() {
1160                self.consolidation.decay_halflife_secs = s;
1161            }
1162        }
1163
1164        // MCP overrides
1165        if let Ok(v) = std::env::var("UCOTRON_MCP_ENABLED") {
1166            if let Ok(b) = v.parse::<bool>() {
1167                self.mcp.enabled = b;
1168            }
1169        }
1170        if let Ok(v) = std::env::var("UCOTRON_MCP_TRANSPORT") {
1171            self.mcp.transport = v;
1172        }
1173        if let Ok(v) = std::env::var("UCOTRON_MCP_PORT") {
1174            if let Ok(port) = v.parse::<u16>() {
1175                self.mcp.port = port;
1176            }
1177        }
1178
1179        // Namespaces overrides
1180        if let Ok(v) = std::env::var("UCOTRON_NAMESPACES_DEFAULT") {
1181            self.namespaces.default_namespace = v;
1182        }
1183
1184        // Auth overrides
1185        if let Ok(v) = std::env::var("UCOTRON_AUTH_ENABLED") {
1186            if let Ok(b) = v.parse::<bool>() {
1187                self.auth.enabled = b;
1188            }
1189        }
1190        if let Ok(v) = std::env::var("UCOTRON_AUTH_API_KEY") {
1191            self.auth.api_key = Some(v);
1192        }
1193        if let Ok(v) = std::env::var("UCOTRON_AUTH_JWT_SECRET") {
1194            self.auth.jwt_secret = Some(v);
1195        }
1196
1197        // GDPR overrides
1198        if let Ok(v) = std::env::var("UCOTRON_GDPR_ENABLED") {
1199            if let Ok(b) = v.parse::<bool>() {
1200                self.gdpr.enabled = b;
1201            }
1202        }
1203        if let Ok(v) = std::env::var("UCOTRON_GDPR_DEFAULT_RETENTION_TTL_SECS") {
1204            if let Ok(s) = v.parse::<u64>() {
1205                self.gdpr.default_retention_ttl_secs = s;
1206            }
1207        }
1208
1209        // Audit overrides
1210        if let Ok(v) = std::env::var("UCOTRON_AUDIT_ENABLED") {
1211            if let Ok(b) = v.parse::<bool>() {
1212                self.audit.enabled = b;
1213            }
1214        }
1215        if let Ok(v) = std::env::var("UCOTRON_AUDIT_RETENTION_SECS") {
1216            if let Ok(s) = v.parse::<u64>() {
1217                self.audit.retention_secs = s;
1218            }
1219        }
1220        if let Ok(v) = std::env::var("UCOTRON_AUDIT_MAX_ENTRIES") {
1221            if let Ok(n) = v.parse::<usize>() {
1222                self.audit.max_entries = n;
1223            }
1224        }
1225
1226        // Instance overrides
1227        if let Ok(v) = std::env::var("UCOTRON_INSTANCE_ID") {
1228            self.instance.instance_id = v;
1229        }
1230        if let Ok(v) = std::env::var("UCOTRON_INSTANCE_ROLE") {
1231            self.instance.role = v;
1232        }
1233        if let Ok(v) = std::env::var("UCOTRON_INSTANCE_ID_RANGE_START") {
1234            if let Ok(n) = v.parse::<u64>() {
1235                self.instance.id_range_start = n;
1236            }
1237        }
1238        if let Ok(v) = std::env::var("UCOTRON_INSTANCE_ID_RANGE_SIZE") {
1239            if let Ok(n) = v.parse::<u64>() {
1240                self.instance.id_range_size = n;
1241            }
1242        }
1243
1244        // Telemetry overrides
1245        if let Ok(v) = std::env::var("UCOTRON_TELEMETRY_ENABLED") {
1246            if let Ok(b) = v.parse::<bool>() {
1247                self.telemetry.enabled = b;
1248            }
1249        }
1250        if let Ok(v) = std::env::var("UCOTRON_TELEMETRY_OTLP_ENDPOINT") {
1251            self.telemetry.otlp_endpoint = v;
1252        }
1253        if let Ok(v) = std::env::var("UCOTRON_TELEMETRY_SERVICE_NAME") {
1254            self.telemetry.service_name = v;
1255        }
1256        if let Ok(v) = std::env::var("UCOTRON_TELEMETRY_SAMPLE_RATE") {
1257            if let Ok(r) = v.parse::<f64>() {
1258                self.telemetry.sample_rate = r;
1259            }
1260        }
1261        if let Ok(v) = std::env::var("UCOTRON_TELEMETRY_EXPORT_TRACES") {
1262            if let Ok(b) = v.parse::<bool>() {
1263                self.telemetry.export_traces = b;
1264            }
1265        }
1266        if let Ok(v) = std::env::var("UCOTRON_TELEMETRY_EXPORT_METRICS") {
1267            if let Ok(b) = v.parse::<bool>() {
1268                self.telemetry.export_metrics = b;
1269            }
1270        }
1271        if let Ok(v) = std::env::var("UCOTRON_TELEMETRY_EXPORT_LOGS") {
1272            if let Ok(b) = v.parse::<bool>() {
1273                self.telemetry.export_logs = b;
1274            }
1275        }
1276
1277        // Connectors overrides
1278        if let Ok(v) = std::env::var("UCOTRON_CONNECTORS_ENABLED") {
1279            if let Ok(b) = v.parse::<bool>() {
1280                self.connectors.enabled = b;
1281            }
1282        }
1283        if let Ok(v) = std::env::var("UCOTRON_CONNECTORS_CHECK_INTERVAL_SECS") {
1284            if let Ok(n) = v.parse::<u64>() {
1285                self.connectors.check_interval_secs = n;
1286            }
1287        }
1288    }
1289
1290    // --- Telemetry accessors ---
1291
1292    /// Whether OTLP telemetry export is enabled.
1293    pub fn telemetry_enabled(&self) -> bool {
1294        self.telemetry.enabled
1295    }
1296
1297    /// OTLP gRPC collector endpoint.
1298    pub fn telemetry_otlp_endpoint(&self) -> String {
1299        self.telemetry.otlp_endpoint.clone()
1300    }
1301
1302    /// Service name reported in OTLP traces.
1303    pub fn telemetry_service_name(&self) -> String {
1304        self.telemetry.service_name.clone()
1305    }
1306
1307    /// Trace sampling ratio (0.0–1.0).
1308    pub fn telemetry_sample_rate(&self) -> f64 {
1309        self.telemetry.sample_rate
1310    }
1311
1312    /// Validate configuration values with detailed error messages.
1313    pub fn validate(&self) -> anyhow::Result<()> {
1314        // --- Server validation ---
1315        if self.server.port == 0 {
1316            anyhow::bail!(
1317                "server.port must be > 0 (got 0). Set a valid port in ucotron.toml or via UCOTRON_SERVER_PORT env var."
1318            );
1319        }
1320        if self.server.workers == 0 {
1321            anyhow::bail!(
1322                "server.workers must be > 0 (got 0). Set the number of worker threads in ucotron.toml or via UCOTRON_SERVER_WORKERS env var."
1323            );
1324        }
1325        let valid_log_levels = ["trace", "debug", "info", "warn", "error"];
1326        if !valid_log_levels.contains(&self.server.log_level.as_str()) {
1327            anyhow::bail!(
1328                "server.log_level must be one of: {} (got '{}').",
1329                valid_log_levels.join(", "),
1330                self.server.log_level
1331            );
1332        }
1333        let valid_log_formats = ["text", "json"];
1334        if !valid_log_formats.contains(&self.server.log_format.as_str()) {
1335            anyhow::bail!(
1336                "server.log_format must be one of: {} (got '{}').",
1337                valid_log_formats.join(", "),
1338                self.server.log_format
1339            );
1340        }
1341
1342        // --- Storage validation ---
1343        let valid_modes = ["embedded", "external", "shared"];
1344        if !valid_modes.contains(&self.storage.mode.as_str()) {
1345            anyhow::bail!(
1346                "storage.mode must be one of: {} (got '{}').",
1347                valid_modes.join(", "),
1348                self.storage.mode
1349            );
1350        }
1351        let valid_vector_backends = ["helix", "qdrant", "custom"];
1352        if !valid_vector_backends.contains(&self.storage.vector.backend.as_str()) {
1353            anyhow::bail!(
1354                "storage.vector.backend must be one of: {} (got '{}').",
1355                valid_vector_backends.join(", "),
1356                self.storage.vector.backend
1357            );
1358        }
1359        let valid_graph_backends = ["helix", "falkordb", "custom"];
1360        if !valid_graph_backends.contains(&self.storage.graph.backend.as_str()) {
1361            anyhow::bail!(
1362                "storage.graph.backend must be one of: {} (got '{}').",
1363                valid_graph_backends.join(", "),
1364                self.storage.graph.backend
1365            );
1366        }
1367        // Shared mode requires shared_data_dir when using helix backends
1368        if self.storage.mode == "shared"
1369            && self.storage.vector.backend == "helix"
1370            && self.storage.graph.backend == "helix"
1371            && self.storage.shared_data_dir.is_none()
1372        {
1373            anyhow::bail!(
1374                "storage.shared_data_dir is required when storage.mode is 'shared' with helix backends. \
1375                 All instances must point to the same directory. \
1376                 Set via ucotron.toml or UCOTRON_STORAGE_SHARED_DATA_DIR env var."
1377            );
1378        }
1379
1380        // External backends require a URL
1381        if self.storage.mode == "external" || self.storage.mode == "shared" {
1382            if self.storage.vector.backend != "helix" && self.storage.vector.url.is_none() {
1383                anyhow::bail!(
1384                    "storage.vector.url is required when using external vector backend '{}' in '{}' mode.",
1385                    self.storage.vector.backend,
1386                    self.storage.mode
1387                );
1388            }
1389            if self.storage.graph.backend != "helix" && self.storage.graph.url.is_none() {
1390                anyhow::bail!(
1391                    "storage.graph.url is required when using external graph backend '{}' in '{}' mode.",
1392                    self.storage.graph.backend,
1393                    self.storage.mode
1394                );
1395            }
1396        }
1397
1398        // --- HNSW validation ---
1399        if self.storage.vector.hnsw.ef_construction == 0 {
1400            anyhow::bail!("storage.vector.hnsw.ef_construction must be > 0.");
1401        }
1402        if self.storage.vector.hnsw.ef_search == 0 {
1403            anyhow::bail!("storage.vector.hnsw.ef_search must be > 0.");
1404        }
1405
1406        // --- Models validation ---
1407        let valid_llm_backends = ["candle", "llama_cpp"];
1408        if !valid_llm_backends.contains(&self.models.llm_backend.as_str()) {
1409            anyhow::bail!(
1410                "models.llm_backend must be one of: {} (got '{}').",
1411                valid_llm_backends.join(", "),
1412                self.models.llm_backend
1413            );
1414        }
1415        if self.models.embedding_model.is_empty() {
1416            anyhow::bail!("models.embedding_model must not be empty.");
1417        }
1418
1419        // --- MCP validation ---
1420        let valid_transports = ["stdio", "sse"];
1421        if !valid_transports.contains(&self.mcp.transport.as_str()) {
1422            anyhow::bail!(
1423                "mcp.transport must be one of: {} (got '{}').",
1424                valid_transports.join(", "),
1425                self.mcp.transport
1426            );
1427        }
1428        if self.mcp.transport == "sse" && self.mcp.port == 0 {
1429            anyhow::bail!("mcp.port must be > 0 when mcp.transport is 'sse'.");
1430        }
1431        // MCP and server ports must not collide
1432        if self.mcp.enabled && self.mcp.transport == "sse" && self.mcp.port == self.server.port {
1433            anyhow::bail!(
1434                "mcp.port ({}) must differ from server.port ({}) to avoid port collision.",
1435                self.mcp.port,
1436                self.server.port
1437            );
1438        }
1439
1440        // --- Auth validation ---
1441        if self.auth.enabled
1442            && self.auth.api_key.is_none()
1443            && self.auth.jwt_secret.is_none()
1444            && self.auth.api_keys.is_empty()
1445        {
1446            anyhow::bail!(
1447                "auth.enabled is true but no authentication method is configured. \
1448                 Provide auth.api_key, auth.jwt_secret, or at least one [[auth.api_keys]] entry."
1449            );
1450        }
1451        let valid_auth_roles = ["admin", "writer", "reader", "viewer"];
1452        for (i, entry) in self.auth.api_keys.iter().enumerate() {
1453            if entry.name.is_empty() {
1454                anyhow::bail!("auth.api_keys[{}].name must not be empty.", i);
1455            }
1456            if entry.key.is_empty() {
1457                anyhow::bail!(
1458                    "auth.api_keys[{}].key must not be empty (name='{}').",
1459                    i,
1460                    entry.name
1461                );
1462            }
1463            if !valid_auth_roles.contains(&entry.role.as_str()) {
1464                anyhow::bail!(
1465                    "auth.api_keys[{}].role must be one of: {} (got '{}', name='{}').",
1466                    i,
1467                    valid_auth_roles.join(", "),
1468                    entry.role,
1469                    entry.name
1470                );
1471            }
1472        }
1473
1474        // --- Telemetry validation ---
1475        if self.telemetry.sample_rate < 0.0 || self.telemetry.sample_rate > 1.0 {
1476            anyhow::bail!(
1477                "telemetry.sample_rate must be between 0.0 and 1.0 (got {}).",
1478                self.telemetry.sample_rate
1479            );
1480        }
1481        if self.telemetry.enabled && self.telemetry.otlp_endpoint.is_empty() {
1482            anyhow::bail!("telemetry.otlp_endpoint must not be empty when telemetry is enabled.");
1483        }
1484        if self.telemetry.enabled && self.telemetry.service_name.is_empty() {
1485            anyhow::bail!("telemetry.service_name must not be empty when telemetry is enabled.");
1486        }
1487
1488        // --- Namespaces validation ---
1489        if self.namespaces.default_namespace.is_empty() {
1490            anyhow::bail!("namespaces.default_namespace must not be empty.");
1491        }
1492
1493        // --- Instance validation ---
1494        let valid_roles = ["auto", "writer", "reader"];
1495        if !valid_roles.contains(&self.instance.role.as_str()) {
1496            anyhow::bail!(
1497                "instance.role must be one of: {} (got '{}').",
1498                valid_roles.join(", "),
1499                self.instance.role
1500            );
1501        }
1502        if self.instance.id_range_size == 0 {
1503            anyhow::bail!("instance.id_range_size must be > 0.");
1504        }
1505        // Shared mode requires either writer or reader role
1506        if self.storage.mode == "shared" && self.instance.role == "auto" {
1507            anyhow::bail!(
1508                "instance.role must be 'writer' or 'reader' when storage.mode is 'shared'. \
1509                 Set via ucotron.toml or UCOTRON_INSTANCE_ROLE env var."
1510            );
1511        }
1512
1513        // --- Connectors validation ---
1514        if self.connectors.check_interval_secs == 0 {
1515            anyhow::bail!("connectors.check_interval_secs must be > 0.");
1516        }
1517        for (i, entry) in self.connectors.schedules.iter().enumerate() {
1518            if entry.connector_id.is_empty() {
1519                anyhow::bail!(
1520                    "connectors.schedules[{}].connector_id must not be empty.",
1521                    i
1522                );
1523            }
1524            if entry.timeout_secs == 0 {
1525                anyhow::bail!(
1526                    "connectors.schedules[{}].timeout_secs must be > 0 (connector_id='{}').",
1527                    i,
1528                    entry.connector_id
1529                );
1530            }
1531        }
1532
1533        Ok(())
1534    }
1535
1536    /// Generate an example configuration as a TOML string (plain, no comments).
1537    pub fn example_toml() -> String {
1538        let config = UcotronConfig::default();
1539        toml::to_string_pretty(&config)
1540            .unwrap_or_else(|_| "# Failed to generate example".to_string())
1541    }
1542
1543    /// Generate a fully commented example configuration file.
1544    ///
1545    /// This is suitable for `ucotron_server --init-config` output.
1546    pub fn example_toml_commented() -> String {
1547        format!(
1548            r#"# =============================================================================
1549# Ucotron Configuration File
1550# =============================================================================
1551# This file configures the Ucotron cognitive memory server.
1552# All values shown below are defaults — uncomment and modify as needed.
1553#
1554# Environment variables override TOML values. Use the UCOTRON_ prefix:
1555#   UCOTRON_SERVER_PORT=9000 ucotron_server
1556#
1557# For full documentation, see: https://ucotron.com/docs/server/configuration
1558
1559# -----------------------------------------------------------------------------
1560# [server] — HTTP server settings
1561# -----------------------------------------------------------------------------
1562[server]
1563# Bind address for the REST API.
1564host = "0.0.0.0"
1565# HTTP port for the REST API.
1566port = 8420
1567# Number of worker threads for request handling.
1568workers = 4
1569# Log level: trace, debug, info, warn, error
1570log_level = "info"
1571# Log format: "text" (human-readable) or "json" (structured with trace IDs)
1572log_format = "text"
1573
1574# -----------------------------------------------------------------------------
1575# [storage] — Backend storage configuration
1576# -----------------------------------------------------------------------------
1577[storage]
1578# Storage mode:
1579#   "embedded" — All data stored locally in LMDB (default, single-instance)
1580#   "external" — Use external backends (Qdrant, FalkorDB, etc.)
1581#   "shared"   — Multiple server instances sharing the same storage directory
1582mode = "embedded"
1583# Shared data directory for multi-instance mode (required when mode = "shared").
1584# All instances must point to the same directory (e.g., NFS mount, shared volume).
1585# shared_data_dir = "/data/ucotron-shared"
1586
1587# Vector backend configuration.
1588[storage.vector]
1589# Backend type: "helix" (embedded LMDB+HNSW), "qdrant" (external), "custom"
1590backend = "helix"
1591# Data directory for embedded backends (relative to working dir).
1592data_dir = "data"
1593# Maximum database size in bytes (10 GB default, for LMDB map_size).
1594max_db_size = {max_db_size}
1595# URL for external vector backend (required when backend != "helix").
1596# url = "http://localhost:6333"
1597
1598# HNSW vector index parameters.
1599[storage.vector.hnsw]
1600# Number of bi-directional links per node during index construction.
1601# Higher = better recall, more memory, slower build.
1602ef_construction = 200
1603# Number of candidates evaluated during search.
1604# Higher = better recall, slower search.
1605ef_search = 200
1606# Enable HNSW index. When false, falls back to brute-force SIMD search.
1607enabled = true
1608
1609# Graph backend configuration.
1610[storage.graph]
1611# Backend type: "helix" (embedded LMDB), "falkordb" (external), "custom"
1612backend = "helix"
1613# Data directory for embedded backends.
1614data_dir = "data"
1615# Maximum database size in bytes (10 GB default).
1616max_db_size = {max_db_size}
1617# Batch size for bulk operations (node/edge inserts).
1618batch_size = 10000
1619# URL for external graph backend (required when backend != "helix").
1620# url = "redis://localhost:6379"
1621
1622# -----------------------------------------------------------------------------
1623# [models] — ML model configuration
1624# -----------------------------------------------------------------------------
1625[models]
1626# Sentence embedding model (ONNX format, 384-dim output).
1627embedding_model = "all-MiniLM-L6-v2"
1628# Named Entity Recognition model (GLiNER, ONNX format).
1629ner_model = "gliner-multi-v2.1"
1630# LLM model for relation extraction (GGUF quantized).
1631# Set to "none" or "" to use co-occurrence fallback (no LLM).
1632llm_model = "Qwen3-4B-GGUF"
1633# LLM backend: "candle" (Rust native) or "llama_cpp" (C++ bindings).
1634llm_backend = "candle"
1635# Directory containing model files (downloaded via scripts/download_models.sh).
1636models_dir = "models"
1637# Enable document OCR pipeline (PDF text extraction + Tesseract image OCR).
1638enable_ocr = true
1639# Language for Tesseract OCR (e.g., "eng", "spa", "deu", "eng+spa").
1640ocr_language = "eng"
1641# Path to the tesseract binary. Set to full path if not on PATH.
1642tesseract_path = "tesseract"
1643
1644# -----------------------------------------------------------------------------
1645# [consolidation] — Background "dreaming" worker
1646# -----------------------------------------------------------------------------
1647[consolidation]
1648# Number of ingested messages between consolidation runs.
1649trigger_interval = 100
1650# Enable temporal memory decay for old, unaccessed nodes.
1651enable_decay = true
1652# Decay half-life in seconds (default: 30 days = 2592000).
1653decay_halflife_secs = 2592000
1654
1655# -----------------------------------------------------------------------------
1656# [telemetry] — OpenTelemetry observability
1657# -----------------------------------------------------------------------------
1658[telemetry]
1659# Enable OTLP telemetry export (traces, metrics, logs).
1660# Requires a running OpenTelemetry collector (e.g., Jaeger, Grafana Tempo).
1661enabled = false
1662# OTLP gRPC collector endpoint.
1663otlp_endpoint = "http://localhost:4317"
1664# Service name reported in traces and metrics.
1665service_name = "ucotron"
1666# Trace sampling ratio: 0.0 (no traces) to 1.0 (all traces).
1667sample_rate = 1.0
1668# Export traces via OTLP.
1669export_traces = true
1670# Export metrics via OTLP.
1671export_metrics = true
1672# Export logs via OTLP (may be verbose, disabled by default).
1673export_logs = false
1674
1675# -----------------------------------------------------------------------------
1676# [mcp] — Model Context Protocol server
1677# -----------------------------------------------------------------------------
1678[mcp]
1679# Enable MCP server for Claude Desktop, Cursor, etc.
1680enabled = true
1681# Transport mode: "stdio" (default, for CLI tools) or "sse" (HTTP streaming).
1682transport = "stdio"
1683# Port for SSE transport (only used when transport = "sse").
1684port = 8421
1685
1686# -----------------------------------------------------------------------------
1687# [namespaces] — Multi-tenancy configuration
1688# -----------------------------------------------------------------------------
1689[namespaces]
1690# Default namespace when no X-Ucotron-Namespace header is provided.
1691default_namespace = "default"
1692# Restrict to specific namespaces (empty = allow any).
1693# allowed_namespaces = ["org1", "org2"]
1694# Maximum number of namespaces (0 = unlimited).
1695max_namespaces = 0
1696
1697# -----------------------------------------------------------------------------
1698# [auth] — Authentication (optional)
1699# -----------------------------------------------------------------------------
1700[auth]
1701# Enable authentication. When false, all requests are accepted.
1702enabled = false
1703# API key for Bearer token auth. Set via UCOTRON_AUTH_API_KEY env var.
1704# api_key = "your-secret-api-key"
1705# JWT secret for token-based auth (future use).
1706# jwt_secret = "your-jwt-secret"
1707# JWT issuer (future use).
1708# jwt_issuer = "ucotron"
1709
1710# -----------------------------------------------------------------------------
1711# [audit] — Immutable audit logging
1712# -----------------------------------------------------------------------------
1713[audit]
1714# Enable audit logging for all API operations.
1715enabled = true
1716# Retention period in seconds (0 = keep forever). Default: 90 days.
1717retention_secs = 7776000
1718# Maximum entries kept in memory. Oldest entries evicted when exceeded.
1719max_entries = 100000
1720
1721# -----------------------------------------------------------------------------
1722# [instance] — Multi-instance configuration
1723# -----------------------------------------------------------------------------
1724[instance]
1725# Unique identifier for this server instance.
1726# Set to "auto" to generate from hostname + PID.
1727instance_id = "auto"
1728# Instance role:
1729#   "auto"   — Single-instance mode (default), acts as both reader and writer
1730#   "writer" — Can perform writes (ingestion, learn, update, delete)
1731#   "reader" — Read-only (search, augment, get operations only)
1732role = "auto"
1733# Starting node ID for this instance's ID allocation range.
1734# Each instance in a multi-instance deployment needs a non-overlapping range.
1735id_range_start = 1000000
1736# Size of this instance's node ID allocation range (default: 1 billion).
1737id_range_size = 1000000000
1738"#,
1739            max_db_size = 10u64 * 1024 * 1024 * 1024
1740        )
1741    }
1742}
1743
#[cfg(test)]
mod tests {
    //! Unit tests for configuration defaults, TOML parsing, validation errors,
    //! environment-variable overrides, and the generated example files.

    use super::*;

    // ------------------------------------------------------------------
    // Shared helpers
    // ------------------------------------------------------------------

    /// Serializes every test that mutates the process environment.
    ///
    /// The test harness runs tests on multiple threads by default and the
    /// environment is process-global, so two tests writing the same variable
    /// (e.g. `UCOTRON_SERVER_PORT`) would otherwise race with each other.
    static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// RAII guard that sets environment variables for the duration of a test
    /// and removes them again on drop, even if the test panics.
    struct EnvGuard {
        /// Names of the variables set by this guard.
        keys: Vec<&'static str>,
        /// Held until after `Drop::drop` has removed the variables (fields are
        /// dropped after the `Drop` implementation has run).
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl EnvGuard {
        /// Acquires the environment lock and sets every `(name, value)` pair.
        fn set(vars: &[(&'static str, &str)]) -> Self {
            // A poisoned lock only means another env test failed an assertion;
            // its guard still cleaned up, so it is safe to continue.
            let lock = ENV_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let mut keys = Vec::with_capacity(vars.len());
            for &(key, value) in vars {
                std::env::set_var(key, value);
                keys.push(key);
            }
            Self { keys, _lock: lock }
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            for key in self.keys.drain(..) {
                std::env::remove_var(key);
            }
        }
    }

    /// Builds a default config, applies environment overrides while `vars`
    /// are set, and hands the result to `check`.
    ///
    /// The variables stay set (and the lock stays held) while `check` runs and
    /// are removed afterwards, whether or not `check` panics.
    fn with_env_overrides(vars: &[(&'static str, &str)], check: impl FnOnce(&UcotronConfig)) {
        let _env = EnvGuard::set(vars);
        let mut config = UcotronConfig::default();
        config.apply_env_overrides();
        check(&config);
    }

    /// Parses `source`, panicking if the configuration is rejected.
    fn parse(source: &str) -> UcotronConfig {
        UcotronConfig::parse_toml(source).unwrap()
    }

    /// Parses `source`, asserts that it is rejected, and returns the error text.
    fn parse_error(source: &str) -> String {
        match UcotronConfig::parse_toml(source) {
            Ok(_) => panic!("expected configuration to be rejected:\n{}", source),
            Err(err) => err.to_string(),
        }
    }

    /// Asserts that `source` is rejected and that the error message mentions
    /// every one of `fragments`.
    fn assert_rejected(source: &str, fragments: &[&str]) {
        let message = parse_error(source);
        for &fragment in fragments {
            assert!(
                message.contains(fragment),
                "error {:?} should mention {:?}",
                message,
                fragment
            );
        }
    }

    /// Asserts that `text` contains every one of `markers`.
    fn assert_contains_all(text: &str, markers: &[&str]) {
        for &marker in markers {
            assert!(text.contains(marker), "missing {:?} in generated text", marker);
        }
    }

    // ------------------------------------------------------------------
    // Defaults and basic parsing
    // ------------------------------------------------------------------

    #[test]
    fn test_default_config() {
        let config = UcotronConfig::default();
        assert_eq!(config.server.host, "0.0.0.0");
        assert_eq!(config.server.port, 8420);
        assert_eq!(config.server.workers, 4);
        assert_eq!(config.server.log_level, "info");
        assert_eq!(config.server.log_format, "text");
        assert_eq!(config.storage.mode, "embedded");
        assert_eq!(config.storage.vector.backend, "helix");
        assert_eq!(config.storage.graph.backend, "helix");
        assert_eq!(config.models.embedding_model, "all-MiniLM-L6-v2");
        assert_eq!(config.namespaces.default_namespace, "default");
        assert!(config.namespaces.allowed_namespaces.is_empty());
        assert!(!config.auth.enabled);
        assert!(config.auth.api_key.is_none());
    }

    #[test]
    fn test_parse_minimal_toml() {
        // An empty document must fall back to defaults for every section.
        let config = parse("");
        assert_eq!(config.server.port, 8420);
        assert_eq!(config.namespaces.default_namespace, "default");
    }

    #[test]
    fn test_parse_custom_toml() {
        let config = parse(
            r#"
[server]
host = "127.0.0.1"
port = 9000
workers = 8

[storage]
mode = "embedded"

[storage.vector]
backend = "helix"
data_dir = "/tmp/ucotron"

[storage.graph]
backend = "helix"
batch_size = 5000

[namespaces]
default_namespace = "my-project"
allowed_namespaces = ["my-project", "staging"]

[auth]
enabled = true
api_key = "test-key-123"
"#,
        );
        assert_eq!(config.server.host, "127.0.0.1");
        assert_eq!(config.server.port, 9000);
        assert_eq!(config.server.workers, 8);
        assert_eq!(config.storage.vector.data_dir, "/tmp/ucotron");
        assert_eq!(config.storage.graph.batch_size, 5000);
        assert_eq!(config.namespaces.default_namespace, "my-project");
        assert_eq!(
            config.namespaces.allowed_namespaces,
            vec!["my-project", "staging"]
        );
        assert!(config.auth.enabled);
        assert_eq!(config.auth.api_key.as_deref(), Some("test-key-123"));
    }

    // ------------------------------------------------------------------
    // Server / storage validation
    // ------------------------------------------------------------------

    #[test]
    fn test_invalid_storage_mode() {
        assert_rejected(
            r#"
[storage]
mode = "invalid"
"#,
            &["storage.mode", "invalid"],
        );
    }

    #[test]
    fn test_invalid_port() {
        assert_rejected(
            r#"
[server]
port = 0
"#,
            &["server.port"],
        );
    }

    #[test]
    fn test_invalid_log_level() {
        assert_rejected(
            r#"
[server]
log_level = "verbose"
"#,
            &["server.log_level", "verbose"],
        );
    }

    #[test]
    fn test_invalid_log_format() {
        assert_rejected(
            r#"
[server]
log_format = "xml"
"#,
            &["server.log_format", "xml"],
        );
    }

    #[test]
    fn test_log_format_json_valid() {
        let config = parse(
            r#"
[server]
log_format = "json"
"#,
        );
        assert_eq!(config.server.log_format, "json");
    }

    #[test]
    fn test_env_override_log_format() {
        with_env_overrides(&[("UCOTRON_SERVER_LOG_FORMAT", "json")], |config| {
            assert_eq!(config.server.log_format, "json");
        });
    }

    // ------------------------------------------------------------------
    // Example generation and serialization
    // ------------------------------------------------------------------

    #[test]
    fn test_example_toml_generation() {
        let example = UcotronConfig::example_toml();
        assert_contains_all(&example, &["port", "8420", "helix"]);
        // The generated example must itself be a valid configuration.
        let _config = parse(&example);
    }

    #[test]
    fn test_example_toml_commented() {
        let commented = UcotronConfig::example_toml_commented();
        // Section headers.
        assert_contains_all(
            &commented,
            &[
                "[server]",
                "[storage]",
                "[models]",
                "[consolidation]",
                "[mcp]",
                "[namespaces]",
                "[auth]",
            ],
        );
        // Inline comments.
        assert_contains_all(&commented, &["# Bind address", "UCOTRON_"]);
    }

    #[test]
    fn test_serialization_roundtrip() {
        let config = UcotronConfig::default();
        let toml_str = toml::to_string_pretty(&config).unwrap();
        let parsed: UcotronConfig = toml::from_str(&toml_str).unwrap();
        assert_eq!(parsed.server.port, config.server.port);
        assert_eq!(parsed.storage.mode, config.storage.mode);
        assert_eq!(
            parsed.namespaces.default_namespace,
            config.namespaces.default_namespace
        );
    }

    // ------------------------------------------------------------------
    // Environment overrides (server / storage / auth / models)
    // ------------------------------------------------------------------

    #[test]
    fn test_env_override_server_port() {
        with_env_overrides(&[("UCOTRON_SERVER_PORT", "9999")], |config| {
            assert_eq!(config.server.port, 9999);
        });
    }

    #[test]
    fn test_env_override_server_host() {
        with_env_overrides(&[("UCOTRON_SERVER_HOST", "127.0.0.1")], |config| {
            assert_eq!(config.server.host, "127.0.0.1");
        });
    }

    #[test]
    fn test_env_override_storage_mode() {
        with_env_overrides(&[("UCOTRON_STORAGE_MODE", "external")], |config| {
            assert_eq!(config.storage.mode, "external");
        });
    }

    #[test]
    fn test_env_override_auth_api_key() {
        with_env_overrides(&[("UCOTRON_AUTH_API_KEY", "secret-from-env")], |config| {
            assert_eq!(config.auth.api_key.as_deref(), Some("secret-from-env"));
        });
    }

    #[test]
    fn test_env_override_models_dir() {
        with_env_overrides(&[("UCOTRON_MODELS_DIR", "/opt/models")], |config| {
            assert_eq!(config.models.models_dir, "/opt/models");
        });
    }

    #[test]
    fn test_env_override_invalid_port_ignored() {
        with_env_overrides(&[("UCOTRON_SERVER_PORT", "not-a-number")], |config| {
            // The value does not parse as a port, so the default is kept.
            assert_eq!(config.server.port, 8420);
        });
    }

    // ------------------------------------------------------------------
    // Auth / storage backend / MCP / namespace / model validation
    // ------------------------------------------------------------------

    #[test]
    fn test_auth_enabled_without_credentials() {
        assert_rejected(
            r#"
[auth]
enabled = true
"#,
            &["auth.enabled", "api_key"],
        );
    }

    #[test]
    fn test_auth_enabled_with_api_key() {
        let config = parse(
            r#"
[auth]
enabled = true
api_key = "my-secret"
"#,
        );
        assert!(config.auth.enabled);
        assert_eq!(config.auth.api_key.as_deref(), Some("my-secret"));
    }

    #[test]
    fn test_external_mode_requires_url() {
        assert_rejected(
            r#"
[storage]
mode = "external"

[storage.vector]
backend = "qdrant"
"#,
            &["storage.vector.url", "qdrant"],
        );
    }

    #[test]
    fn test_external_mode_helix_no_url_needed() {
        // Helix in external mode doesn't need a URL (it's embedded).
        let config = parse(
            r#"
[storage]
mode = "external"

[storage.vector]
backend = "helix"

[storage.graph]
backend = "helix"
"#,
        );
        assert_eq!(config.storage.mode, "external");
    }

    #[test]
    fn test_mcp_sse_port_collision() {
        assert_rejected(
            r#"
[server]
port = 8420

[mcp]
enabled = true
transport = "sse"
port = 8420
"#,
            &["port collision"],
        );
    }

    #[test]
    fn test_mcp_transport_validation() {
        assert_rejected(
            r#"
[mcp]
transport = "grpc"
"#,
            &["mcp.transport"],
        );
    }

    #[test]
    fn test_empty_namespace_rejected() {
        assert_rejected(
            r#"
[namespaces]
default_namespace = ""
"#,
            &["namespaces.default_namespace"],
        );
    }

    #[test]
    fn test_hnsw_zero_ef_construction() {
        assert_rejected(
            r#"
[storage.vector.hnsw]
ef_construction = 0
"#,
            &["ef_construction"],
        );
    }

    #[test]
    fn test_empty_embedding_model_rejected() {
        assert_rejected(
            r#"
[models]
embedding_model = ""
"#,
            &["models.embedding_model"],
        );
    }

    // --- Instance config tests ---

    #[test]
    fn test_instance_config_defaults() {
        let config = UcotronConfig::default();
        assert_eq!(config.instance.instance_id, "auto");
        assert_eq!(config.instance.role, "auto");
        assert_eq!(config.instance.id_range_start, 1_000_000);
        assert_eq!(config.instance.id_range_size, 1_000_000_000);
        assert!(config.instance.can_write());
        assert!(!config.instance.is_reader_only());
    }

    #[test]
    fn test_instance_config_writer_role() {
        let config = parse(
            r#"
[instance]
instance_id = "writer-1"
role = "writer"
id_range_start = 0
id_range_size = 500000000
"#,
        );
        assert_eq!(config.instance.instance_id, "writer-1");
        assert_eq!(config.instance.role, "writer");
        assert_eq!(config.instance.id_range_start, 0);
        assert_eq!(config.instance.id_range_size, 500_000_000);
        assert!(config.instance.can_write());
        assert!(!config.instance.is_reader_only());
    }

    #[test]
    fn test_instance_config_reader_role() {
        let config = parse(
            r#"
[instance]
instance_id = "reader-1"
role = "reader"
"#,
        );
        assert_eq!(config.instance.role, "reader");
        assert!(!config.instance.can_write());
        assert!(config.instance.is_reader_only());
    }

    #[test]
    fn test_instance_invalid_role() {
        assert_rejected(
            r#"
[instance]
role = "master"
"#,
            &["instance.role", "master"],
        );
    }

    #[test]
    fn test_instance_zero_range_size_rejected() {
        assert_rejected(
            r#"
[instance]
id_range_size = 0
"#,
            &["id_range_size"],
        );
    }

    #[test]
    fn test_shared_mode_requires_explicit_role() {
        assert_rejected(
            r#"
[storage]
mode = "shared"
shared_data_dir = "/data/shared"

[instance]
role = "auto"
"#,
            &["instance.role", "shared"],
        );
    }

    #[test]
    fn test_shared_mode_writer_role_ok() {
        let config = parse(
            r#"
[storage]
mode = "shared"
shared_data_dir = "/data/shared"

[instance]
instance_id = "w1"
role = "writer"
"#,
        );
        assert_eq!(config.storage.mode, "shared");
        assert_eq!(config.instance.role, "writer");
        assert_eq!(
            config.storage.shared_data_dir.as_deref(),
            Some("/data/shared")
        );
    }

    #[test]
    fn test_shared_mode_reader_role_ok() {
        let config = parse(
            r#"
[storage]
mode = "shared"
shared_data_dir = "/data/shared"

[instance]
instance_id = "r1"
role = "reader"
"#,
        );
        assert_eq!(config.storage.mode, "shared");
        assert_eq!(config.instance.role, "reader");
    }

    #[test]
    fn test_shared_mode_requires_shared_data_dir() {
        assert_rejected(
            r#"
[storage]
mode = "shared"

[instance]
instance_id = "w1"
role = "writer"
"#,
            &["shared_data_dir"],
        );
    }

    #[test]
    fn test_effective_data_dirs_embedded() {
        let config = UcotronConfig::default();
        assert_eq!(config.storage.effective_vector_data_dir(), "data");
        assert_eq!(config.storage.effective_graph_data_dir(), "data");
    }

    #[test]
    fn test_effective_data_dirs_shared() {
        let config = parse(
            r#"
[storage]
mode = "shared"
shared_data_dir = "/mnt/shared"

[instance]
role = "writer"
"#,
        );
        assert_eq!(config.storage.effective_vector_data_dir(), "/mnt/shared");
        assert_eq!(config.storage.effective_graph_data_dir(), "/mnt/shared");
    }

    #[test]
    fn test_instance_resolved_id_auto() {
        let config = UcotronConfig::default();
        let resolved = config.instance.resolved_instance_id();
        // "auto" must be replaced by a generated, dash-separated identifier.
        assert!(resolved.contains('-'));
        assert!(!resolved.is_empty());
        assert_ne!(resolved, "auto");
    }

    #[test]
    fn test_instance_resolved_id_explicit() {
        let config = parse(
            r#"
[instance]
instance_id = "my-server-1"
"#,
        );
        assert_eq!(config.instance.resolved_instance_id(), "my-server-1");
    }

    #[test]
    fn test_env_override_instance_role() {
        with_env_overrides(&[("UCOTRON_INSTANCE_ROLE", "reader")], |config| {
            assert_eq!(config.instance.role, "reader");
            assert!(config.instance.is_reader_only());
        });
    }

    #[test]
    fn test_env_override_instance_id() {
        with_env_overrides(&[("UCOTRON_INSTANCE_ID", "env-instance-42")], |config| {
            assert_eq!(config.instance.instance_id, "env-instance-42");
            assert_eq!(config.instance.resolved_instance_id(), "env-instance-42");
        });
    }

    #[test]
    fn test_env_override_id_range() {
        with_env_overrides(
            &[
                ("UCOTRON_INSTANCE_ID_RANGE_START", "2000000000"),
                ("UCOTRON_INSTANCE_ID_RANGE_SIZE", "500000000"),
            ],
            |config| {
                assert_eq!(config.instance.id_range_start, 2_000_000_000);
                assert_eq!(config.instance.id_range_size, 500_000_000);
            },
        );
    }

    #[test]
    fn test_example_toml_contains_instance_section() {
        let commented = UcotronConfig::example_toml_commented();
        assert_contains_all(
            &commented,
            &[
                "[instance]",
                "instance_id",
                "role",
                "id_range_start",
                "id_range_size",
            ],
        );
    }

    #[test]
    fn test_full_config_all_sections() {
        let config = parse(
            r#"
[server]
host = "10.0.0.1"
port = 3000
workers = 16
log_level = "debug"

[storage]
mode = "embedded"

[storage.vector]
backend = "helix"
data_dir = "/data/vectors"

[storage.vector.hnsw]
ef_construction = 100
ef_search = 100
enabled = true

[storage.graph]
backend = "helix"
data_dir = "/data/graph"
batch_size = 20000

[models]
embedding_model = "custom-model"
ner_model = "custom-ner"
llm_model = "none"
llm_backend = "candle"
models_dir = "/opt/models"

[consolidation]
trigger_interval = 50
enable_decay = false
decay_halflife_secs = 86400

[mcp]
enabled = false
transport = "stdio"
port = 9000

[namespaces]
default_namespace = "prod"
allowed_namespaces = ["prod", "staging", "dev"]
max_namespaces = 10

[auth]
enabled = true
api_key = "super-secret-key"
"#,
        );
        assert_eq!(config.server.host, "10.0.0.1");
        assert_eq!(config.server.port, 3000);
        assert_eq!(config.server.workers, 16);
        assert_eq!(config.server.log_level, "debug");
        assert_eq!(config.storage.vector.data_dir, "/data/vectors");
        assert_eq!(config.storage.vector.hnsw.ef_construction, 100);
        assert_eq!(config.storage.graph.data_dir, "/data/graph");
        assert_eq!(config.storage.graph.batch_size, 20000);
        assert_eq!(config.models.llm_model, "none");
        assert_eq!(config.consolidation.trigger_interval, 50);
        assert!(!config.consolidation.enable_decay);
        assert!(!config.mcp.enabled);
        assert_eq!(config.namespaces.default_namespace, "prod");
        assert_eq!(config.namespaces.allowed_namespaces.len(), 3);
        assert_eq!(config.namespaces.max_namespaces, 10);
        assert!(config.auth.enabled);
    }

    // --- Telemetry config tests ---

    #[test]
    fn test_telemetry_config_defaults() {
        let config = UcotronConfig::default();
        assert!(!config.telemetry.enabled);
        assert_eq!(config.telemetry.otlp_endpoint, "http://localhost:4317");
        assert_eq!(config.telemetry.service_name, "ucotron");
        assert_eq!(config.telemetry.sample_rate, 1.0);
        assert!(config.telemetry.export_traces);
        assert!(config.telemetry.export_metrics);
        assert!(!config.telemetry.export_logs);
    }

    #[test]
    fn test_telemetry_config_parse_toml() {
        let config = parse(
            r#"
[telemetry]
enabled = true
otlp_endpoint = "http://otel-collector:4317"
service_name = "ucotron-prod"
sample_rate = 0.5
export_traces = true
export_metrics = false
export_logs = true
"#,
        );
        assert!(config.telemetry.enabled);
        assert_eq!(config.telemetry.otlp_endpoint, "http://otel-collector:4317");
        assert_eq!(config.telemetry.service_name, "ucotron-prod");
        assert_eq!(config.telemetry.sample_rate, 0.5);
        assert!(config.telemetry.export_traces);
        assert!(!config.telemetry.export_metrics);
        assert!(config.telemetry.export_logs);
    }

    #[test]
    fn test_telemetry_accessors() {
        let config = parse(
            r#"
[telemetry]
enabled = true
otlp_endpoint = "http://collector:4317"
service_name = "test-svc"
sample_rate = 0.75
"#,
        );
        assert!(config.telemetry_enabled());
        assert_eq!(config.telemetry_otlp_endpoint(), "http://collector:4317");
        assert_eq!(config.telemetry_service_name(), "test-svc");
        assert_eq!(config.telemetry_sample_rate(), 0.75);
    }

    #[test]
    fn test_telemetry_sample_rate_out_of_range() {
        assert_rejected(
            r#"
[telemetry]
sample_rate = 1.5
"#,
            &["telemetry.sample_rate", "1.5"],
        );
    }

    #[test]
    fn test_telemetry_sample_rate_negative() {
        assert_rejected(
            r#"
[telemetry]
sample_rate = -0.1
"#,
            &["telemetry.sample_rate"],
        );
    }

    #[test]
    fn test_telemetry_enabled_empty_endpoint_rejected() {
        assert_rejected(
            r#"
[telemetry]
enabled = true
otlp_endpoint = ""
"#,
            &["telemetry.otlp_endpoint"],
        );
    }

    #[test]
    fn test_telemetry_enabled_empty_service_name_rejected() {
        assert_rejected(
            r#"
[telemetry]
enabled = true
service_name = ""
"#,
            &["telemetry.service_name"],
        );
    }

    #[test]
    fn test_env_override_telemetry_enabled() {
        with_env_overrides(&[("UCOTRON_TELEMETRY_ENABLED", "true")], |config| {
            assert!(config.telemetry.enabled);
        });
    }

    #[test]
    fn test_env_override_telemetry_endpoint() {
        with_env_overrides(
            &[("UCOTRON_TELEMETRY_OTLP_ENDPOINT", "http://custom:4317")],
            |config| {
                assert_eq!(config.telemetry.otlp_endpoint, "http://custom:4317");
            },
        );
    }

    #[test]
    fn test_env_override_telemetry_service_name() {
        with_env_overrides(&[("UCOTRON_TELEMETRY_SERVICE_NAME", "my-svc")], |config| {
            assert_eq!(config.telemetry.service_name, "my-svc");
        });
    }

    #[test]
    fn test_env_override_telemetry_sample_rate() {
        with_env_overrides(&[("UCOTRON_TELEMETRY_SAMPLE_RATE", "0.25")], |config| {
            assert_eq!(config.telemetry.sample_rate, 0.25);
        });
    }

    #[test]
    fn test_example_toml_contains_telemetry_section() {
        let commented = UcotronConfig::example_toml_commented();
        assert_contains_all(
            &commented,
            &[
                "[telemetry]",
                "otlp_endpoint",
                "service_name",
                "sample_rate",
                "export_traces",
                "export_metrics",
                "export_logs",
            ],
        );
    }

    // --- Mindset config tests ---

    #[test]
    fn test_mindset_config_defaults() {
        let config = UcotronConfig::default();
        let mindset = &config.mindset;
        assert!(mindset.enabled);
        assert!(!mindset.algorithmic_keywords.is_empty());
        assert!(!mindset.divergent_keywords.is_empty());
        assert!(!mindset.convergent_keywords.is_empty());
        assert!(mindset.algorithmic_keywords.iter().any(|k| k == "verify"));
        assert!(mindset.divergent_keywords.iter().any(|k| k == "explore"));
        assert!(mindset.convergent_keywords.iter().any(|k| k == "summarize"));
    }

    #[test]
    fn test_mindset_config_parse_toml() {
        let config = parse(
            r#"
[mindset]
enabled = false
algorithmic_keywords = ["audit", "fact-check"]
divergent_keywords = ["hypothesize"]
convergent_keywords = ["wrap up"]
"#,
        );
        assert!(!config.mindset.enabled);
        assert_eq!(
            config.mindset.algorithmic_keywords,
            vec!["audit", "fact-check"]
        );
        assert_eq!(config.mindset.divergent_keywords, vec!["hypothesize"]);
        assert_eq!(config.mindset.convergent_keywords, vec!["wrap up"]);
    }

    #[test]
    fn test_mindset_config_empty_uses_defaults() {
        let config = parse("");
        assert!(config.mindset.enabled);
        assert_eq!(config.mindset.algorithmic_keywords.len(), 6);
        assert_eq!(config.mindset.divergent_keywords.len(), 6);
        assert_eq!(config.mindset.convergent_keywords.len(), 6);
    }
}