roboticus_core/config/
model_core.rs

1#[derive(Debug, Clone, Serialize, Deserialize)]
2pub struct ServerConfig {
3    #[serde(default = "default_port")]
4    pub port: u16,
5    #[serde(default = "default_bind")]
6    pub bind: String,
7    #[serde(default, skip_serializing)]
8    pub api_key: Option<String>,
9    #[serde(default = "default_log_dir")]
10    pub log_dir: PathBuf,
11    #[serde(default = "default_log_max_days")]
12    pub log_max_days: u32,
13    #[serde(default = "default_rate_limit_requests")]
14    pub rate_limit_requests: u32,
15    #[serde(default = "default_rate_limit_window_secs")]
16    pub rate_limit_window_secs: u64,
17    #[serde(default = "default_per_ip_rate_limit_requests")]
18    pub per_ip_rate_limit_requests: u32,
19    #[serde(default = "default_per_actor_rate_limit_requests")]
20    pub per_actor_rate_limit_requests: u32,
21    #[serde(default)]
22    pub trusted_proxy_cidrs: Vec<String>,
23    #[serde(default = "default_cron_max_concurrency")]
24    pub cron_max_concurrency: u8,
25}
26
27impl Default for ServerConfig {
28    fn default() -> Self {
29        Self {
30            port: default_port(),
31            bind: default_bind(),
32            api_key: None,
33            log_dir: default_log_dir(),
34            log_max_days: default_log_max_days(),
35            rate_limit_requests: default_rate_limit_requests(),
36            rate_limit_window_secs: default_rate_limit_window_secs(),
37            per_ip_rate_limit_requests: default_per_ip_rate_limit_requests(),
38            per_actor_rate_limit_requests: default_per_actor_rate_limit_requests(),
39            trusted_proxy_cidrs: Vec::new(),
40            cron_max_concurrency: default_cron_max_concurrency(),
41        }
42    }
43}
44
/// Serde fallback for `ServerConfig::rate_limit_requests`.
fn default_rate_limit_requests() -> u32 {
    const REQUESTS: u32 = 100;
    REQUESTS
}
48
/// Serde fallback for `ServerConfig::rate_limit_window_secs`.
fn default_rate_limit_window_secs() -> u64 {
    const WINDOW_SECS: u64 = 60;
    WINDOW_SECS
}
52
/// Serde fallback for `ServerConfig::per_ip_rate_limit_requests`.
fn default_per_ip_rate_limit_requests() -> u32 {
    const PER_IP_REQUESTS: u32 = 300;
    PER_IP_REQUESTS
}
56
/// Serde fallback for `ServerConfig::per_actor_rate_limit_requests`.
fn default_per_actor_rate_limit_requests() -> u32 {
    const PER_ACTOR_REQUESTS: u32 = 200;
    PER_ACTOR_REQUESTS
}
60
/// Serde fallback for `ServerConfig::cron_max_concurrency`.
fn default_cron_max_concurrency() -> u8 {
    const MAX_CONCURRENCY: u8 = 8;
    MAX_CONCURRENCY
}
64
/// Serde fallback for `ServerConfig::port`.
fn default_port() -> u16 {
    const PORT: u16 = 18_789;
    PORT
}
68
/// Serde fallback for `ServerConfig::bind`: the owned string `"localhost"`.
fn default_bind() -> String {
    String::from("localhost")
}
72
73// ── Context budget configuration ────────────────────────────────────────────
74
/// Token budgets per complexity tier, plus the share of a budget that the
/// personality soul may use.
///
/// A key missing from the input falls back to its `default_*` helper
/// (4 000 / 8 000 / 16 000 / 32 000 tokens, `"L1"` and `0.4`).
/// Deserialization performs no range checks; call `validate_and_clamp`
/// afterwards to bring values into the supported ranges.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextBudgetConfig {
    /// Token budget for L0 (trivial) complexity.
    #[serde(default = "default_budget_l0")]
    pub l0: usize,
    /// Token budget for L1 (low) complexity.
    #[serde(default = "default_budget_l1")]
    pub l1: usize,
    /// Token budget for L2 (moderate) complexity.
    #[serde(default = "default_budget_l2")]
    pub l2: usize,
    /// Token budget for L3 (high) complexity.
    #[serde(default = "default_budget_l3")]
    pub l3: usize,
    /// Minimum complexity level for channel (non-web) messages. Stored as a
    /// free-form string; defaults to `"L1"`.
    #[serde(default = "default_channel_minimum")]
    pub channel_minimum: String,
    /// Maximum fraction (0.0–1.0) of the context budget the personality soul
    /// can consume. Values outside [0.05, 1.0] are clamped on validation.
    #[serde(default = "default_soul_max_context_pct")]
    pub soul_max_context_pct: f64,
}
97
98impl ContextBudgetConfig {
99    /// Minimum allowed token budget for any tier.
100    pub const MIN_BUDGET: usize = 512;
101    /// Maximum allowed token budget for any tier.
102    pub const MAX_BUDGET: usize = 2_000_000;
103
104    /// Clamp all tier budgets and `soul_max_context_pct` to safe ranges and
105    /// enforce that tiers are non-decreasing (l0 ≤ l1 ≤ l2 ≤ l3).
106    pub fn validate_and_clamp(&mut self) {
107        self.l0 = self.l0.clamp(Self::MIN_BUDGET, Self::MAX_BUDGET);
108        self.l1 = self.l1.clamp(Self::MIN_BUDGET, Self::MAX_BUDGET);
109        self.l2 = self.l2.clamp(Self::MIN_BUDGET, Self::MAX_BUDGET);
110        self.l3 = self.l3.clamp(Self::MIN_BUDGET, Self::MAX_BUDGET);
111        self.soul_max_context_pct = self.soul_max_context_pct.clamp(0.05, 1.0);
112        // Enforce tier ordering
113        self.l1 = self.l1.max(self.l0);
114        self.l2 = self.l2.max(self.l1);
115        self.l3 = self.l3.max(self.l2);
116    }
117
118    /// Return the maximum tokens the soul personality may consume given
119    /// `total_budget`. Returns 0 if `total_budget` is 0 to avoid
120    /// division-by-zero.
121    pub fn soul_token_cap(&self, total_budget: usize) -> usize {
122        if total_budget == 0 {
123            return 0;
124        }
125        (total_budget as f64 * self.soul_max_context_pct) as usize
126    }
127}
128
129impl Default for ContextBudgetConfig {
130    fn default() -> Self {
131        Self {
132            l0: default_budget_l0(),
133            l1: default_budget_l1(),
134            l2: default_budget_l2(),
135            l3: default_budget_l3(),
136            channel_minimum: default_channel_minimum(),
137            soul_max_context_pct: default_soul_max_context_pct(),
138        }
139    }
140}
141
/// Serde fallback for `ContextBudgetConfig::l0`.
fn default_budget_l0() -> usize {
    const L0_TOKENS: usize = 4_000;
    L0_TOKENS
}
/// Serde fallback for `ContextBudgetConfig::l1`.
fn default_budget_l1() -> usize {
    const L1_TOKENS: usize = 8_000;
    L1_TOKENS
}
/// Serde fallback for `ContextBudgetConfig::l2`.
fn default_budget_l2() -> usize {
    const L2_TOKENS: usize = 16_000;
    L2_TOKENS
}
/// Serde fallback for `ContextBudgetConfig::l3`.
fn default_budget_l3() -> usize {
    const L3_TOKENS: usize = 32_000;
    L3_TOKENS
}
/// Serde fallback for `ContextBudgetConfig::channel_minimum`: the string `"L1"`.
fn default_channel_minimum() -> String {
    String::from("L1")
}
/// Serde fallback for `ContextBudgetConfig::soul_max_context_pct`.
fn default_soul_max_context_pct() -> f64 {
    const SOUL_FRACTION: f64 = 0.4;
    SOUL_FRACTION
}
148
#[cfg(test)]
mod context_budget_tests {
    use super::ContextBudgetConfig;

    /// Out-of-range inputs must come back inside the allowed ranges with the
    /// tiers in non-decreasing order.
    #[test]
    fn context_budget_validates_and_clamps() {
        let mut budget = ContextBudgetConfig {
            l0: 0,
            l1: 0,
            l2: 0,
            l3: 999_999_999,
            channel_minimum: String::from("L1"),
            soul_max_context_pct: 2.5,
        };
        budget.validate_and_clamp();

        let floor = ContextBudgetConfig::MIN_BUDGET;
        let ceiling = ContextBudgetConfig::MAX_BUDGET;
        assert!(budget.l0 >= floor, "l0 must be >= MIN_BUDGET");
        assert!(budget.l3 <= ceiling, "l3 must be <= MAX_BUDGET");

        let soul_pct = budget.soul_max_context_pct;
        assert!(soul_pct <= 1.0, "soul_max_context_pct must be <= 1.0");
        assert!(soul_pct >= 0.05, "soul_max_context_pct must be >= 0.05");

        // Tier ordering must be preserved
        assert!(budget.l0 <= budget.l1);
        assert!(budget.l1 <= budget.l2);
        assert!(budget.l2 <= budget.l3);
    }

    /// Validating the defaults must leave every numeric field as it was.
    #[test]
    fn context_budget_validate_preserves_valid_values() {
        let original = ContextBudgetConfig::default();
        let mut validated = original.clone();
        validated.validate_and_clamp();

        assert_eq!(validated.l0, original.l0);
        assert_eq!(validated.l1, original.l1);
        assert_eq!(validated.l2, original.l2);
        assert_eq!(validated.l3, original.l3);
        assert_eq!(validated.soul_max_context_pct, original.soul_max_context_pct);
    }

    /// A zero total budget must yield a zero soul cap.
    #[test]
    fn soul_token_cap_safe_with_zero_budget() {
        let cap = ContextBudgetConfig::default().soul_token_cap(0);
        assert_eq!(cap, 0, "soul_token_cap must return 0 for zero total_budget");
    }
}
195
/// Database section of the configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseConfig {
    /// Database file path. When omitted, `default_db_path()` supplies
    /// `state.db` joined onto the result of `dirs_next()`.
    #[serde(default = "default_db_path")]
    pub path: PathBuf,
}
201
202impl Default for DatabaseConfig {
203    fn default() -> Self {
204        Self {
205            path: default_db_path(),
206        }
207    }
208}
209
210fn default_db_path() -> PathBuf {
211    dirs_next().join("state.db")
212}
213
/// Model selection section of the configuration.
///
/// `primary` is the only required key: it has no serde default, so input
/// without it fails to deserialize. All other keys fall back to their type's
/// `Default` value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelsConfig {
    /// Primary model identifier (required).
    pub primary: String,
    /// Fallback model identifiers; empty when omitted.
    #[serde(default)]
    pub fallbacks: Vec<String>,
    /// Routing settings; `RoutingConfig::default()` when omitted.
    #[serde(default)]
    pub routing: RoutingConfig,
    /// Overrides stored in a map with string keys; empty when omitted.
    // NOTE(review): keys are presumably model identifiers — confirm against the consumer.
    #[serde(default)]
    pub model_overrides: HashMap<String, ModelOverride>,
    /// `false` when omitted.
    #[serde(default)]
    pub stream_by_default: bool,
    /// Tiered-inference settings; `TieredInferenceConfig::default()` when omitted.
    #[serde(default)]
    pub tiered_inference: TieredInferenceConfig,
}
228
/// Tiered-inference settings. Every key is optional on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TieredInferenceConfig {
    /// `false` when omitted.
    #[serde(default)]
    pub enabled: bool,
    /// 0.6 when omitted. Not range-checked in this file.
    #[serde(default = "default_confidence_floor")]
    pub confidence_floor: f64,
    /// 3000 when omitted; the field name gives the unit as milliseconds.
    #[serde(default = "default_escalation_latency_ms")]
    pub escalation_latency_budget_ms: u64,
}
238
/// Serde fallback for `TieredInferenceConfig::confidence_floor`.
fn default_confidence_floor() -> f64 {
    const CONFIDENCE_FLOOR: f64 = 0.6;
    CONFIDENCE_FLOOR
}
/// Serde fallback for `TieredInferenceConfig::escalation_latency_budget_ms`.
fn default_escalation_latency_ms() -> u64 {
    const LATENCY_BUDGET_MS: u64 = 3_000;
    LATENCY_BUDGET_MS
}
245
246impl Default for TieredInferenceConfig {
247    fn default() -> Self {
248        Self {
249            enabled: false,
250            confidence_floor: default_confidence_floor(),
251            escalation_latency_budget_ms: default_escalation_latency_ms(),
252        }
253    }
254}
255
/// Model-routing settings. Every key is optional on input; none of the
/// numeric ranges mentioned below are enforced by this struct itself.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutingConfig {
    /// Routing mode name; `"metascore"` when omitted. Stored as a free-form
    /// string — the accepted values are not defined in this file.
    #[serde(default = "default_routing_mode")]
    pub mode: String,
    /// 0.9 when omitted.
    #[serde(default = "default_confidence_threshold")]
    pub confidence_threshold: f64,
    /// `true` when omitted.
    #[serde(default = "default_true")]
    pub local_first: bool,
    /// `false` when omitted. See `cost_weight` for the continuous
    /// alternative that takes precedence when set.
    #[serde(default)]
    pub cost_aware: bool,
    /// 500 when omitted.
    #[serde(default = "default_estimated_output_tokens")]
    pub estimated_output_tokens: u32,
    /// Minimum observed quality score (0.0–1.0) for a model to be considered
    /// during metascore routing.  Models with fewer than `accuracy_min_obs`
    /// observations are exempt (insufficient data). Set to 0.0 to disable.
    #[serde(default)]
    pub accuracy_floor: f64,
    /// Minimum observations before the accuracy floor applies to a model.
    /// 10 when omitted.
    #[serde(default = "default_accuracy_min_obs")]
    pub accuracy_min_obs: usize,
    /// Custom cost weight for metascore \[0.0–1.0\]. When set, replaces the
    /// binary `cost_aware` toggle with a continuous dial: 0.0 = ignore cost,
    /// 1.0 = maximize savings. Efficacy weight adjusts inversely.
    /// When `None`, falls back to `cost_aware` boolean behavior.
    #[serde(default)]
    pub cost_weight: Option<f64>,
    /// Canary model to route a fraction of traffic through for A/B validation.
    /// When set, `canary_fraction` of requests are routed to this model instead
    /// of the metascore winner. Set to `None` to disable canary routing.
    #[serde(default)]
    pub canary_model: Option<String>,
    /// Fraction of requests routed to the canary model \[0.0–1.0\].
    /// Only effective when `canary_model` is set. Default: 0.0 (disabled).
    #[serde(default)]
    pub canary_fraction: f64,
    /// Static model blocklist — models listed here are unconditionally excluded
    /// from all routing paths (override, metascore, fallback). Useful as an
    /// instant kill-switch without restarting the server.
    #[serde(default)]
    pub blocked_models: Vec<String>,
    /// Per-provider timeout in seconds for interactive inference. If a single
    /// model doesn't respond within this window, the fallback chain advances.
    /// Increase for slow local models (e.g. large quantized models on CPU/GPU).
    /// 30 when omitted.
    #[serde(default = "default_per_provider_timeout")]
    pub per_provider_timeout_seconds: u64,
    /// Total wall-clock budget in seconds for the entire inference fallback
    /// chain (all attempts combined). Increase if you have many fallback
    /// candidates or slow providers. 120 when omitted.
    #[serde(default = "default_max_total_inference")]
    pub max_total_inference_seconds: u64,
    /// Maximum number of fallback attempts before giving up. 6 when omitted.
    #[serde(default = "default_max_fallback_attempts")]
    pub max_fallback_attempts: usize,
}
310
311impl Default for RoutingConfig {
312    fn default() -> Self {
313        Self {
314            mode: default_routing_mode(),
315            confidence_threshold: default_confidence_threshold(),
316            local_first: true,
317            cost_aware: false,
318            estimated_output_tokens: default_estimated_output_tokens(),
319            accuracy_floor: 0.0,
320            accuracy_min_obs: default_accuracy_min_obs(),
321            cost_weight: None,
322            canary_model: None,
323            canary_fraction: 0.0,
324            blocked_models: Vec::new(),
325            per_provider_timeout_seconds: default_per_provider_timeout(),
326            max_total_inference_seconds: default_max_total_inference(),
327            max_fallback_attempts: default_max_fallback_attempts(),
328        }
329    }
330}
331
/// Serde fallback for `RoutingConfig::accuracy_min_obs`.
fn default_accuracy_min_obs() -> usize {
    const MIN_OBSERVATIONS: usize = 10;
    MIN_OBSERVATIONS
}
335
/// Serde fallback for `RoutingConfig::per_provider_timeout_seconds`.
fn default_per_provider_timeout() -> u64 {
    const TIMEOUT_SECONDS: u64 = 30;
    TIMEOUT_SECONDS
}
339
/// Serde fallback for `RoutingConfig::max_total_inference_seconds`.
fn default_max_total_inference() -> u64 {
    const TOTAL_SECONDS: u64 = 120;
    TOTAL_SECONDS
}
343
/// Serde fallback for `RoutingConfig::max_fallback_attempts`.
fn default_max_fallback_attempts() -> usize {
    const MAX_ATTEMPTS: usize = 6;
    MAX_ATTEMPTS
}
347
/// Serde fallback for `RoutingConfig::estimated_output_tokens`.
fn default_estimated_output_tokens() -> u32 {
    const OUTPUT_TOKENS: u32 = 500;
    OUTPUT_TOKENS
}
351
/// Serde fallback for `RoutingConfig::mode`: the string `"metascore"`.
fn default_routing_mode() -> String {
    String::from("metascore")
}
355
/// Serde fallback for `RoutingConfig::confidence_threshold`.
fn default_confidence_threshold() -> f64 {
    const CONFIDENCE_THRESHOLD: f64 = 0.9;
    CONFIDENCE_THRESHOLD
}
359
/// Serde fallback for boolean fields that are on unless configured otherwise
/// (`RoutingConfig::local_first`, `TierAdaptConfig::t3_t4_passthrough`,
/// `CacheConfig::enabled`).
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
363
/// Settings for a single provider entry.
///
/// `url` and `tier` are the only required keys (they carry no serde
/// default). Every other field is an `Option` that deserializes to `None`
/// when its key is absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderConfig {
    /// Provider URL (required).
    pub url: String,
    /// Tier label (required). Stored as a free-form string.
    pub tier: String,
    #[serde(default)]
    pub format: Option<String>,
    // NOTE(review): presumably the name of an environment variable holding the
    // key rather than the key itself — confirm against the loader.
    #[serde(default)]
    pub api_key_env: Option<String>,
    #[serde(default)]
    pub chat_path: Option<String>,
    #[serde(default)]
    pub embedding_path: Option<String>,
    #[serde(default)]
    pub embedding_model: Option<String>,
    #[serde(default)]
    pub embedding_dimensions: Option<usize>,
    #[serde(default)]
    pub is_local: Option<bool>,
    #[serde(default)]
    pub cost_per_input_token: Option<f64>,
    #[serde(default)]
    pub cost_per_output_token: Option<f64>,
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Additional header name/value pairs.
    // NOTE(review): values may be sensitive and are printed by the derived
    // `Debug` — confirm whether that is acceptable.
    #[serde(default)]
    pub extra_headers: Option<HashMap<String, String>>,
    #[serde(default)]
    pub tpm_limit: Option<u64>,
    #[serde(default)]
    pub rpm_limit: Option<u64>,
    #[serde(default)]
    pub auth_mode: Option<String>,
    #[serde(default)]
    pub oauth_client_id: Option<String>,
    #[serde(default)]
    pub oauth_redirect_uri: Option<String>,
    #[serde(default)]
    pub api_key_ref: Option<String>,
}
403
404impl ProviderConfig {
405    pub fn new(url: impl Into<String>, tier: impl Into<String>) -> Self {
406        Self {
407            url: url.into(),
408            tier: tier.into(),
409            format: None,
410            api_key_env: None,
411            chat_path: None,
412            embedding_path: None,
413            embedding_model: None,
414            embedding_dimensions: None,
415            is_local: None,
416            cost_per_input_token: None,
417            cost_per_output_token: None,
418            auth_header: None,
419            extra_headers: None,
420            tpm_limit: None,
421            rpm_limit: None,
422            auth_mode: None,
423            oauth_client_id: None,
424            oauth_redirect_uri: None,
425            api_key_ref: None,
426        }
427    }
428}
429
/// Optional per-model overrides, used as the value type of
/// `ModelsConfig::model_overrides`. Every field is `None` unless its key is
/// present in the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelOverride {
    #[serde(default)]
    pub tier: Option<String>,
    #[serde(default)]
    pub cost_per_input_token: Option<f64>,
    #[serde(default)]
    pub cost_per_output_token: Option<f64>,
}
439
/// Per-tier adaptation switches. Every key is optional on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TierAdaptConfig {
    /// `false` when omitted.
    #[serde(default)]
    pub t1_strip_system: bool,
    /// `false` when omitted.
    #[serde(default)]
    pub t1_condense_turns: bool,
    /// Defaults to `Some("Be concise and direct. Focus on accuracy.")` via
    /// `default_t2_preamble`, so omitting the key does not yield `None`.
    #[serde(default = "default_t2_preamble")]
    pub t2_default_preamble: Option<String>,
    /// `true` when omitted.
    #[serde(default = "default_true")]
    pub t3_t4_passthrough: bool,
}
451
452impl Default for TierAdaptConfig {
453    fn default() -> Self {
454        Self {
455            t1_strip_system: false,
456            t1_condense_turns: false,
457            t2_default_preamble: default_t2_preamble(),
458            t3_t4_passthrough: true,
459        }
460    }
461}
462
/// Serde fallback for `TierAdaptConfig::t2_default_preamble`; always `Some`.
fn default_t2_preamble() -> Option<String> {
    let preamble = String::from("Be concise and direct. Focus on accuracy.");
    Some(preamble)
}
466
/// Circuit-breaker settings. Every key is optional on input; the field names
/// give the unit of the three durations as seconds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CircuitBreakerConfig {
    /// 3 when omitted.
    #[serde(default = "default_cb_threshold")]
    pub threshold: u32,
    /// 60 when omitted.
    #[serde(default = "default_cb_window")]
    pub window_seconds: u64,
    /// 60 when omitted.
    #[serde(default = "default_cb_cooldown")]
    pub cooldown_seconds: u64,
    /// 900 when omitted.
    #[serde(default = "default_cb_max_cooldown")]
    pub max_cooldown_seconds: u64,
}
478
479impl Default for CircuitBreakerConfig {
480    fn default() -> Self {
481        Self {
482            threshold: default_cb_threshold(),
483            window_seconds: default_cb_window(),
484            cooldown_seconds: default_cb_cooldown(),
485            max_cooldown_seconds: default_cb_max_cooldown(),
486        }
487    }
488}
489
/// Serde fallback for `CircuitBreakerConfig::threshold`.
fn default_cb_threshold() -> u32 {
    const THRESHOLD: u32 = 3;
    THRESHOLD
}
/// Serde fallback for `CircuitBreakerConfig::window_seconds`.
fn default_cb_window() -> u64 {
    const WINDOW_SECONDS: u64 = 60;
    WINDOW_SECONDS
}
/// Serde fallback for `CircuitBreakerConfig::cooldown_seconds`.
fn default_cb_cooldown() -> u64 {
    const COOLDOWN_SECONDS: u64 = 60;
    COOLDOWN_SECONDS
}
/// Serde fallback for `CircuitBreakerConfig::max_cooldown_seconds`.
fn default_cb_max_cooldown() -> u64 {
    const MAX_COOLDOWN_SECONDS: u64 = 900;
    MAX_COOLDOWN_SECONDS
}
502
/// Memory section of the configuration. Every key is optional on input.
///
/// The five `*_budget_pct` defaults (30 / 25 / 20 / 15 / 10) add up to 100;
/// nothing in this struct enforces that sum for user-supplied values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryConfig {
    /// 30.0 when omitted.
    #[serde(default = "default_working_pct")]
    pub working_budget_pct: f64,
    /// 25.0 when omitted.
    #[serde(default = "default_episodic_pct")]
    pub episodic_budget_pct: f64,
    /// 20.0 when omitted.
    #[serde(default = "default_semantic_pct")]
    pub semantic_budget_pct: f64,
    /// 15.0 when omitted.
    #[serde(default = "default_procedural_pct")]
    pub procedural_budget_pct: f64,
    /// 10.0 when omitted.
    #[serde(default = "default_relationship_pct")]
    pub relationship_budget_pct: f64,
    /// `None` when omitted.
    #[serde(default)]
    pub embedding_provider: Option<String>,
    /// `None` when omitted.
    #[serde(default)]
    pub embedding_model: Option<String>,
    /// 0.5 when omitted.
    #[serde(default = "default_hybrid_weight")]
    pub hybrid_weight: f64,
    /// `false` when omitted.
    #[serde(default)]
    pub ann_index: bool,
    /// Minimum similarity score for retrieval results. Default 0.0 (no floor).
    #[serde(default)]
    pub similarity_threshold: f64,
    /// Half-life in days for episodic memory decay. Default: 7.0.
    #[serde(default = "default_decay_half_life")]
    pub decay_half_life_days: f64,
    /// Embeddings above which ANN index replaces brute-force scan. Default: 1000.
    #[serde(default = "default_ann_threshold")]
    pub ann_activation_threshold: usize,
}
533
534impl Default for MemoryConfig {
535    fn default() -> Self {
536        Self {
537            working_budget_pct: default_working_pct(),
538            episodic_budget_pct: default_episodic_pct(),
539            semantic_budget_pct: default_semantic_pct(),
540            procedural_budget_pct: default_procedural_pct(),
541            relationship_budget_pct: default_relationship_pct(),
542            embedding_provider: None,
543            embedding_model: None,
544            hybrid_weight: default_hybrid_weight(),
545            ann_index: false,
546            similarity_threshold: 0.0,
547            decay_half_life_days: default_decay_half_life(),
548            ann_activation_threshold: default_ann_threshold(),
549        }
550    }
551}
552
/// Serde fallback for `MemoryConfig::decay_half_life_days`.
fn default_decay_half_life() -> f64 {
    const HALF_LIFE_DAYS: f64 = 7.0;
    HALF_LIFE_DAYS
}
556
/// Serde fallback for `MemoryConfig::ann_activation_threshold`.
fn default_ann_threshold() -> usize {
    const ACTIVATION_THRESHOLD: usize = 1_000;
    ACTIVATION_THRESHOLD
}
560
/// Serde fallback for `MemoryConfig::hybrid_weight`.
fn default_hybrid_weight() -> f64 {
    const HYBRID_WEIGHT: f64 = 0.5;
    HYBRID_WEIGHT
}
564
/// Serde fallback for `MemoryConfig::working_budget_pct`.
fn default_working_pct() -> f64 {
    const WORKING_PCT: f64 = 30.0;
    WORKING_PCT
}
/// Serde fallback for `MemoryConfig::episodic_budget_pct`.
fn default_episodic_pct() -> f64 {
    const EPISODIC_PCT: f64 = 25.0;
    EPISODIC_PCT
}
/// Serde fallback for `MemoryConfig::semantic_budget_pct`.
fn default_semantic_pct() -> f64 {
    const SEMANTIC_PCT: f64 = 20.0;
    SEMANTIC_PCT
}
/// Serde fallback for `MemoryConfig::procedural_budget_pct`.
fn default_procedural_pct() -> f64 {
    const PROCEDURAL_PCT: f64 = 15.0;
    PROCEDURAL_PCT
}
/// Serde fallback for `MemoryConfig::relationship_budget_pct`.
fn default_relationship_pct() -> f64 {
    const RELATIONSHIP_PCT: f64 = 10.0;
    RELATIONSHIP_PCT
}
580
/// Cache section of the configuration. Every key is optional on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheConfig {
    /// `true` when omitted.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// 3600 when omitted; the field name gives the unit as seconds.
    #[serde(default = "default_cache_ttl")]
    pub exact_match_ttl_seconds: u64,
    /// 0.95 when omitted.
    #[serde(default = "default_semantic_threshold")]
    pub semantic_threshold: f64,
    /// 10000 when omitted.
    #[serde(default = "default_max_entries")]
    pub max_entries: usize,
    /// `false` when omitted.
    #[serde(default)]
    pub prompt_compression: bool,
    /// 0.5 when omitted.
    #[serde(default = "default_compression_ratio")]
    pub compression_target_ratio: f64,
}
596
597impl Default for CacheConfig {
598    fn default() -> Self {
599        Self {
600            enabled: true,
601            exact_match_ttl_seconds: default_cache_ttl(),
602            semantic_threshold: default_semantic_threshold(),
603            max_entries: default_max_entries(),
604            prompt_compression: false,
605            compression_target_ratio: default_compression_ratio(),
606        }
607    }
608}
609
/// Serde fallback for `CacheConfig::compression_target_ratio`.
fn default_compression_ratio() -> f64 {
    const TARGET_RATIO: f64 = 0.5;
    TARGET_RATIO
}
613
/// Serde fallback for `CacheConfig::exact_match_ttl_seconds`.
fn default_cache_ttl() -> u64 {
    const TTL_SECONDS: u64 = 3_600;
    TTL_SECONDS
}
/// Serde fallback for `CacheConfig::semantic_threshold`.
fn default_semantic_threshold() -> f64 {
    const SEMANTIC_THRESHOLD: f64 = 0.95;
    SEMANTIC_THRESHOLD
}
/// Serde fallback for `CacheConfig::max_entries`.
fn default_max_entries() -> usize {
    const MAX_ENTRIES: usize = 10_000;
    MAX_ENTRIES
}
623
#[cfg(test)]
mod memory_config_tests {
    use super::MemoryConfig;

    /// The three tuning knobs must carry their documented defaults.
    #[test]
    fn memory_config_tuning_defaults() {
        let MemoryConfig {
            similarity_threshold,
            decay_half_life_days,
            ann_activation_threshold,
            ..
        } = MemoryConfig::default();

        assert_eq!(similarity_threshold, 0.0);
        assert_eq!(decay_half_life_days, 7.0);
        assert_eq!(ann_activation_threshold, 1000);
    }

    /// Explicit TOML values for the tuning knobs must override the defaults.
    #[test]
    fn memory_config_tuning_deserializes() {
        let raw = r#"
            similarity_threshold = 0.3
            decay_half_life_days = 14.0
            ann_activation_threshold = 500
        "#;
        let parsed: MemoryConfig = toml::from_str(raw).unwrap();

        let is_close = |actual: f64, expected: f64| (actual - expected).abs() < f64::EPSILON;
        assert!(is_close(parsed.similarity_threshold, 0.3));
        assert!(is_close(parsed.decay_half_life_days, 14.0));
        assert_eq!(parsed.ann_activation_threshold, 500);
    }
}
649
roboticus_core/config/model_core.rs

roboticus_core/config/
model_core.rs