Skip to main content

roboticus_core/config/
model_core.rs

1#[derive(Debug, Clone, Serialize, Deserialize)]
2pub struct ServerConfig {
3    #[serde(default = "default_port")]
4    pub port: u16,
5    #[serde(default = "default_bind")]
6    pub bind: String,
7    #[serde(default, skip_serializing)]
8    pub api_key: Option<String>,
9    #[serde(default = "default_log_dir")]
10    pub log_dir: PathBuf,
11    #[serde(default = "default_log_max_days")]
12    pub log_max_days: u32,
13    #[serde(default = "default_rate_limit_requests")]
14    pub rate_limit_requests: u32,
15    #[serde(default = "default_rate_limit_window_secs")]
16    pub rate_limit_window_secs: u64,
17    #[serde(default = "default_per_ip_rate_limit_requests")]
18    pub per_ip_rate_limit_requests: u32,
19    #[serde(default = "default_per_actor_rate_limit_requests")]
20    pub per_actor_rate_limit_requests: u32,
21    #[serde(default)]
22    pub trusted_proxy_cidrs: Vec<String>,
23    #[serde(default = "default_cron_max_concurrency")]
24    pub cron_max_concurrency: u8,
25}
26
27impl Default for ServerConfig {
28    fn default() -> Self {
29        Self {
30            port: default_port(),
31            bind: default_bind(),
32            api_key: None,
33            log_dir: default_log_dir(),
34            log_max_days: default_log_max_days(),
35            rate_limit_requests: default_rate_limit_requests(),
36            rate_limit_window_secs: default_rate_limit_window_secs(),
37            per_ip_rate_limit_requests: default_per_ip_rate_limit_requests(),
38            per_actor_rate_limit_requests: default_per_actor_rate_limit_requests(),
39            trusted_proxy_cidrs: Vec::new(),
40            cron_max_concurrency: default_cron_max_concurrency(),
41        }
42    }
43}
44
/// Default for `ServerConfig::rate_limit_requests`.
fn default_rate_limit_requests() -> u32 { 100 }

/// Default for `ServerConfig::rate_limit_window_secs` (seconds).
fn default_rate_limit_window_secs() -> u64 { 60 }

/// Default for `ServerConfig::per_ip_rate_limit_requests`.
fn default_per_ip_rate_limit_requests() -> u32 { 300 }

/// Default for `ServerConfig::per_actor_rate_limit_requests`.
fn default_per_actor_rate_limit_requests() -> u32 { 200 }

/// Default for `ServerConfig::cron_max_concurrency`.
fn default_cron_max_concurrency() -> u8 { 8 }

/// Default for `ServerConfig::port`.
fn default_port() -> u16 { 18789 }

/// Default for `ServerConfig::bind`.
fn default_bind() -> String {
    String::from("localhost")
}
72
73// ── Context budget configuration ────────────────────────────────────────────
74
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct ContextBudgetConfig {
77    /// Token budget for L0 (trivial) complexity.
78    #[serde(default = "default_budget_l0")]
79    pub l0: usize,
80    /// Token budget for L1 (low) complexity.
81    #[serde(default = "default_budget_l1")]
82    pub l1: usize,
83    /// Token budget for L2 (moderate) complexity.
84    #[serde(default = "default_budget_l2")]
85    pub l2: usize,
86    /// Token budget for L3 (high) complexity.
87    #[serde(default = "default_budget_l3")]
88    pub l3: usize,
89    /// Minimum complexity level for channel (non-web) messages.
90    #[serde(default = "default_channel_minimum")]
91    pub channel_minimum: String,
92    /// Maximum percentage (0.0–1.0) of context budget the personality soul
93    /// can consume. Values outside [0.05, 1.0] are clamped on validation.
94    #[serde(default = "default_soul_max_context_pct")]
95    pub soul_max_context_pct: f64,
96}
97
98impl ContextBudgetConfig {
99    /// Minimum allowed token budget for any tier.
100    pub const MIN_BUDGET: usize = 512;
101    /// Maximum allowed token budget for any tier.
102    pub const MAX_BUDGET: usize = 2_000_000;
103
104    /// Clamp all tier budgets and `soul_max_context_pct` to safe ranges and
105    /// enforce that tiers are non-decreasing (l0 ≤ l1 ≤ l2 ≤ l3).
106    pub fn validate_and_clamp(&mut self) {
107        self.l0 = self.l0.clamp(Self::MIN_BUDGET, Self::MAX_BUDGET);
108        self.l1 = self.l1.clamp(Self::MIN_BUDGET, Self::MAX_BUDGET);
109        self.l2 = self.l2.clamp(Self::MIN_BUDGET, Self::MAX_BUDGET);
110        self.l3 = self.l3.clamp(Self::MIN_BUDGET, Self::MAX_BUDGET);
111        self.soul_max_context_pct = self.soul_max_context_pct.clamp(0.05, 1.0);
112        // Enforce tier ordering
113        self.l1 = self.l1.max(self.l0);
114        self.l2 = self.l2.max(self.l1);
115        self.l3 = self.l3.max(self.l2);
116    }
117
118    /// Parse `channel_minimum` string to a complexity level index (0–3).
119    /// Returns the level as a `u8` so callers can compare with
120    /// `ComplexityLevel` variants without pulling in the agent crate.
121    /// Unrecognized values default to 1 (L1).
122    pub fn channel_minimum_level(&self) -> u8 {
123        match self.channel_minimum.to_ascii_uppercase().as_str() {
124            "L0" => 0,
125            "L1" => 1,
126            "L2" => 2,
127            "L3" => 3,
128            _ => 1, // default L1
129        }
130    }
131
132    /// Return the maximum tokens the soul personality may consume given
133    /// `total_budget`. Returns 0 if `total_budget` is 0 to avoid
134    /// division-by-zero.
135    pub fn soul_token_cap(&self, total_budget: usize) -> usize {
136        if total_budget == 0 {
137            return 0;
138        }
139        (total_budget as f64 * self.soul_max_context_pct) as usize
140    }
141}
142
143impl Default for ContextBudgetConfig {
144    fn default() -> Self {
145        Self {
146            l0: default_budget_l0(),
147            l1: default_budget_l1(),
148            l2: default_budget_l2(),
149            l3: default_budget_l3(),
150            channel_minimum: default_channel_minimum(),
151            soul_max_context_pct: default_soul_max_context_pct(),
152        }
153    }
154}
155
/// Default token budget for tier L0.
fn default_budget_l0() -> usize {
    8_000
}

/// Default token budget for tier L1 (same value as L0).
fn default_budget_l1() -> usize {
    8_000
}

/// Default token budget for tier L2.
fn default_budget_l2() -> usize {
    16_000
}

/// Default token budget for tier L3.
fn default_budget_l3() -> usize {
    32_000
}

/// Default for `ContextBudgetConfig::channel_minimum`.
fn default_channel_minimum() -> String {
    String::from("L1")
}

/// Default for `ContextBudgetConfig::soul_max_context_pct`.
fn default_soul_max_context_pct() -> f64 {
    0.4
}
162
#[cfg(test)]
mod context_budget_tests {
    use super::ContextBudgetConfig;

    /// Out-of-range budgets and percentage must land inside their limits,
    /// with the tiers still in non-decreasing order.
    #[test]
    fn context_budget_validates_and_clamps() {
        let mut budget = ContextBudgetConfig {
            l0: 0,
            l1: 0,
            l2: 0,
            l3: 999_999_999,
            channel_minimum: String::from("L1"),
            soul_max_context_pct: 2.5,
        };

        budget.validate_and_clamp();

        let floor = ContextBudgetConfig::MIN_BUDGET;
        let ceiling = ContextBudgetConfig::MAX_BUDGET;
        assert!(budget.l0 >= floor, "l0 must be >= MIN_BUDGET");
        assert!(budget.l3 <= ceiling, "l3 must be <= MAX_BUDGET");

        let pct = budget.soul_max_context_pct;
        assert!(pct <= 1.0, "soul_max_context_pct must be <= 1.0");
        assert!(pct >= 0.05, "soul_max_context_pct must be >= 0.05");

        // Tier ordering must be preserved
        assert!(budget.l0 <= budget.l1);
        assert!(budget.l1 <= budget.l2);
        assert!(budget.l2 <= budget.l3);
    }

    /// Validation must be a no-op on the shipped defaults.
    #[test]
    fn context_budget_validate_preserves_valid_values() {
        let original = ContextBudgetConfig::default();
        let mut validated = original.clone();

        validated.validate_and_clamp();

        assert_eq!(validated.l0, original.l0);
        assert_eq!(validated.l1, original.l1);
        assert_eq!(validated.l2, original.l2);
        assert_eq!(validated.l3, original.l3);
        assert_eq!(validated.soul_max_context_pct, original.soul_max_context_pct);
    }

    /// A zero total budget yields a zero soul cap.
    #[test]
    fn soul_token_cap_safe_with_zero_budget() {
        let cap = ContextBudgetConfig::default().soul_token_cap(0);
        assert_eq!(cap, 0, "soul_token_cap must return 0 for zero total_budget");
    }
}
209
210#[derive(Debug, Clone, Serialize, Deserialize)]
211pub struct DatabaseConfig {
212    #[serde(default = "default_db_path")]
213    pub path: PathBuf,
214}
215
216impl Default for DatabaseConfig {
217    fn default() -> Self {
218        Self {
219            path: default_db_path(),
220        }
221    }
222}
223
224fn default_db_path() -> PathBuf {
225    dirs_next().join("state.db")
226}
227
228#[derive(Debug, Clone, Serialize, Deserialize)]
229pub struct ModelsConfig {
230    pub primary: String,
231    #[serde(default)]
232    pub fallbacks: Vec<String>,
233    #[serde(default)]
234    pub routing: RoutingConfig,
235    #[serde(default)]
236    pub model_overrides: HashMap<String, ModelOverride>,
237    #[serde(default)]
238    pub stream_by_default: bool,
239    #[serde(default)]
240    pub tiered_inference: TieredInferenceConfig,
241}
242
243#[derive(Debug, Clone, Serialize, Deserialize)]
244pub struct TieredInferenceConfig {
245    #[serde(default)]
246    pub enabled: bool,
247    #[serde(default = "default_confidence_floor")]
248    pub confidence_floor: f64,
249    #[serde(default = "default_escalation_latency_ms")]
250    pub escalation_latency_budget_ms: u64,
251}
252
/// Default for `TieredInferenceConfig::confidence_floor`.
fn default_confidence_floor() -> f64 { 0.6 }

/// Default for `TieredInferenceConfig::escalation_latency_budget_ms`.
fn default_escalation_latency_ms() -> u64 { 3000 }
259
260impl Default for TieredInferenceConfig {
261    fn default() -> Self {
262        Self {
263            enabled: false,
264            confidence_floor: default_confidence_floor(),
265            escalation_latency_budget_ms: default_escalation_latency_ms(),
266        }
267    }
268}
269
270#[derive(Debug, Clone, Serialize, Deserialize)]
271pub struct RoutingConfig {
272    #[serde(default = "default_routing_mode")]
273    pub mode: String,
274    #[serde(default = "default_confidence_threshold")]
275    pub confidence_threshold: f64,
276    #[serde(default = "default_true")]
277    pub local_first: bool,
278    #[serde(default)]
279    pub cost_aware: bool,
280    #[serde(default = "default_estimated_output_tokens")]
281    pub estimated_output_tokens: u32,
282    /// Minimum observed quality score (0.0–1.0) for a model to be considered
283    /// during metascore routing.  Models with fewer than `accuracy_min_obs`
284    /// observations are exempt (insufficient data). Set to 0.0 to disable.
285    #[serde(default)]
286    pub accuracy_floor: f64,
287    /// Minimum observations before the accuracy floor applies to a model.
288    #[serde(default = "default_accuracy_min_obs")]
289    pub accuracy_min_obs: usize,
290    /// Custom cost weight for metascore \[0.0–1.0\]. When set, replaces the
291    /// binary `cost_aware` toggle with a continuous dial: 0.0 = ignore cost,
292    /// 1.0 = maximize savings. Efficacy weight adjusts inversely.
293    /// When `None`, falls back to `cost_aware` boolean behavior.
294    #[serde(default)]
295    pub cost_weight: Option<f64>,
296    /// Canary model to route a fraction of traffic through for A/B validation.
297    /// When set, `canary_fraction` of requests are routed to this model instead
298    /// of the metascore winner. Set to `None` to disable canary routing.
299    #[serde(default)]
300    pub canary_model: Option<String>,
301    /// Fraction of requests routed to the canary model [0.0–1.0].
302    /// Only effective when `canary_model` is set. Default: 0.0 (disabled).
303    #[serde(default)]
304    pub canary_fraction: f64,
305    /// Static model blocklist — models listed here are unconditionally excluded
306    /// from all routing paths (override, metascore, fallback). Useful as an
307    /// instant kill-switch without restarting the server.
308    #[serde(default)]
309    pub blocked_models: Vec<String>,
310    /// Per-provider timeout in seconds for interactive inference. If a single
311    /// model doesn't respond within this window, the fallback chain advances.
312    /// Increase for slow local models (e.g. large quantized models on CPU/GPU).
313    #[serde(default = "default_per_provider_timeout")]
314    pub per_provider_timeout_seconds: u64,
315    /// Total wall-clock budget in seconds for the entire inference fallback
316    /// chain (all attempts combined). Increase if you have many fallback
317    /// candidates or slow providers.
318    #[serde(default = "default_max_total_inference")]
319    pub max_total_inference_seconds: u64,
320    /// Maximum number of fallback attempts before giving up.
321    #[serde(default = "default_max_fallback_attempts")]
322    pub max_fallback_attempts: usize,
323}
324
325impl Default for RoutingConfig {
326    fn default() -> Self {
327        Self {
328            mode: default_routing_mode(),
329            confidence_threshold: default_confidence_threshold(),
330            local_first: true,
331            cost_aware: false,
332            estimated_output_tokens: default_estimated_output_tokens(),
333            accuracy_floor: 0.0,
334            accuracy_min_obs: default_accuracy_min_obs(),
335            cost_weight: None,
336            canary_model: None,
337            canary_fraction: 0.0,
338            blocked_models: Vec::new(),
339            per_provider_timeout_seconds: default_per_provider_timeout(),
340            max_total_inference_seconds: default_max_total_inference(),
341            max_fallback_attempts: default_max_fallback_attempts(),
342        }
343    }
344}
345
/// Default for `RoutingConfig::accuracy_min_obs`.
fn default_accuracy_min_obs() -> usize { 10 }

/// Default for `RoutingConfig::per_provider_timeout_seconds`.
fn default_per_provider_timeout() -> u64 { 30 }

/// Default for `RoutingConfig::max_total_inference_seconds`.
fn default_max_total_inference() -> u64 { 120 }

/// Default for `RoutingConfig::max_fallback_attempts`.
fn default_max_fallback_attempts() -> usize { 6 }

/// Default for `RoutingConfig::estimated_output_tokens`.
fn default_estimated_output_tokens() -> u32 { 500 }

/// Default for `RoutingConfig::mode`.
fn default_routing_mode() -> String {
    String::from("auto")
}

/// Default for `RoutingConfig::confidence_threshold`.
fn default_confidence_threshold() -> f64 { 0.9 }

/// Serde helper for boolean fields that default to `true`.
fn default_true() -> bool { true }
377
378#[derive(Debug, Clone, Serialize, Deserialize)]
379pub struct ProviderConfig {
380    pub url: String,
381    pub tier: String,
382    #[serde(default)]
383    pub format: Option<String>,
384    #[serde(default)]
385    pub api_key_env: Option<String>,
386    #[serde(default)]
387    pub chat_path: Option<String>,
388    #[serde(default)]
389    pub embedding_path: Option<String>,
390    #[serde(default)]
391    pub embedding_model: Option<String>,
392    #[serde(default)]
393    pub embedding_dimensions: Option<usize>,
394    #[serde(default)]
395    pub is_local: Option<bool>,
396    #[serde(default)]
397    pub cost_per_input_token: Option<f64>,
398    #[serde(default)]
399    pub cost_per_output_token: Option<f64>,
400    #[serde(default)]
401    pub auth_header: Option<String>,
402    #[serde(default)]
403    pub extra_headers: Option<HashMap<String, String>>,
404    #[serde(default)]
405    pub tpm_limit: Option<u64>,
406    #[serde(default)]
407    pub rpm_limit: Option<u64>,
408    #[serde(default)]
409    pub auth_mode: Option<String>,
410    #[serde(default)]
411    pub oauth_client_id: Option<String>,
412    #[serde(default)]
413    pub oauth_redirect_uri: Option<String>,
414    #[serde(default)]
415    pub api_key_ref: Option<String>,
416}
417
418impl ProviderConfig {
419    pub fn new(url: impl Into<String>, tier: impl Into<String>) -> Self {
420        Self {
421            url: url.into(),
422            tier: tier.into(),
423            format: None,
424            api_key_env: None,
425            chat_path: None,
426            embedding_path: None,
427            embedding_model: None,
428            embedding_dimensions: None,
429            is_local: None,
430            cost_per_input_token: None,
431            cost_per_output_token: None,
432            auth_header: None,
433            extra_headers: None,
434            tpm_limit: None,
435            rpm_limit: None,
436            auth_mode: None,
437            oauth_client_id: None,
438            oauth_redirect_uri: None,
439            api_key_ref: None,
440        }
441    }
442}
443
444#[derive(Debug, Clone, Serialize, Deserialize)]
445pub struct ModelOverride {
446    #[serde(default)]
447    pub tier: Option<String>,
448    #[serde(default)]
449    pub cost_per_input_token: Option<f64>,
450    #[serde(default)]
451    pub cost_per_output_token: Option<f64>,
452    /// Per-model timeout for a single inference attempt (seconds).
453    /// Overrides `routing.per_provider_timeout_seconds` for this model.
454    #[serde(default)]
455    pub per_provider_timeout_seconds: Option<u64>,
456    /// Per-model wall-clock budget for the entire fallback chain (seconds).
457    /// Overrides `routing.max_total_inference_seconds` for this model.
458    #[serde(default)]
459    pub max_total_inference_seconds: Option<u64>,
460}
461
462#[derive(Debug, Clone, Serialize, Deserialize)]
463pub struct TierAdaptConfig {
464    #[serde(default)]
465    pub t1_strip_system: bool,
466    #[serde(default)]
467    pub t1_condense_turns: bool,
468    #[serde(default = "default_t2_preamble")]
469    pub t2_default_preamble: Option<String>,
470    #[serde(default = "default_true")]
471    pub t3_t4_passthrough: bool,
472}
473
474impl Default for TierAdaptConfig {
475    fn default() -> Self {
476        Self {
477            t1_strip_system: false,
478            t1_condense_turns: false,
479            t2_default_preamble: default_t2_preamble(),
480            t3_t4_passthrough: true,
481        }
482    }
483}
484
/// Default for `TierAdaptConfig::t2_default_preamble`.
fn default_t2_preamble() -> Option<String> {
    let preamble = String::from("Be concise and direct. Focus on accuracy.");
    Some(preamble)
}
488
489#[derive(Debug, Clone, Serialize, Deserialize)]
490pub struct CircuitBreakerConfig {
491    #[serde(default = "default_cb_threshold")]
492    pub threshold: u32,
493    #[serde(default = "default_cb_window")]
494    pub window_seconds: u64,
495    #[serde(default = "default_cb_cooldown")]
496    pub cooldown_seconds: u64,
497    #[serde(default = "default_cb_max_cooldown")]
498    pub max_cooldown_seconds: u64,
499}
500
501impl Default for CircuitBreakerConfig {
502    fn default() -> Self {
503        Self {
504            threshold: default_cb_threshold(),
505            window_seconds: default_cb_window(),
506            cooldown_seconds: default_cb_cooldown(),
507            max_cooldown_seconds: default_cb_max_cooldown(),
508        }
509    }
510}
511
/// Default for `CircuitBreakerConfig::threshold`.
fn default_cb_threshold() -> u32 { 3 }

/// Default for `CircuitBreakerConfig::window_seconds`.
fn default_cb_window() -> u64 { 60 }

/// Default for `CircuitBreakerConfig::cooldown_seconds`.
fn default_cb_cooldown() -> u64 { 60 }

/// Default for `CircuitBreakerConfig::max_cooldown_seconds`.
fn default_cb_max_cooldown() -> u64 { 900 }
524
525#[derive(Debug, Clone, Serialize, Deserialize)]
526pub struct MemoryConfig {
527    #[serde(default = "default_working_pct")]
528    pub working_budget_pct: f64,
529    #[serde(default = "default_episodic_pct")]
530    pub episodic_budget_pct: f64,
531    #[serde(default = "default_semantic_pct")]
532    pub semantic_budget_pct: f64,
533    #[serde(default = "default_procedural_pct")]
534    pub procedural_budget_pct: f64,
535    #[serde(default = "default_relationship_pct")]
536    pub relationship_budget_pct: f64,
537    #[serde(default)]
538    pub embedding_provider: Option<String>,
539    #[serde(default)]
540    pub embedding_model: Option<String>,
541    #[serde(default = "default_hybrid_weight")]
542    pub hybrid_weight: f64,
543    #[serde(default)]
544    pub ann_index: bool,
545    /// Minimum similarity score for retrieval results. Default 0.0 (no floor).
546    #[serde(default)]
547    pub similarity_threshold: f64,
548    /// Half-life in days for episodic memory decay. Default: 7.0.
549    #[serde(default = "default_decay_half_life")]
550    pub decay_half_life_days: f64,
551    /// Embeddings above which ANN index replaces brute-force scan. Default: 1000.
552    #[serde(default = "default_ann_threshold")]
553    pub ann_activation_threshold: usize,
554}
555
556impl Default for MemoryConfig {
557    fn default() -> Self {
558        Self {
559            working_budget_pct: default_working_pct(),
560            episodic_budget_pct: default_episodic_pct(),
561            semantic_budget_pct: default_semantic_pct(),
562            procedural_budget_pct: default_procedural_pct(),
563            relationship_budget_pct: default_relationship_pct(),
564            embedding_provider: None,
565            embedding_model: None,
566            hybrid_weight: default_hybrid_weight(),
567            ann_index: false,
568            similarity_threshold: 0.0,
569            decay_half_life_days: default_decay_half_life(),
570            ann_activation_threshold: default_ann_threshold(),
571        }
572    }
573}
574
/// Default for `MemoryConfig::decay_half_life_days`.
fn default_decay_half_life() -> f64 { 7.0 }

/// Default for `MemoryConfig::ann_activation_threshold`.
fn default_ann_threshold() -> usize { 1000 }

/// Default for `MemoryConfig::hybrid_weight`.
fn default_hybrid_weight() -> f64 { 0.5 }

/// Default for `MemoryConfig::working_budget_pct`.
fn default_working_pct() -> f64 { 30.0 }

/// Default for `MemoryConfig::episodic_budget_pct`.
fn default_episodic_pct() -> f64 { 25.0 }

/// Default for `MemoryConfig::semantic_budget_pct`.
fn default_semantic_pct() -> f64 { 20.0 }

/// Default for `MemoryConfig::procedural_budget_pct`.
fn default_procedural_pct() -> f64 { 15.0 }

/// Default for `MemoryConfig::relationship_budget_pct`.
fn default_relationship_pct() -> f64 { 10.0 }
602
603#[derive(Debug, Clone, Serialize, Deserialize)]
604pub struct CacheConfig {
605    #[serde(default = "default_true")]
606    pub enabled: bool,
607    #[serde(default = "default_cache_ttl")]
608    pub exact_match_ttl_seconds: u64,
609    #[serde(default = "default_semantic_threshold")]
610    pub semantic_threshold: f64,
611    #[serde(default = "default_max_entries")]
612    pub max_entries: usize,
613    #[serde(default)]
614    pub prompt_compression: bool,
615    #[serde(default = "default_compression_ratio")]
616    pub compression_target_ratio: f64,
617}
618
619impl Default for CacheConfig {
620    fn default() -> Self {
621        Self {
622            enabled: true,
623            exact_match_ttl_seconds: default_cache_ttl(),
624            semantic_threshold: default_semantic_threshold(),
625            max_entries: default_max_entries(),
626            prompt_compression: false,
627            compression_target_ratio: default_compression_ratio(),
628        }
629    }
630}
631
/// Default for `CacheConfig::compression_target_ratio`.
fn default_compression_ratio() -> f64 { 0.5 }

/// Default for `CacheConfig::exact_match_ttl_seconds`.
fn default_cache_ttl() -> u64 { 3600 }

/// Default for `CacheConfig::semantic_threshold`.
fn default_semantic_threshold() -> f64 { 0.95 }

/// Default for `CacheConfig::max_entries`.
fn default_max_entries() -> usize { 10_000 }
645
#[cfg(test)]
mod memory_config_tests {
    use super::MemoryConfig;

    /// The three tuning knobs carry their documented defaults.
    #[test]
    fn memory_config_tuning_defaults() {
        let defaults = MemoryConfig::default();

        assert_eq!(defaults.similarity_threshold, 0.0);
        assert_eq!(defaults.decay_half_life_days, 7.0);
        assert_eq!(defaults.ann_activation_threshold, 1000);
    }

    /// The tuning knobs can be set from TOML while every other field falls
    /// back to its serde default.
    #[test]
    fn memory_config_tuning_deserializes() {
        // Named `source` rather than `toml` so it does not read like the crate.
        let source = r#"
            similarity_threshold = 0.3
            decay_half_life_days = 14.0
            ann_activation_threshold = 500
        "#;

        let parsed: MemoryConfig = toml::from_str(source).unwrap();

        let threshold_error = (parsed.similarity_threshold - 0.3).abs();
        let half_life_error = (parsed.decay_half_life_days - 14.0).abs();
        assert!(threshold_error < f64::EPSILON);
        assert!(half_life_error < f64::EPSILON);
        assert_eq!(parsed.ann_activation_threshold, 500);
    }
}
671