// roboticus_core/config/model_core.rs

1#[derive(Debug, Clone, Serialize, Deserialize)]
2pub struct ServerConfig {
3    #[serde(default = "default_port")]
4    pub port: u16,
5    #[serde(default = "default_bind")]
6    pub bind: String,
7    #[serde(default, skip_serializing)]
8    pub api_key: Option<String>,
9    #[serde(default = "default_log_dir")]
10    pub log_dir: PathBuf,
11    #[serde(default = "default_log_max_days")]
12    pub log_max_days: u32,
13    #[serde(default = "default_rate_limit_requests")]
14    pub rate_limit_requests: u32,
15    #[serde(default = "default_rate_limit_window_secs")]
16    pub rate_limit_window_secs: u64,
17    #[serde(default = "default_per_ip_rate_limit_requests")]
18    pub per_ip_rate_limit_requests: u32,
19    #[serde(default = "default_per_actor_rate_limit_requests")]
20    pub per_actor_rate_limit_requests: u32,
21    #[serde(default)]
22    pub trusted_proxy_cidrs: Vec<String>,
23    #[serde(default = "default_cron_max_concurrency")]
24    pub cron_max_concurrency: u8,
25}
26
27impl Default for ServerConfig {
28    fn default() -> Self {
29        Self {
30            port: default_port(),
31            bind: default_bind(),
32            api_key: None,
33            log_dir: default_log_dir(),
34            log_max_days: default_log_max_days(),
35            rate_limit_requests: default_rate_limit_requests(),
36            rate_limit_window_secs: default_rate_limit_window_secs(),
37            per_ip_rate_limit_requests: default_per_ip_rate_limit_requests(),
38            per_actor_rate_limit_requests: default_per_actor_rate_limit_requests(),
39            trusted_proxy_cidrs: Vec::new(),
40            cron_max_concurrency: default_cron_max_concurrency(),
41        }
42    }
43}
44
/// Fallback for `ServerConfig::rate_limit_requests` when the field is omitted.
fn default_rate_limit_requests() -> u32 {
    100
}
48
/// Fallback for `ServerConfig::rate_limit_window_secs` when the field is omitted.
fn default_rate_limit_window_secs() -> u64 {
    60
}
52
/// Fallback for `ServerConfig::per_ip_rate_limit_requests` when the field is omitted.
fn default_per_ip_rate_limit_requests() -> u32 {
    300
}
56
/// Fallback for `ServerConfig::per_actor_rate_limit_requests` when the field is omitted.
fn default_per_actor_rate_limit_requests() -> u32 {
    200
}
60
/// Fallback for `ServerConfig::cron_max_concurrency` when the field is omitted.
fn default_cron_max_concurrency() -> u8 {
    8
}
64
/// Fallback for `ServerConfig::port` when the field is omitted.
fn default_port() -> u16 {
    18789
}
68
/// Fallback for `ServerConfig::bind` when the field is omitted: the IPv4
/// loopback address as an owned string.
fn default_bind() -> String {
    "127.0.0.1".into()
}
72
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct DatabaseConfig {
75    #[serde(default = "default_db_path")]
76    pub path: PathBuf,
77}
78
79impl Default for DatabaseConfig {
80    fn default() -> Self {
81        Self {
82            path: default_db_path(),
83        }
84    }
85}
86
87fn default_db_path() -> PathBuf {
88    dirs_next().join("state.db")
89}
90
91#[derive(Debug, Clone, Serialize, Deserialize)]
92pub struct ModelsConfig {
93    pub primary: String,
94    #[serde(default)]
95    pub fallbacks: Vec<String>,
96    #[serde(default)]
97    pub routing: RoutingConfig,
98    #[serde(default)]
99    pub model_overrides: HashMap<String, ModelOverride>,
100    #[serde(default)]
101    pub stream_by_default: bool,
102    #[serde(default)]
103    pub tiered_inference: TieredInferenceConfig,
104}
105
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct TieredInferenceConfig {
108    #[serde(default)]
109    pub enabled: bool,
110    #[serde(default = "default_confidence_floor")]
111    pub confidence_floor: f64,
112    #[serde(default = "default_escalation_latency_ms")]
113    pub escalation_latency_budget_ms: u64,
114}
115
/// Fallback for `TieredInferenceConfig::confidence_floor` when the field is omitted.
fn default_confidence_floor() -> f64 {
    0.6
}
/// Fallback for `TieredInferenceConfig::escalation_latency_budget_ms` when
/// the field is omitted.
fn default_escalation_latency_ms() -> u64 {
    3000
}
122
123impl Default for TieredInferenceConfig {
124    fn default() -> Self {
125        Self {
126            enabled: false,
127            confidence_floor: default_confidence_floor(),
128            escalation_latency_budget_ms: default_escalation_latency_ms(),
129        }
130    }
131}
132
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct RoutingConfig {
135    #[serde(default = "default_routing_mode")]
136    pub mode: String,
137    #[serde(default = "default_confidence_threshold")]
138    pub confidence_threshold: f64,
139    #[serde(default = "default_true")]
140    pub local_first: bool,
141    #[serde(default)]
142    pub cost_aware: bool,
143    #[serde(default = "default_estimated_output_tokens")]
144    pub estimated_output_tokens: u32,
145    /// Minimum observed quality score (0.0–1.0) for a model to be considered
146    /// during metascore routing.  Models with fewer than `accuracy_min_obs`
147    /// observations are exempt (insufficient data). Set to 0.0 to disable.
148    #[serde(default)]
149    pub accuracy_floor: f64,
150    /// Minimum observations before the accuracy floor applies to a model.
151    #[serde(default = "default_accuracy_min_obs")]
152    pub accuracy_min_obs: usize,
153    /// Custom cost weight for metascore \[0.0–1.0\]. When set, replaces the
154    /// binary `cost_aware` toggle with a continuous dial: 0.0 = ignore cost,
155    /// 1.0 = maximize savings. Efficacy weight adjusts inversely.
156    /// When `None`, falls back to `cost_aware` boolean behavior.
157    #[serde(default)]
158    pub cost_weight: Option<f64>,
159    /// Canary model to route a fraction of traffic through for A/B validation.
160    /// When set, `canary_fraction` of requests are routed to this model instead
161    /// of the metascore winner. Set to `None` to disable canary routing.
162    #[serde(default)]
163    pub canary_model: Option<String>,
164    /// Fraction of requests routed to the canary model [0.0–1.0].
165    /// Only effective when `canary_model` is set. Default: 0.0 (disabled).
166    #[serde(default)]
167    pub canary_fraction: f64,
168    /// Static model blocklist — models listed here are unconditionally excluded
169    /// from all routing paths (override, metascore, fallback). Useful as an
170    /// instant kill-switch without restarting the server.
171    #[serde(default)]
172    pub blocked_models: Vec<String>,
173    /// Per-provider timeout in seconds for interactive inference. If a single
174    /// model doesn't respond within this window, the fallback chain advances.
175    /// Increase for slow local models (e.g. large quantized models on CPU/GPU).
176    #[serde(default = "default_per_provider_timeout")]
177    pub per_provider_timeout_seconds: u64,
178    /// Total wall-clock budget in seconds for the entire inference fallback
179    /// chain (all attempts combined). Increase if you have many fallback
180    /// candidates or slow providers.
181    #[serde(default = "default_max_total_inference")]
182    pub max_total_inference_seconds: u64,
183    /// Maximum number of fallback attempts before giving up.
184    #[serde(default = "default_max_fallback_attempts")]
185    pub max_fallback_attempts: usize,
186}
187
188impl Default for RoutingConfig {
189    fn default() -> Self {
190        Self {
191            mode: default_routing_mode(),
192            confidence_threshold: default_confidence_threshold(),
193            local_first: true,
194            cost_aware: false,
195            estimated_output_tokens: default_estimated_output_tokens(),
196            accuracy_floor: 0.0,
197            accuracy_min_obs: default_accuracy_min_obs(),
198            cost_weight: None,
199            canary_model: None,
200            canary_fraction: 0.0,
201            blocked_models: Vec::new(),
202            per_provider_timeout_seconds: default_per_provider_timeout(),
203            max_total_inference_seconds: default_max_total_inference(),
204            max_fallback_attempts: default_max_fallback_attempts(),
205        }
206    }
207}
208
/// Fallback for `RoutingConfig::accuracy_min_obs` when the field is omitted.
fn default_accuracy_min_obs() -> usize {
    10
}
212
/// Fallback for `RoutingConfig::per_provider_timeout_seconds` when the field
/// is omitted.
fn default_per_provider_timeout() -> u64 {
    30
}
216
/// Fallback for `RoutingConfig::max_total_inference_seconds` when the field
/// is omitted.
fn default_max_total_inference() -> u64 {
    120
}
220
/// Fallback for `RoutingConfig::max_fallback_attempts` when the field is omitted.
fn default_max_fallback_attempts() -> usize {
    6
}
224
/// Fallback for `RoutingConfig::estimated_output_tokens` when the field is omitted.
fn default_estimated_output_tokens() -> u32 {
    500
}
228
/// Fallback for `RoutingConfig::mode` when the field is omitted.
fn default_routing_mode() -> String {
    "metascore".into()
}
232
/// Fallback for `RoutingConfig::confidence_threshold` when the field is omitted.
fn default_confidence_threshold() -> f64 {
    0.9
}
236
/// Serde default helper for boolean fields that should be `true` when omitted
/// (a bare `#[serde(default)]` would yield `false`).
fn default_true() -> bool {
    true
}
240
241#[derive(Debug, Clone, Serialize, Deserialize)]
242pub struct ProviderConfig {
243    pub url: String,
244    pub tier: String,
245    #[serde(default)]
246    pub format: Option<String>,
247    #[serde(default)]
248    pub api_key_env: Option<String>,
249    #[serde(default)]
250    pub chat_path: Option<String>,
251    #[serde(default)]
252    pub embedding_path: Option<String>,
253    #[serde(default)]
254    pub embedding_model: Option<String>,
255    #[serde(default)]
256    pub embedding_dimensions: Option<usize>,
257    #[serde(default)]
258    pub is_local: Option<bool>,
259    #[serde(default)]
260    pub cost_per_input_token: Option<f64>,
261    #[serde(default)]
262    pub cost_per_output_token: Option<f64>,
263    #[serde(default)]
264    pub auth_header: Option<String>,
265    #[serde(default)]
266    pub extra_headers: Option<HashMap<String, String>>,
267    #[serde(default)]
268    pub tpm_limit: Option<u64>,
269    #[serde(default)]
270    pub rpm_limit: Option<u64>,
271    #[serde(default)]
272    pub auth_mode: Option<String>,
273    #[serde(default)]
274    pub oauth_client_id: Option<String>,
275    #[serde(default)]
276    pub oauth_redirect_uri: Option<String>,
277    #[serde(default)]
278    pub api_key_ref: Option<String>,
279}
280
281impl ProviderConfig {
282    pub fn new(url: impl Into<String>, tier: impl Into<String>) -> Self {
283        Self {
284            url: url.into(),
285            tier: tier.into(),
286            format: None,
287            api_key_env: None,
288            chat_path: None,
289            embedding_path: None,
290            embedding_model: None,
291            embedding_dimensions: None,
292            is_local: None,
293            cost_per_input_token: None,
294            cost_per_output_token: None,
295            auth_header: None,
296            extra_headers: None,
297            tpm_limit: None,
298            rpm_limit: None,
299            auth_mode: None,
300            oauth_client_id: None,
301            oauth_redirect_uri: None,
302            api_key_ref: None,
303        }
304    }
305}
306
307#[derive(Debug, Clone, Serialize, Deserialize)]
308pub struct ModelOverride {
309    #[serde(default)]
310    pub tier: Option<String>,
311    #[serde(default)]
312    pub cost_per_input_token: Option<f64>,
313    #[serde(default)]
314    pub cost_per_output_token: Option<f64>,
315}
316
317#[derive(Debug, Clone, Serialize, Deserialize)]
318pub struct TierAdaptConfig {
319    #[serde(default)]
320    pub t1_strip_system: bool,
321    #[serde(default)]
322    pub t1_condense_turns: bool,
323    #[serde(default = "default_t2_preamble")]
324    pub t2_default_preamble: Option<String>,
325    #[serde(default = "default_true")]
326    pub t3_t4_passthrough: bool,
327}
328
329impl Default for TierAdaptConfig {
330    fn default() -> Self {
331        Self {
332            t1_strip_system: false,
333            t1_condense_turns: false,
334            t2_default_preamble: default_t2_preamble(),
335            t3_t4_passthrough: true,
336        }
337    }
338}
339
/// Fallback for `TierAdaptConfig::t2_default_preamble` when the field is
/// omitted; always returns `Some` with a fixed preamble string.
fn default_t2_preamble() -> Option<String> {
    Some("Be concise and direct. Focus on accuracy.".into())
}
343
344#[derive(Debug, Clone, Serialize, Deserialize)]
345pub struct CircuitBreakerConfig {
346    #[serde(default = "default_cb_threshold")]
347    pub threshold: u32,
348    #[serde(default = "default_cb_window")]
349    pub window_seconds: u64,
350    #[serde(default = "default_cb_cooldown")]
351    pub cooldown_seconds: u64,
352    #[serde(default = "default_cb_max_cooldown")]
353    pub max_cooldown_seconds: u64,
354}
355
356impl Default for CircuitBreakerConfig {
357    fn default() -> Self {
358        Self {
359            threshold: default_cb_threshold(),
360            window_seconds: default_cb_window(),
361            cooldown_seconds: default_cb_cooldown(),
362            max_cooldown_seconds: default_cb_max_cooldown(),
363        }
364    }
365}
366
/// Fallback for `CircuitBreakerConfig::threshold` when the field is omitted.
fn default_cb_threshold() -> u32 {
    3
}
/// Fallback for `CircuitBreakerConfig::window_seconds` when the field is omitted.
fn default_cb_window() -> u64 {
    60
}
/// Fallback for `CircuitBreakerConfig::cooldown_seconds` when the field is omitted.
fn default_cb_cooldown() -> u64 {
    60
}
/// Fallback for `CircuitBreakerConfig::max_cooldown_seconds` when the field
/// is omitted.
fn default_cb_max_cooldown() -> u64 {
    900
}
379
380#[derive(Debug, Clone, Serialize, Deserialize)]
381pub struct MemoryConfig {
382    #[serde(default = "default_working_pct")]
383    pub working_budget_pct: f64,
384    #[serde(default = "default_episodic_pct")]
385    pub episodic_budget_pct: f64,
386    #[serde(default = "default_semantic_pct")]
387    pub semantic_budget_pct: f64,
388    #[serde(default = "default_procedural_pct")]
389    pub procedural_budget_pct: f64,
390    #[serde(default = "default_relationship_pct")]
391    pub relationship_budget_pct: f64,
392    #[serde(default)]
393    pub embedding_provider: Option<String>,
394    #[serde(default)]
395    pub embedding_model: Option<String>,
396    #[serde(default = "default_hybrid_weight")]
397    pub hybrid_weight: f64,
398    #[serde(default)]
399    pub ann_index: bool,
400}
401
402impl Default for MemoryConfig {
403    fn default() -> Self {
404        Self {
405            working_budget_pct: default_working_pct(),
406            episodic_budget_pct: default_episodic_pct(),
407            semantic_budget_pct: default_semantic_pct(),
408            procedural_budget_pct: default_procedural_pct(),
409            relationship_budget_pct: default_relationship_pct(),
410            embedding_provider: None,
411            embedding_model: None,
412            hybrid_weight: default_hybrid_weight(),
413            ann_index: false,
414        }
415    }
416}
417
/// Fallback for `MemoryConfig::hybrid_weight` when the field is omitted.
fn default_hybrid_weight() -> f64 {
    0.5
}
421
/// Fallback for `MemoryConfig::working_budget_pct` when the field is omitted.
fn default_working_pct() -> f64 {
    30.0
}
/// Fallback for `MemoryConfig::episodic_budget_pct` when the field is omitted.
fn default_episodic_pct() -> f64 {
    25.0
}
/// Fallback for `MemoryConfig::semantic_budget_pct` when the field is omitted.
fn default_semantic_pct() -> f64 {
    20.0
}
/// Fallback for `MemoryConfig::procedural_budget_pct` when the field is omitted.
fn default_procedural_pct() -> f64 {
    15.0
}
/// Fallback for `MemoryConfig::relationship_budget_pct` when the field is omitted.
fn default_relationship_pct() -> f64 {
    10.0
}
437
438#[derive(Debug, Clone, Serialize, Deserialize)]
439pub struct CacheConfig {
440    #[serde(default = "default_true")]
441    pub enabled: bool,
442    #[serde(default = "default_cache_ttl")]
443    pub exact_match_ttl_seconds: u64,
444    #[serde(default = "default_semantic_threshold")]
445    pub semantic_threshold: f64,
446    #[serde(default = "default_max_entries")]
447    pub max_entries: usize,
448    #[serde(default)]
449    pub prompt_compression: bool,
450    #[serde(default = "default_compression_ratio")]
451    pub compression_target_ratio: f64,
452}
453
454impl Default for CacheConfig {
455    fn default() -> Self {
456        Self {
457            enabled: true,
458            exact_match_ttl_seconds: default_cache_ttl(),
459            semantic_threshold: default_semantic_threshold(),
460            max_entries: default_max_entries(),
461            prompt_compression: false,
462            compression_target_ratio: default_compression_ratio(),
463        }
464    }
465}
466
/// Fallback for `CacheConfig::compression_target_ratio` when the field is omitted.
fn default_compression_ratio() -> f64 {
    0.5
}
470
/// Fallback for `CacheConfig::exact_match_ttl_seconds` when the field is omitted.
fn default_cache_ttl() -> u64 {
    3600
}
/// Fallback for `CacheConfig::semantic_threshold` when the field is omitted.
fn default_semantic_threshold() -> f64 {
    0.95
}
/// Fallback for `CacheConfig::max_entries` when the field is omitted.
fn default_max_entries() -> usize {
    10000
}