codetether_agent/session/delegation.rs
1//! CADMAS-CTX delegation posteriors (arXiv:2604.17950).
2//!
3//! ## Role
4//!
5//! Static per-agent skill scores are provably lossy when capability is
6//! context-conditional (linear regret `Ω(ε · P(z₀) · T)`). CADMAS-CTX
7//! replaces them with a hierarchy of per-(agent, skill, bucket) Beta
8//! posteriors scored under a risk-aware LCB, achieving `O(log T)`
9//! regret. This module is the Phase C scaffolding for that replacement
10//! on codetether's internal routing surfaces (`choose_router_target`,
11//! swarm / ralph dispatch, RLM model selection, autochat persona pick).
12//!
13//! ## Scope in Phase C step 16
14//!
15//! Types + math + sidecar-compatible serialisation, with no live
16//! consumers yet. The go/no-go experiment in
17//! [`choose_router_target`](crate::session::helper::prompt) lands in a
18//! follow-up commit (Phase C step 17) once these primitives are stable.
19//!
20//! ## Invariants
21//!
22//! * State lives **only** in the sidecar — never in `DerivedContext`.
23//! Capability history is not chat context either.
24//! * Updates are Beta-Bernoulli conjugate; no ML-style training.
25//! * Cold-start shrinkage is bounded by `m_z ≤ 2` per the paper.
26//!
27//! ## Examples
28//!
29//! ```rust
30//! use codetether_agent::session::delegation::{
31//! BetaPosterior, DelegationConfig, DelegationState,
32//! };
33//! use codetether_agent::session::relevance::{Bucket, Dependency, Difficulty, ToolUse};
34//!
35//! let bucket = Bucket {
36//! difficulty: Difficulty::Easy,
37//! dependency: Dependency::Isolated,
38//! tool_use: ToolUse::No,
39//! };
40//!
41//! let mut state = DelegationState::with_config(DelegationConfig::default());
42//! state.update("openai", "model_call", bucket, true);
43//! state.update("openai", "model_call", bucket, true);
44//! state.update("openai", "model_call", bucket, false);
45//!
46//! let score = state.score("openai", "model_call", bucket);
47//! assert!(score.is_some());
48//! ```
49
50use chrono::{DateTime, Utc};
51use serde::{Deserialize, Serialize};
52use std::collections::BTreeMap;
53use std::env;
54
55use super::relevance::Bucket;
56
/// Default LCB uncertainty penalty `γ`.
///
/// Per CADMAS-CTX Section 3.4, `γ = 0.5` trades exploration off against
/// conservative fallback.
pub const DEFAULT_GAMMA: f64 = 0.5;

/// Default delegation margin `δ`.
///
/// Delegation only fires when a peer's LCB score beats the local
/// agent's score by more than this margin (CADMAS-CTX Eq. 8).
pub const DEFAULT_DELTA: f64 = 0.05;

/// Default weak-prior strength `κ` applied when a posterior is seeded
/// from self-declared confidence.
pub const DEFAULT_KAPPA: f64 = 2.0;

/// Default forgetting factor `λ` applied on every update.
///
/// `1.0` turns decay off (the Phase C v1 default). Values in
/// `[0.9, 1.0)` let posteriors track drifting capability
/// (CADMAS-CTX §5.9 and the Phase C step 22 follow-up).
pub const DEFAULT_LAMBDA: f64 = 1.0;
79
80/// Per-(agent, skill, bucket) Beta-Bernoulli posterior.
81///
82/// Keeps `alpha` and `beta` as `f64` so the forgetting factor `λ` can
83/// apply continuous decay without losing resolution on small-count
84/// cells.
85#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct BetaPosterior {
87 /// Pseudo-count of observed successes (plus the weak prior).
88 pub alpha: f64,
89 /// Pseudo-count of observed failures (plus the weak prior).
90 pub beta: f64,
91 /// Total real observations seen so far.
92 pub n: u64,
93 /// Self-declared prior confidence in `[0, 1]`, used to seed
94 /// `alpha` / `beta` on first touch.
95 pub c_self: f64,
96 /// Weak-prior strength multiplier for [`Self::c_self`].
97 pub kappa: f64,
98 /// Timestamp of the last update, for drift diagnostics.
99 pub last_update: DateTime<Utc>,
100}
101
102impl BetaPosterior {
103 /// Seed a fresh posterior from self-declared confidence.
104 pub fn from_self_confidence(c_self: f64, kappa: f64) -> Self {
105 let c = c_self.clamp(0.0, 1.0);
106 Self {
107 alpha: kappa * c,
108 beta: kappa * (1.0 - c),
109 n: 0,
110 c_self: c,
111 kappa,
112 last_update: Utc::now(),
113 }
114 }
115
116 /// Posterior mean: `μ = α / (α + β)`.
117 pub fn mean(&self) -> f64 {
118 let total = self.alpha + self.beta;
119 if total <= 0.0 {
120 return 0.0;
121 }
122 self.alpha / total
123 }
124
125 /// Posterior variance: `u = αβ / ((α+β)² (α+β+1))`.
126 pub fn variance(&self) -> f64 {
127 let total = self.alpha + self.beta;
128 if total <= 0.0 {
129 return 0.0;
130 }
131 let denom = total * total * (total + 1.0);
132 (self.alpha * self.beta) / denom
133 }
134
135 /// LCB risk-aware score `μ − γ · √u`.
136 pub fn score(&self, gamma: f64) -> f64 {
137 self.mean() - gamma * self.variance().sqrt()
138 }
139
140 /// Apply an observed outcome. Forgetting factor `lambda ∈ [0, 1]`
141 /// multiplicatively decays prior pseudo-counts before the update.
142 pub fn update(&mut self, outcome: bool, lambda: f64) {
143 self.alpha *= lambda;
144 self.beta *= lambda;
145 if outcome {
146 self.alpha += 1.0;
147 } else {
148 self.beta += 1.0;
149 }
150 self.n += 1;
151 self.last_update = Utc::now();
152 }
153}
154
155/// Tunable knobs for [`DelegationState`].
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct DelegationConfig {
158 /// LCB uncertainty penalty (default [`DEFAULT_GAMMA`]).
159 #[serde(default = "default_gamma")]
160 pub gamma: f64,
161 /// Delegation margin (default [`DEFAULT_DELTA`]).
162 #[serde(default = "default_delta")]
163 pub delta: f64,
164 /// Weak-prior strength for self-confidence seeding (default
165 /// [`DEFAULT_KAPPA`]).
166 #[serde(default = "default_kappa")]
167 pub kappa: f64,
168 /// Forgetting factor (default [`DEFAULT_LAMBDA`] = 1.0 = disabled).
169 #[serde(default = "default_lambda")]
170 pub lambda: f64,
171 /// Feature flag gating Phase C consumers. Defaults to `false`; the
172 /// LCB swap in `choose_router_target` only activates when this is
173 /// set to `true` (or the `CODETETHER_DELEGATION_ENABLED` env var).
174 #[serde(default)]
175 pub enabled: bool,
176}
177
178fn default_gamma() -> f64 {
179 DEFAULT_GAMMA
180}
181
182fn default_delta() -> f64 {
183 DEFAULT_DELTA
184}
185
186fn default_kappa() -> f64 {
187 DEFAULT_KAPPA
188}
189
190fn default_lambda() -> f64 {
191 DEFAULT_LAMBDA
192}
193
194impl Default for DelegationConfig {
195 fn default() -> Self {
196 Self {
197 gamma: DEFAULT_GAMMA,
198 delta: DEFAULT_DELTA,
199 kappa: DEFAULT_KAPPA,
200 lambda: DEFAULT_LAMBDA,
201 enabled: false,
202 }
203 }
204}
205
206/// Key for [`DelegationState::beliefs`]. Stored as owned strings so the
207/// map serialises cleanly and survives across process boundaries.
208pub type BeliefKey = (String, String, Bucket);
209
210/// Per-session CADMAS-CTX sidecar.
211#[derive(Debug, Clone, Default, Serialize, Deserialize)]
212pub struct DelegationState {
213 /// Posteriors keyed by `(agent_id, skill, bucket)`.
214 #[serde(default)]
215 pub beliefs: BTreeMap<String, BetaPosterior>,
216 /// Runtime configuration.
217 #[serde(default)]
218 pub config: DelegationConfig,
219}
220
221impl DelegationState {
222 /// Create a fresh state seeded with the supplied config.
223 pub fn with_config(config: DelegationConfig) -> Self {
224 Self {
225 beliefs: BTreeMap::new(),
226 config,
227 }
228 }
229
230 /// Whether CADMAS-CTX routing is enabled for this session.
231 ///
232 /// `CODETETHER_DELEGATION_ENABLED` overrides the persisted config when
233 /// present so operators can toggle the feature process-wide.
234 pub fn enabled(&self) -> bool {
235 env_enabled_override().unwrap_or(self.config.enabled)
236 }
237
238 /// Serialise a `(agent, skill, bucket)` triple into the flat string
239 /// key used by the sidecar.
240 ///
241 /// The encoding is `"{agent}|{skill}|{difficulty}|{dependency}|{tool_use}"`
242 /// where each bucket field is the canonical snake_case string from
243 /// [`Difficulty::as_str`](crate::session::relevance::Difficulty::as_str),
244 /// [`Dependency::as_str`](crate::session::relevance::Dependency::as_str),
245 /// and [`ToolUse::as_str`](crate::session::relevance::ToolUse::as_str)
246 /// — matching the serde representation. Persisted keys therefore stay
247 /// stable across enum reorderings / variant renames, because the
248 /// `as_str` methods are explicitly documented as never-renamed.
249 pub fn key(agent: &str, skill: &str, bucket: Bucket) -> String {
250 format!(
251 "{agent}|{skill}|{}|{}|{}",
252 bucket.difficulty.as_str(),
253 bucket.dependency.as_str(),
254 bucket.tool_use.as_str(),
255 )
256 }
257
258 /// Look up or create the posterior for `(agent, skill, bucket)`
259 /// using `c_self` as the weak-prior seed.
260 pub fn ensure(
261 &mut self,
262 agent: &str,
263 skill: &str,
264 bucket: Bucket,
265 c_self: f64,
266 ) -> &mut BetaPosterior {
267 let key = Self::key(agent, skill, bucket);
268 let kappa = self.config.kappa;
269 self.beliefs
270 .entry(key)
271 .or_insert_with(|| BetaPosterior::from_self_confidence(c_self, kappa))
272 }
273
274 /// Current LCB score for `(agent, skill, bucket)`; `None` when the
275 /// triple has never been seeded or updated.
276 pub fn score(&self, agent: &str, skill: &str, bucket: Bucket) -> Option<f64> {
277 let key = Self::key(agent, skill, bucket);
278 self.beliefs.get(&key).map(|p| p.score(self.config.gamma))
279 }
280
281 /// Apply an observed outcome for `(agent, skill, bucket)`.
282 /// Creates the posterior with a neutral `c_self = 0.5` seed when
283 /// absent.
284 pub fn update(&mut self, agent: &str, skill: &str, bucket: Bucket, outcome: bool) {
285 let lambda = self.config.lambda;
286 let post = self.ensure(agent, skill, bucket, 0.5);
287 post.update(outcome, lambda);
288 }
289
290 /// Pick a peer to delegate to over `local`, or return `None` to
291 /// self-execute. Applies the margin rule `score(peer) > score(local) + δ`.
292 pub fn delegate_to<'a>(
293 &self,
294 local: &'a str,
295 peers: &'a [&'a str],
296 skill: &str,
297 bucket: Bucket,
298 ) -> Option<&'a str> {
299 let local_score = self.score(local, skill, bucket).unwrap_or(0.0);
300 let mut best: Option<(&str, f64)> = None;
301 for peer in peers {
302 if *peer == local {
303 continue;
304 }
305 let peer_score = self.score(peer, skill, bucket).unwrap_or(0.0);
306 if peer_score > local_score + self.config.delta {
307 match best {
308 Some((_, current_best)) if current_best >= peer_score => {}
309 _ => best = Some((peer, peer_score)),
310 }
311 }
312 }
313 best.map(|(peer, _)| peer)
314 }
315
316 /// Rank `candidates` by their LCB score for `(skill, bucket)` and
317 /// return the best one, or `None` when the input is empty.
318 ///
319 /// Unlike [`Self::delegate_to`] this does **not** honour a margin
320 /// δ — it's the right primitive for orchestration sites that pick
321 /// "which executor runs this subtask" (`src/swarm/orchestrator.rs`
322 /// step 28), "which persona handles this handoff"
323 /// (`src/ralph/ralph_loop.rs` step 29), and "which autochat
324 /// persona goes next" (`src/tui/app/autochat/` step 31) — there
325 /// is no "local" agent competing for the slot, so the margin rule
326 /// doesn't apply.
327 ///
328 /// Candidates with no posterior yet score 0.0 (conservative) and
329 /// are only picked when every other candidate also has no data —
330 /// i.e. the cold-start tie-break preserves the caller's input
331 /// order.
332 ///
333 /// # Examples
334 ///
335 /// ```rust
336 /// use codetether_agent::session::delegation::{DelegationConfig, DelegationState};
337 /// use codetether_agent::session::delegation_skills::SWARM_DISPATCH;
338 /// use codetether_agent::session::relevance::{Bucket, Dependency, Difficulty, ToolUse};
339 ///
340 /// let b = Bucket {
341 /// difficulty: Difficulty::Easy,
342 /// dependency: Dependency::Isolated,
343 /// tool_use: ToolUse::No,
344 /// };
345 /// let mut state = DelegationState::with_config(DelegationConfig::default());
346 /// // Cold start: no data → first candidate wins by input-order tie-break.
347 /// let pick = state.rank_candidates(&["shell_executor", "planner"], SWARM_DISPATCH, b);
348 /// assert_eq!(pick, Some("shell_executor"));
349 /// ```
350 pub fn rank_candidates<'a>(
351 &self,
352 candidates: &'a [&'a str],
353 skill: &str,
354 bucket: Bucket,
355 ) -> Option<&'a str> {
356 if candidates.is_empty() {
357 return None;
358 }
359 let mut best: Option<(&str, f64)> = None;
360 for name in candidates {
361 let score = self.score(name, skill, bucket).unwrap_or(0.0);
362 match best {
363 Some((_, current)) if current >= score => {}
364 _ => best = Some((name, score)),
365 }
366 }
367 best.map(|(name, _)| name)
368 }
369
370 /// Pull at most `m_z` pseudo-counts from `neighbors` into the
371 /// posterior for `(agent, skill, bucket)` when that posterior has
372 /// no real observations yet.
373 ///
374 /// Empirical-Bayes cold-start per CADMAS-CTX Section 3.6. Bounded
375 /// by `m_z ≤ 2` so neighbour mass cannot drown real evidence.
376 pub fn shrink_cold_start(
377 &mut self,
378 agent: &str,
379 skill: &str,
380 bucket: Bucket,
381 neighbors: &[Bucket],
382 m_z: f64,
383 ) {
384 let m_z = m_z.clamp(0.0, 2.0);
385 if m_z <= 0.0 {
386 return;
387 }
388 let own_key = Self::key(agent, skill, bucket);
389 if let Some(own) = self.beliefs.get(&own_key) {
390 if own.n > 0 {
391 return;
392 }
393 }
394 let mut sum_alpha = 0.0;
395 let mut sum_beta = 0.0;
396 let mut contributors = 0.0;
397 for nb in neighbors {
398 if *nb == bucket {
399 continue;
400 }
401 let nb_key = Self::key(agent, skill, *nb);
402 if let Some(post) = self.beliefs.get(&nb_key) {
403 if post.n > 0 {
404 sum_alpha += post.mean();
405 sum_beta += 1.0 - post.mean();
406 contributors += 1.0;
407 }
408 }
409 }
410 if contributors <= 0.0 {
411 return;
412 }
413 let avg_alpha = sum_alpha / contributors;
414 let avg_beta = sum_beta / contributors;
415 let kappa = self.config.kappa;
416 let post = self
417 .beliefs
418 .entry(own_key)
419 .or_insert_with(|| BetaPosterior::from_self_confidence(0.5, kappa));
420 post.alpha += avg_alpha * m_z;
421 post.beta += avg_beta * m_z;
422 }
423}
424
/// Read the `CODETETHER_DELEGATION_ENABLED` override from the environment.
///
/// The value is trimmed and compared case-insensitively (ASCII).
/// `1` / `true` / `yes` / `on` give `Some(true)`; `0` / `false` / `no` /
/// `off` give `Some(false)`. An unset, non-Unicode, or unrecognised
/// value gives `None`.
fn env_enabled_override() -> Option<bool> {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
    const FALSY: [&str; 4] = ["0", "false", "no", "off"];

    let value = env::var("CODETETHER_DELEGATION_ENABLED").ok()?;
    let token = value.trim();
    if TRUTHY.iter().any(|word| token.eq_ignore_ascii_case(word)) {
        Some(true)
    } else if FALSY.iter().any(|word| token.eq_ignore_ascii_case(word)) {
        Some(false)
    } else {
        None
    }
}
433
#[cfg(test)]
mod tests {
    use super::*;
    use crate::session::relevance::{Dependency, Difficulty, ToolUse};

    /// Canonical bucket shared by the tests below.
    fn bucket() -> Bucket {
        Bucket {
            difficulty: Difficulty::Easy,
            dependency: Dependency::Isolated,
            tool_use: ToolUse::No,
        }
    }

    #[test]
    fn beta_update_increments_success_count() {
        let mut post = BetaPosterior::from_self_confidence(0.5, 2.0);
        post.update(true, 1.0);
        assert_eq!(post.n, 1);
        // α grew from 1.0 → 2.0, β unchanged at 1.0.
        assert!((post.alpha - 2.0).abs() < 1e-9);
        assert!((post.beta - 1.0).abs() < 1e-9);
    }

    #[test]
    fn beta_score_penalises_uncertainty() {
        let mut thin = BetaPosterior::from_self_confidence(0.8, 2.0);
        let mut thick = BetaPosterior::from_self_confidence(0.5, 2.0);
        for _ in 0..100 {
            thick.update(true, 1.0);
            thick.update(false, 1.0);
        }
        // After one failure `thin` has a mean of ≈ 0.53, slightly above
        // `thick`'s 0.5, but it rests on a single observation. Its much
        // larger variance must pull its LCB score below `thick`'s.
        thin.update(false, 1.0);
        let gamma = 0.5;
        assert!(thin.score(gamma) < thick.score(gamma));
    }

    #[test]
    fn delegation_state_update_seeds_and_records() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        state.update("openai", "model_call", bucket(), true);
        let score = state
            .score("openai", "model_call", bucket())
            .expect("update must seed the posterior");
        assert!(score.is_finite());
    }

    #[test]
    fn delegate_to_respects_margin() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b = bucket();
        // Local has lots of evidence, mid-performance.
        for _ in 0..20 {
            state.update("local", "skill", b, true);
            state.update("local", "skill", b, false);
        }
        // Peer has the same balanced history plus two extra successes,
        // i.e. a slightly better hit rate.
        for _ in 0..20 {
            state.update("peer", "skill", b, true);
            state.update("peer", "skill", b, false);
        }
        for _ in 0..2 {
            state.update("peer", "skill", b, true);
        }
        let peers = ["peer"];
        // Local scores ≈ 0.462 and the peer ≈ 0.485: the ≈ 0.024 gap is
        // below δ = 0.05, so the margin blocks a trivial hand-off.
        assert_eq!(state.delegate_to("local", &peers, "skill", b), None);

        // Twenty more peer successes lift its score to ≈ 0.643, which
        // clears local + δ and triggers delegation.
        for _ in 0..20 {
            state.update("peer", "skill", b, true);
        }
        assert_eq!(
            state.delegate_to("local", &peers, "skill", b),
            Some("peer")
        );
    }

    #[test]
    fn shrink_cold_start_pulls_neighbour_mass() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b1 = bucket();
        let b2 = Bucket {
            difficulty: Difficulty::Medium,
            ..b1
        };
        for _ in 0..10 {
            state.update("agent", "skill", b2, true);
        }
        // b1 has no real data yet.
        assert!(
            state
                .beliefs
                .get(&DelegationState::key("agent", "skill", b1))
                .map(|p| p.n)
                .unwrap_or(0)
                == 0
        );
        state.shrink_cold_start("agent", "skill", b1, &[b2], 2.0);
        let post = state
            .beliefs
            .get(&DelegationState::key("agent", "skill", b1))
            .unwrap();
        // Pseudo-alpha should have grown toward b2's mean (≈ 0.92).
        assert!(post.alpha > post.beta);
    }

    #[test]
    fn rank_candidates_picks_first_on_cold_start() {
        let state = DelegationState::with_config(DelegationConfig::default());
        let pick = state.rank_candidates(&["a", "b", "c"], "swarm_dispatch", bucket());
        assert_eq!(pick, Some("a"));
    }

    #[test]
    fn rank_candidates_prefers_best_scoring_once_warm() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b = bucket();
        for _ in 0..5 {
            state.update("b", "swarm_dispatch", b, true);
        }
        for _ in 0..5 {
            state.update("a", "swarm_dispatch", b, false);
        }
        let pick = state.rank_candidates(&["a", "b"], "swarm_dispatch", b);
        assert_eq!(pick, Some("b"));
    }

    #[test]
    fn rank_candidates_is_none_for_empty_input() {
        let state = DelegationState::with_config(DelegationConfig::default());
        assert!(
            state
                .rank_candidates(&[], "swarm_dispatch", bucket())
                .is_none()
        );
    }

    #[test]
    fn config_defaults_match_documented_constants() {
        let cfg = DelegationConfig::default();
        assert!((cfg.gamma - DEFAULT_GAMMA).abs() < 1e-9);
        assert!((cfg.delta - DEFAULT_DELTA).abs() < 1e-9);
        assert!((cfg.kappa - DEFAULT_KAPPA).abs() < 1e-9);
        assert!((cfg.lambda - DEFAULT_LAMBDA).abs() < 1e-9);
        assert!(!cfg.enabled);
    }
}