codetether_agent/session/delegation.rs
1//! CADMAS-CTX delegation posteriors (arXiv:2604.17950).
2//!
3//! ## Role
4//!
5//! Static per-agent skill scores are provably lossy when capability is
6//! context-conditional (linear regret `Ω(ε · P(z₀) · T)`). CADMAS-CTX
7//! replaces them with a hierarchy of per-(agent, skill, bucket) Beta
8//! posteriors scored under a risk-aware LCB, achieving `O(log T)`
9//! regret. This module is the Phase C scaffolding for that replacement
10//! on codetether's internal routing surfaces (`choose_router_target`,
11//! swarm / ralph dispatch, RLM model selection, autochat persona pick).
12//!
13//! ## Scope in Phase C step 16
14//!
15//! Types + math + sidecar-compatible serialisation, with no live
16//! consumers yet. The go/no-go experiment in
17//! [`choose_router_target`](crate::session::helper::prompt) lands in a
18//! follow-up commit (Phase C step 17) once these primitives are stable.
19//!
20//! ## Invariants
21//!
22//! * State lives **only** in the sidecar — never in `DerivedContext`.
23//! Capability history is not chat context either.
24//! * Updates are Beta-Bernoulli conjugate; no ML-style training.
25//! * Cold-start shrinkage is bounded by `m_z ≤ 2` per the paper.
26//!
27//! ## Examples
28//!
29//! ```rust
30//! use codetether_agent::session::delegation::{
31//! BetaPosterior, DelegationConfig, DelegationState,
32//! };
33//! use codetether_agent::session::relevance::{Bucket, Dependency, Difficulty, ToolUse};
34//!
35//! let bucket = Bucket {
36//! difficulty: Difficulty::Easy,
37//! dependency: Dependency::Isolated,
38//! tool_use: ToolUse::No,
39//! };
40//!
41//! let mut state = DelegationState::with_config(DelegationConfig::default());
42//! state.update("openai", "model_call", bucket, true);
43//! state.update("openai", "model_call", bucket, true);
44//! state.update("openai", "model_call", bucket, false);
45//!
46//! let score = state.score("openai", "model_call", bucket);
47//! assert!(score.is_some());
48//! ```
49
50use chrono::{DateTime, Utc};
51use serde::{Deserialize, Serialize};
52use std::collections::BTreeMap;
53
54use super::relevance::Bucket;
55
/// Default uncertainty penalty `γ` for LCB scoring.
///
/// CADMAS-CTX Section 3.4 defaults: `γ = 0.5` balances exploration
/// against conservative fallback.
///
/// Seeds [`DelegationConfig::gamma`]; the value ends up as the
/// multiplier on the posterior standard deviation in
/// [`BetaPosterior::score`].
pub const DEFAULT_GAMMA: f64 = 0.5;

/// Default delegation margin `δ`.
///
/// A peer's LCB score must beat the local agent by at least this much
/// before delegation fires (CADMAS-CTX Eq. 8).
///
/// Seeds [`DelegationConfig::delta`], which
/// [`DelegationState::delegate_to`] adds to the local agent's score.
pub const DEFAULT_DELTA: f64 = 0.05;

/// Default weak-prior strength `κ` used to seed posteriors from
/// self-declared confidence.
///
/// Seeds [`DelegationConfig::kappa`]. In
/// [`BetaPosterior::from_self_confidence`] the prior is split as
/// `α = κ · c` and `β = κ · (1 − c)`, so `κ` is the total number of
/// pseudo-observations a fresh posterior starts with.
pub const DEFAULT_KAPPA: f64 = 2.0;

/// Default forgetting factor `λ` applied on each update.
///
/// `1.0` disables decay (Phase C v1 default). Values in `[0.9, 1.0)`
/// adapt posteriors to drifting capability (CADMAS-CTX §5.9 and the
/// Phase C step 22 follow-up).
///
/// Seeds [`DelegationConfig::lambda`]; [`BetaPosterior::update`]
/// multiplies both pseudo-counts by it before recording the outcome.
pub const DEFAULT_LAMBDA: f64 = 1.0;
78
/// Per-(agent, skill, bucket) Beta-Bernoulli posterior.
///
/// Keeps `alpha` and `beta` as `f64` so the forgetting factor `λ` can
/// apply continuous decay without losing resolution on small-count
/// cells.
///
/// All fields are public and serialisable, so values read back from a
/// sidecar are taken as-is; nothing in this type re-validates them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BetaPosterior {
    /// Pseudo-count of observed successes (plus the weak prior).
    /// Seeded as `kappa · c_self` and incremented by `1.0` on each
    /// successful outcome.
    pub alpha: f64,
    /// Pseudo-count of observed failures (plus the weak prior).
    /// Seeded as `kappa · (1 − c_self)` and incremented by `1.0` on
    /// each failed outcome.
    pub beta: f64,
    /// Total real observations seen so far. Unlike `alpha` / `beta`
    /// this counter is never decayed by the forgetting factor.
    pub n: u64,
    /// Self-declared prior confidence in `[0, 1]`, used to seed
    /// `alpha` / `beta` on first touch.
    pub c_self: f64,
    /// Weak-prior strength multiplier for [`Self::c_self`].
    pub kappa: f64,
    /// Timestamp of the last update, for drift diagnostics. Set at
    /// construction time and refreshed by [`Self::update`].
    pub last_update: DateTime<Utc>,
}
100
101impl BetaPosterior {
102 /// Seed a fresh posterior from self-declared confidence.
103 pub fn from_self_confidence(c_self: f64, kappa: f64) -> Self {
104 let c = c_self.clamp(0.0, 1.0);
105 Self {
106 alpha: kappa * c,
107 beta: kappa * (1.0 - c),
108 n: 0,
109 c_self: c,
110 kappa,
111 last_update: Utc::now(),
112 }
113 }
114
115 /// Posterior mean: `μ = α / (α + β)`.
116 pub fn mean(&self) -> f64 {
117 let total = self.alpha + self.beta;
118 if total <= 0.0 {
119 return 0.0;
120 }
121 self.alpha / total
122 }
123
124 /// Posterior variance: `u = αβ / ((α+β)² (α+β+1))`.
125 pub fn variance(&self) -> f64 {
126 let total = self.alpha + self.beta;
127 if total <= 0.0 {
128 return 0.0;
129 }
130 let denom = total * total * (total + 1.0);
131 (self.alpha * self.beta) / denom
132 }
133
134 /// LCB risk-aware score `μ − γ · √u`.
135 pub fn score(&self, gamma: f64) -> f64 {
136 self.mean() - gamma * self.variance().sqrt()
137 }
138
139 /// Apply an observed outcome. Forgetting factor `lambda ∈ [0, 1]`
140 /// multiplicatively decays prior pseudo-counts before the update.
141 pub fn update(&mut self, outcome: bool, lambda: f64) {
142 self.alpha *= lambda;
143 self.beta *= lambda;
144 if outcome {
145 self.alpha += 1.0;
146 } else {
147 self.beta += 1.0;
148 }
149 self.n += 1;
150 self.last_update = Utc::now();
151 }
152}
153
/// Tunable knobs for [`DelegationState`].
///
/// Every field carries a serde default, so a partially specified (or
/// empty) serialised config deserialises to the same values that
/// [`DelegationConfig::default`] produces for the missing fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DelegationConfig {
    /// LCB uncertainty penalty (default [`DEFAULT_GAMMA`]).
    #[serde(default = "default_gamma")]
    pub gamma: f64,
    /// Delegation margin (default [`DEFAULT_DELTA`]).
    #[serde(default = "default_delta")]
    pub delta: f64,
    /// Weak-prior strength for self-confidence seeding (default
    /// [`DEFAULT_KAPPA`]).
    #[serde(default = "default_kappa")]
    pub kappa: f64,
    /// Forgetting factor (default [`DEFAULT_LAMBDA`] = 1.0 = disabled).
    #[serde(default = "default_lambda")]
    pub lambda: f64,
    /// Feature flag gating Phase C consumers. Defaults to `false`; the
    /// LCB swap in `choose_router_target` only activates when this is
    /// set to `true` (or the `CODETETHER_DELEGATION_ENABLED` env var).
    ///
    /// NOTE(review): nothing in this module reads the flag or the env
    /// var; the gating is presumably done by the consumers — confirm.
    #[serde(default)]
    pub enabled: bool,
}
176
// serde's `#[serde(default = "...")]` attribute takes the path of a
// function rather than a constant, so each `DEFAULT_*` value used by
// `DelegationConfig` is wrapped in a small private function.

/// Serde fallback for [`DelegationConfig::gamma`].
fn default_gamma() -> f64 {
    DEFAULT_GAMMA
}

/// Serde fallback for [`DelegationConfig::delta`].
fn default_delta() -> f64 {
    DEFAULT_DELTA
}

/// Serde fallback for [`DelegationConfig::kappa`].
fn default_kappa() -> f64 {
    DEFAULT_KAPPA
}

/// Serde fallback for [`DelegationConfig::lambda`].
fn default_lambda() -> f64 {
    DEFAULT_LAMBDA
}
192
193impl Default for DelegationConfig {
194 fn default() -> Self {
195 Self {
196 gamma: DEFAULT_GAMMA,
197 delta: DEFAULT_DELTA,
198 kappa: DEFAULT_KAPPA,
199 lambda: DEFAULT_LAMBDA,
200 enabled: false,
201 }
202 }
203}
204
/// Logical key of a belief: `(agent_id, skill, bucket)`, with the two
/// names held as owned strings.
///
/// Note that [`DelegationState::beliefs`] is *not* keyed by this tuple
/// directly: the map uses the flat `String` produced by
/// [`DelegationState::key`], which encodes the same triple. This alias
/// names the un-flattened form of that key.
pub type BeliefKey = (String, String, Bucket);
208
/// Per-session CADMAS-CTX sidecar.
///
/// Holds one [`BetaPosterior`] per `(agent_id, skill, bucket)` triple
/// plus the configuration used to score and update them. Both fields
/// fall back to their `Default` when absent from serialised input.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DelegationState {
    /// Posteriors for each `(agent_id, skill, bucket)` triple, keyed by
    /// the flat string that [`DelegationState::key`] builds from it.
    #[serde(default)]
    pub beliefs: BTreeMap<String, BetaPosterior>,
    /// Runtime configuration.
    #[serde(default)]
    pub config: DelegationConfig,
}
219
220impl DelegationState {
221 /// Create a fresh state seeded with the supplied config.
222 pub fn with_config(config: DelegationConfig) -> Self {
223 Self {
224 beliefs: BTreeMap::new(),
225 config,
226 }
227 }
228
229 /// Serialise a `(agent, skill, bucket)` triple into the flat string
230 /// key used by the sidecar.
231 ///
232 /// The encoding is `"{agent}|{skill}|{difficulty}|{dependency}|{tool_use}"`
233 /// where each bucket field is the canonical snake_case string from
234 /// [`Difficulty::as_str`](crate::session::relevance::Difficulty::as_str),
235 /// [`Dependency::as_str`](crate::session::relevance::Dependency::as_str),
236 /// and [`ToolUse::as_str`](crate::session::relevance::ToolUse::as_str)
237 /// — matching the serde representation. Persisted keys therefore stay
238 /// stable across enum reorderings / variant renames, because the
239 /// `as_str` methods are explicitly documented as never-renamed.
240 pub fn key(agent: &str, skill: &str, bucket: Bucket) -> String {
241 format!(
242 "{agent}|{skill}|{}|{}|{}",
243 bucket.difficulty.as_str(),
244 bucket.dependency.as_str(),
245 bucket.tool_use.as_str(),
246 )
247 }
248
249 /// Look up or create the posterior for `(agent, skill, bucket)`
250 /// using `c_self` as the weak-prior seed.
251 pub fn ensure(
252 &mut self,
253 agent: &str,
254 skill: &str,
255 bucket: Bucket,
256 c_self: f64,
257 ) -> &mut BetaPosterior {
258 let key = Self::key(agent, skill, bucket);
259 let kappa = self.config.kappa;
260 self.beliefs
261 .entry(key)
262 .or_insert_with(|| BetaPosterior::from_self_confidence(c_self, kappa))
263 }
264
265 /// Current LCB score for `(agent, skill, bucket)`; `None` when the
266 /// triple has never been seeded or updated.
267 pub fn score(&self, agent: &str, skill: &str, bucket: Bucket) -> Option<f64> {
268 let key = Self::key(agent, skill, bucket);
269 self.beliefs.get(&key).map(|p| p.score(self.config.gamma))
270 }
271
272 /// Apply an observed outcome for `(agent, skill, bucket)`.
273 /// Creates the posterior with a neutral `c_self = 0.5` seed when
274 /// absent.
275 pub fn update(&mut self, agent: &str, skill: &str, bucket: Bucket, outcome: bool) {
276 let lambda = self.config.lambda;
277 let post = self.ensure(agent, skill, bucket, 0.5);
278 post.update(outcome, lambda);
279 }
280
281 /// Pick a peer to delegate to over `local`, or return `None` to
282 /// self-execute. Applies the margin rule `score(peer) > score(local) + δ`.
283 pub fn delegate_to<'a>(
284 &self,
285 local: &'a str,
286 peers: &'a [&'a str],
287 skill: &str,
288 bucket: Bucket,
289 ) -> Option<&'a str> {
290 let local_score = self.score(local, skill, bucket).unwrap_or(0.0);
291 let mut best: Option<(&str, f64)> = None;
292 for peer in peers {
293 if *peer == local {
294 continue;
295 }
296 let peer_score = self.score(peer, skill, bucket).unwrap_or(0.0);
297 if peer_score > local_score + self.config.delta {
298 match best {
299 Some((_, current_best)) if current_best >= peer_score => {}
300 _ => best = Some((peer, peer_score)),
301 }
302 }
303 }
304 best.map(|(peer, _)| peer)
305 }
306
307 /// Rank `candidates` by their LCB score for `(skill, bucket)` and
308 /// return the best one, or `None` when the input is empty.
309 ///
310 /// Unlike [`Self::delegate_to`] this does **not** honour a margin
311 /// δ — it's the right primitive for orchestration sites that pick
312 /// "which executor runs this subtask" (`src/swarm/orchestrator.rs`
313 /// step 28), "which persona handles this handoff"
314 /// (`src/ralph/ralph_loop.rs` step 29), and "which autochat
315 /// persona goes next" (`src/tui/app/autochat/` step 31) — there
316 /// is no "local" agent competing for the slot, so the margin rule
317 /// doesn't apply.
318 ///
319 /// Candidates with no posterior yet score 0.0 (conservative) and
320 /// are only picked when every other candidate also has no data —
321 /// i.e. the cold-start tie-break preserves the caller's input
322 /// order.
323 ///
324 /// # Examples
325 ///
326 /// ```rust
327 /// use codetether_agent::session::delegation::{DelegationConfig, DelegationState};
328 /// use codetether_agent::session::delegation_skills::SWARM_DISPATCH;
329 /// use codetether_agent::session::relevance::{Bucket, Dependency, Difficulty, ToolUse};
330 ///
331 /// let b = Bucket {
332 /// difficulty: Difficulty::Easy,
333 /// dependency: Dependency::Isolated,
334 /// tool_use: ToolUse::No,
335 /// };
336 /// let mut state = DelegationState::with_config(DelegationConfig::default());
337 /// // Cold start: no data → first candidate wins by input-order tie-break.
338 /// let pick = state.rank_candidates(&["shell_executor", "planner"], SWARM_DISPATCH, b);
339 /// assert_eq!(pick, Some("shell_executor"));
340 /// ```
341 pub fn rank_candidates<'a>(
342 &self,
343 candidates: &'a [&'a str],
344 skill: &str,
345 bucket: Bucket,
346 ) -> Option<&'a str> {
347 if candidates.is_empty() {
348 return None;
349 }
350 let mut best: Option<(&str, f64)> = None;
351 for name in candidates {
352 let score = self.score(name, skill, bucket).unwrap_or(0.0);
353 match best {
354 Some((_, current)) if current >= score => {}
355 _ => best = Some((name, score)),
356 }
357 }
358 best.map(|(name, _)| name)
359 }
360
361 /// Pull at most `m_z` pseudo-counts from `neighbors` into the
362 /// posterior for `(agent, skill, bucket)` when that posterior has
363 /// no real observations yet.
364 ///
365 /// Empirical-Bayes cold-start per CADMAS-CTX Section 3.6. Bounded
366 /// by `m_z ≤ 2` so neighbour mass cannot drown real evidence.
367 pub fn shrink_cold_start(
368 &mut self,
369 agent: &str,
370 skill: &str,
371 bucket: Bucket,
372 neighbors: &[Bucket],
373 m_z: f64,
374 ) {
375 let m_z = m_z.clamp(0.0, 2.0);
376 if m_z <= 0.0 {
377 return;
378 }
379 let own_key = Self::key(agent, skill, bucket);
380 if let Some(own) = self.beliefs.get(&own_key) {
381 if own.n > 0 {
382 return;
383 }
384 }
385 let mut sum_alpha = 0.0;
386 let mut sum_beta = 0.0;
387 let mut contributors = 0.0;
388 for nb in neighbors {
389 if *nb == bucket {
390 continue;
391 }
392 let nb_key = Self::key(agent, skill, *nb);
393 if let Some(post) = self.beliefs.get(&nb_key) {
394 if post.n > 0 {
395 sum_alpha += post.mean();
396 sum_beta += 1.0 - post.mean();
397 contributors += 1.0;
398 }
399 }
400 }
401 if contributors <= 0.0 {
402 return;
403 }
404 let avg_alpha = sum_alpha / contributors;
405 let avg_beta = sum_beta / contributors;
406 let kappa = self.config.kappa;
407 let post = self
408 .beliefs
409 .entry(own_key)
410 .or_insert_with(|| BetaPosterior::from_self_confidence(0.5, kappa));
411 post.alpha += avg_alpha * m_z;
412 post.beta += avg_beta * m_z;
413 }
414}
415
#[cfg(test)]
mod tests {
    use super::*;
    use crate::session::relevance::{Dependency, Difficulty, ToolUse};

    /// Canonical bucket shared by the tests below.
    fn bucket() -> Bucket {
        Bucket {
            difficulty: Difficulty::Easy,
            dependency: Dependency::Isolated,
            tool_use: ToolUse::No,
        }
    }

    /// Records 20 successes and 20 failures for `agent`, interleaved.
    fn record_even_history(state: &mut DelegationState, agent: &str, b: Bucket) {
        for _ in 0..20 {
            state.update(agent, "skill", b, true);
            state.update(agent, "skill", b, false);
        }
    }

    #[test]
    fn beta_update_increments_success_count() {
        let mut post = BetaPosterior::from_self_confidence(0.5, 2.0);
        post.update(true, 1.0);
        assert_eq!(post.n, 1);
        // α grew from 1.0 → 2.0, β unchanged at 1.0.
        assert!((post.alpha - 2.0).abs() < 1e-9);
        assert!((post.beta - 1.0).abs() < 1e-9);
    }

    #[test]
    fn beta_score_penalises_uncertainty() {
        // Thin: optimistic 0.8 prior plus a single failure → mean ≈ 0.53
        // backed by only 3 pseudo-counts.
        let mut thin = BetaPosterior::from_self_confidence(0.8, 2.0);
        thin.update(false, 1.0);
        // Thick: mean 0.5 backed by 202 pseudo-counts.
        let mut thick = BetaPosterior::from_self_confidence(0.5, 2.0);
        for _ in 0..100 {
            thick.update(true, 1.0);
            thick.update(false, 1.0);
        }
        // Thin has the higher mean but far larger variance, so its LCB
        // score (≈ 0.41) must fall below thick's (≈ 0.48).
        let gamma = 0.5;
        assert!(thin.mean() > thick.mean());
        assert!(thin.score(gamma) < thick.score(gamma));
    }

    #[test]
    fn delegation_state_update_seeds_and_records() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        state.update("openai", "model_call", bucket(), true);
        let score = state
            .score("openai", "model_call", bucket())
            .expect("update must seed the posterior");
        assert!(score.is_finite());
    }

    #[test]
    fn delegate_to_respects_margin() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b = bucket();
        // Local: 20 successes / 20 failures on top of the neutral (1, 1)
        // seed → LCB score ≈ 0.462.
        record_even_history(&mut state, "local", b);
        // Peer: the same history plus two extra successes, i.e. only a
        // marginally better hit rate → LCB score ≈ 0.485.
        record_even_history(&mut state, "peer", b);
        for _ in 0..2 {
            state.update("peer", "skill", b, true);
        }
        let local_score = state.score("local", "skill", b).expect("local is seeded");
        let peer_score = state.score("peer", "skill", b).expect("peer is seeded");
        // The peer is better, but by less than the default margin δ = 0.05 …
        assert!(peer_score > local_score);
        assert!(peer_score - local_score < DEFAULT_DELTA);
        // … so the margin guards against a trivial hand-off.
        let peers = ["peer"];
        let maybe = state.delegate_to("local", &peers, "skill", b);
        assert_eq!(maybe, None);
    }

    #[test]
    fn delegate_to_hands_off_when_peer_clears_margin() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b = bucket();
        // Local sits at ≈ 0.462.
        record_even_history(&mut state, "local", b);
        // "ace" has 20 straight successes → LCB score ≈ 0.933, far
        // beyond local + δ.
        for _ in 0..20 {
            state.update("ace", "skill", b, true);
        }
        // `local` itself is skipped and the unseen peer scores 0.0, so
        // only "ace" can clear the margin.
        let peers = ["local", "unseen", "ace"];
        let maybe = state.delegate_to("local", &peers, "skill", b);
        assert_eq!(maybe, Some("ace"));
    }

    #[test]
    fn shrink_cold_start_pulls_neighbour_mass() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b1 = bucket();
        let b2 = Bucket {
            difficulty: Difficulty::Medium,
            ..b1
        };
        for _ in 0..10 {
            state.update("agent", "skill", b2, true);
        }
        // b1 has no real data yet.
        assert!(
            state
                .beliefs
                .get(&DelegationState::key("agent", "skill", b1))
                .map(|p| p.n)
                .unwrap_or(0)
                == 0
        );
        state.shrink_cold_start("agent", "skill", b1, &[b2], 2.0);
        let post = state
            .beliefs
            .get(&DelegationState::key("agent", "skill", b1))
            .unwrap();
        // Pseudo-alpha should have grown toward b2's mean (≈ 0.92).
        assert!(post.alpha > post.beta);
        // Shrinkage adds pseudo-counts only, never real observations.
        assert_eq!(post.n, 0);
    }

    #[test]
    fn rank_candidates_picks_first_on_cold_start() {
        let state = DelegationState::with_config(DelegationConfig::default());
        let pick = state.rank_candidates(&["a", "b", "c"], "swarm_dispatch", bucket());
        assert_eq!(pick, Some("a"));
    }

    #[test]
    fn rank_candidates_prefers_best_scoring_once_warm() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b = bucket();
        for _ in 0..5 {
            state.update("b", "swarm_dispatch", b, true);
        }
        for _ in 0..5 {
            state.update("a", "swarm_dispatch", b, false);
        }
        let pick = state.rank_candidates(&["a", "b"], "swarm_dispatch", b);
        assert_eq!(pick, Some("b"));
    }

    #[test]
    fn rank_candidates_is_none_for_empty_input() {
        let state = DelegationState::with_config(DelegationConfig::default());
        assert!(
            state
                .rank_candidates(&[], "swarm_dispatch", bucket())
                .is_none()
        );
    }

    #[test]
    fn config_defaults_match_documented_constants() {
        let cfg = DelegationConfig::default();
        assert!((cfg.gamma - DEFAULT_GAMMA).abs() < 1e-9);
        assert!((cfg.delta - DEFAULT_DELTA).abs() < 1e-9);
        assert!((cfg.kappa - DEFAULT_KAPPA).abs() < 1e-9);
        assert!((cfg.lambda - DEFAULT_LAMBDA).abs() < 1e-9);
        assert!(!cfg.enabled);
    }
}