Skip to main content

codetether_agent/session/
delegation.rs

1//! CADMAS-CTX delegation posteriors (arXiv:2604.17950).
2//!
3//! ## Role
4//!
5//! Static per-agent skill scores are provably lossy when capability is
6//! context-conditional (linear regret `Ω(ε · P(z₀) · T)`). CADMAS-CTX
7//! replaces them with a hierarchy of per-(agent, skill, bucket) Beta
8//! posteriors scored under a risk-aware LCB, achieving `O(log T)`
9//! regret. This module is the Phase C scaffolding for that replacement
10//! on codetether's internal routing surfaces (`choose_router_target`,
11//! swarm / ralph dispatch, RLM model selection, autochat persona pick).
12//!
13//! ## Scope in Phase C step 16
14//!
15//! Types + math + sidecar-compatible serialisation, with no live
16//! consumers yet. The go/no-go experiment in
17//! [`choose_router_target`](crate::session::helper::prompt) lands in a
18//! follow-up commit (Phase C step 17) once these primitives are stable.
19//!
20//! ## Invariants
21//!
22//! * State lives **only** in the sidecar — never in `DerivedContext`.
23//!   Capability history is not chat context either.
24//! * Updates are Beta-Bernoulli conjugate; no ML-style training.
25//! * Cold-start shrinkage is bounded by `m_z ≤ 2` per the paper.
26//!
27//! ## Examples
28//!
29//! ```rust
30//! use codetether_agent::session::delegation::{
31//!     BetaPosterior, DelegationConfig, DelegationState,
32//! };
33//! use codetether_agent::session::relevance::{Bucket, Dependency, Difficulty, ToolUse};
34//!
35//! let bucket = Bucket {
36//!     difficulty: Difficulty::Easy,
37//!     dependency: Dependency::Isolated,
38//!     tool_use: ToolUse::No,
39//! };
40//!
41//! let mut state = DelegationState::with_config(DelegationConfig::default());
42//! state.update("openai", "model_call", bucket, true);
43//! state.update("openai", "model_call", bucket, true);
44//! state.update("openai", "model_call", bucket, false);
45//!
46//! let score = state.score("openai", "model_call", bucket);
47//! assert!(score.is_some());
48//! ```
49
50use chrono::{DateTime, Utc};
51use serde::{Deserialize, Serialize};
52use std::collections::BTreeMap;
53
54use super::relevance::Bucket;
55
/// Uncertainty penalty `γ` applied by LCB scoring when nothing else is
/// configured.
///
/// Follows the CADMAS-CTX Section 3.4 default: `γ = 0.5` balances
/// exploration against conservative fallback.
pub const DEFAULT_GAMMA: f64 = 0.5;

/// Delegation margin `δ` used when nothing else is configured.
///
/// Delegation only fires when a peer's LCB score exceeds the local
/// agent's score by at least this amount (CADMAS-CTX Eq. 8).
pub const DEFAULT_DELTA: f64 = 0.05;

/// Weak-prior strength `κ` used when seeding a posterior from
/// self-declared confidence and nothing else is configured.
pub const DEFAULT_KAPPA: f64 = 2.0;

/// Forgetting factor `λ` applied on each update when nothing else is
/// configured.
///
/// `1.0` means no decay (the Phase C v1 default). Values in
/// `[0.9, 1.0)` let posteriors track drifting capability (CADMAS-CTX
/// §5.9 and the Phase C step 22 follow-up).
pub const DEFAULT_LAMBDA: f64 = 1.0;
78
79/// Per-(agent, skill, bucket) Beta-Bernoulli posterior.
80///
81/// Keeps `alpha` and `beta` as `f64` so the forgetting factor `λ` can
82/// apply continuous decay without losing resolution on small-count
83/// cells.
84#[derive(Debug, Clone, Serialize, Deserialize)]
85pub struct BetaPosterior {
86    /// Pseudo-count of observed successes (plus the weak prior).
87    pub alpha: f64,
88    /// Pseudo-count of observed failures (plus the weak prior).
89    pub beta: f64,
90    /// Total real observations seen so far.
91    pub n: u64,
92    /// Self-declared prior confidence in `[0, 1]`, used to seed
93    /// `alpha` / `beta` on first touch.
94    pub c_self: f64,
95    /// Weak-prior strength multiplier for [`Self::c_self`].
96    pub kappa: f64,
97    /// Timestamp of the last update, for drift diagnostics.
98    pub last_update: DateTime<Utc>,
99}
100
101impl BetaPosterior {
102    /// Seed a fresh posterior from self-declared confidence.
103    pub fn from_self_confidence(c_self: f64, kappa: f64) -> Self {
104        let c = c_self.clamp(0.0, 1.0);
105        Self {
106            alpha: kappa * c,
107            beta: kappa * (1.0 - c),
108            n: 0,
109            c_self: c,
110            kappa,
111            last_update: Utc::now(),
112        }
113    }
114
115    /// Posterior mean: `μ = α / (α + β)`.
116    pub fn mean(&self) -> f64 {
117        let total = self.alpha + self.beta;
118        if total <= 0.0 {
119            return 0.0;
120        }
121        self.alpha / total
122    }
123
124    /// Posterior variance: `u = αβ / ((α+β)² (α+β+1))`.
125    pub fn variance(&self) -> f64 {
126        let total = self.alpha + self.beta;
127        if total <= 0.0 {
128            return 0.0;
129        }
130        let denom = total * total * (total + 1.0);
131        (self.alpha * self.beta) / denom
132    }
133
134    /// LCB risk-aware score `μ − γ · √u`.
135    pub fn score(&self, gamma: f64) -> f64 {
136        self.mean() - gamma * self.variance().sqrt()
137    }
138
139    /// Apply an observed outcome. Forgetting factor `lambda ∈ [0, 1]`
140    /// multiplicatively decays prior pseudo-counts before the update.
141    pub fn update(&mut self, outcome: bool, lambda: f64) {
142        self.alpha *= lambda;
143        self.beta *= lambda;
144        if outcome {
145            self.alpha += 1.0;
146        } else {
147            self.beta += 1.0;
148        }
149        self.n += 1;
150        self.last_update = Utc::now();
151    }
152}
153
154/// Tunable knobs for [`DelegationState`].
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct DelegationConfig {
157    /// LCB uncertainty penalty (default [`DEFAULT_GAMMA`]).
158    #[serde(default = "default_gamma")]
159    pub gamma: f64,
160    /// Delegation margin (default [`DEFAULT_DELTA`]).
161    #[serde(default = "default_delta")]
162    pub delta: f64,
163    /// Weak-prior strength for self-confidence seeding (default
164    /// [`DEFAULT_KAPPA`]).
165    #[serde(default = "default_kappa")]
166    pub kappa: f64,
167    /// Forgetting factor (default [`DEFAULT_LAMBDA`] = 1.0 = disabled).
168    #[serde(default = "default_lambda")]
169    pub lambda: f64,
170    /// Feature flag gating Phase C consumers. Defaults to `false`; the
171    /// LCB swap in `choose_router_target` only activates when this is
172    /// set to `true` (or the `CODETETHER_DELEGATION_ENABLED` env var).
173    #[serde(default)]
174    pub enabled: bool,
175}
176
177fn default_gamma() -> f64 {
178    DEFAULT_GAMMA
179}
180
181fn default_delta() -> f64 {
182    DEFAULT_DELTA
183}
184
185fn default_kappa() -> f64 {
186    DEFAULT_KAPPA
187}
188
189fn default_lambda() -> f64 {
190    DEFAULT_LAMBDA
191}
192
193impl Default for DelegationConfig {
194    fn default() -> Self {
195        Self {
196            gamma: DEFAULT_GAMMA,
197            delta: DEFAULT_DELTA,
198            kappa: DEFAULT_KAPPA,
199            lambda: DEFAULT_LAMBDA,
200            enabled: false,
201        }
202    }
203}
204
205/// Key for [`DelegationState::beliefs`]. Stored as owned strings so the
206/// map serialises cleanly and survives across process boundaries.
207pub type BeliefKey = (String, String, Bucket);
208
209/// Per-session CADMAS-CTX sidecar.
210#[derive(Debug, Clone, Default, Serialize, Deserialize)]
211pub struct DelegationState {
212    /// Posteriors keyed by `(agent_id, skill, bucket)`.
213    #[serde(default)]
214    pub beliefs: BTreeMap<String, BetaPosterior>,
215    /// Runtime configuration.
216    #[serde(default)]
217    pub config: DelegationConfig,
218}
219
220impl DelegationState {
221    /// Create a fresh state seeded with the supplied config.
222    pub fn with_config(config: DelegationConfig) -> Self {
223        Self {
224            beliefs: BTreeMap::new(),
225            config,
226        }
227    }
228
229    /// Serialise a `(agent, skill, bucket)` triple into the flat string
230    /// key used by the sidecar.
231    ///
232    /// The encoding is `"{agent}|{skill}|{difficulty}|{dependency}|{tool_use}"`
233    /// where each bucket field is the canonical snake_case string from
234    /// [`Difficulty::as_str`](crate::session::relevance::Difficulty::as_str),
235    /// [`Dependency::as_str`](crate::session::relevance::Dependency::as_str),
236    /// and [`ToolUse::as_str`](crate::session::relevance::ToolUse::as_str)
237    /// — matching the serde representation. Persisted keys therefore stay
238    /// stable across enum reorderings / variant renames, because the
239    /// `as_str` methods are explicitly documented as never-renamed.
240    pub fn key(agent: &str, skill: &str, bucket: Bucket) -> String {
241        format!(
242            "{agent}|{skill}|{}|{}|{}",
243            bucket.difficulty.as_str(),
244            bucket.dependency.as_str(),
245            bucket.tool_use.as_str(),
246        )
247    }
248
249    /// Look up or create the posterior for `(agent, skill, bucket)`
250    /// using `c_self` as the weak-prior seed.
251    pub fn ensure(
252        &mut self,
253        agent: &str,
254        skill: &str,
255        bucket: Bucket,
256        c_self: f64,
257    ) -> &mut BetaPosterior {
258        let key = Self::key(agent, skill, bucket);
259        let kappa = self.config.kappa;
260        self.beliefs
261            .entry(key)
262            .or_insert_with(|| BetaPosterior::from_self_confidence(c_self, kappa))
263    }
264
265    /// Current LCB score for `(agent, skill, bucket)`; `None` when the
266    /// triple has never been seeded or updated.
267    pub fn score(&self, agent: &str, skill: &str, bucket: Bucket) -> Option<f64> {
268        let key = Self::key(agent, skill, bucket);
269        self.beliefs.get(&key).map(|p| p.score(self.config.gamma))
270    }
271
272    /// Apply an observed outcome for `(agent, skill, bucket)`.
273    /// Creates the posterior with a neutral `c_self = 0.5` seed when
274    /// absent.
275    pub fn update(&mut self, agent: &str, skill: &str, bucket: Bucket, outcome: bool) {
276        let lambda = self.config.lambda;
277        let post = self.ensure(agent, skill, bucket, 0.5);
278        post.update(outcome, lambda);
279    }
280
281    /// Pick a peer to delegate to over `local`, or return `None` to
282    /// self-execute. Applies the margin rule `score(peer) > score(local) + δ`.
283    pub fn delegate_to<'a>(
284        &self,
285        local: &'a str,
286        peers: &'a [&'a str],
287        skill: &str,
288        bucket: Bucket,
289    ) -> Option<&'a str> {
290        let local_score = self.score(local, skill, bucket).unwrap_or(0.0);
291        let mut best: Option<(&str, f64)> = None;
292        for peer in peers {
293            if *peer == local {
294                continue;
295            }
296            let peer_score = self.score(peer, skill, bucket).unwrap_or(0.0);
297            if peer_score > local_score + self.config.delta {
298                match best {
299                    Some((_, current_best)) if current_best >= peer_score => {}
300                    _ => best = Some((peer, peer_score)),
301                }
302            }
303        }
304        best.map(|(peer, _)| peer)
305    }
306
307    /// Rank `candidates` by their LCB score for `(skill, bucket)` and
308    /// return the best one, or `None` when the input is empty.
309    ///
310    /// Unlike [`Self::delegate_to`] this does **not** honour a margin
311    /// δ — it's the right primitive for orchestration sites that pick
312    /// "which executor runs this subtask" (`src/swarm/orchestrator.rs`
313    /// step 28), "which persona handles this handoff"
314    /// (`src/ralph/ralph_loop.rs` step 29), and "which autochat
315    /// persona goes next" (`src/tui/app/autochat/` step 31) — there
316    /// is no "local" agent competing for the slot, so the margin rule
317    /// doesn't apply.
318    ///
319    /// Candidates with no posterior yet score 0.0 (conservative) and
320    /// are only picked when every other candidate also has no data —
321    /// i.e. the cold-start tie-break preserves the caller's input
322    /// order.
323    ///
324    /// # Examples
325    ///
326    /// ```rust
327    /// use codetether_agent::session::delegation::{DelegationConfig, DelegationState};
328    /// use codetether_agent::session::delegation_skills::SWARM_DISPATCH;
329    /// use codetether_agent::session::relevance::{Bucket, Dependency, Difficulty, ToolUse};
330    ///
331    /// let b = Bucket {
332    ///     difficulty: Difficulty::Easy,
333    ///     dependency: Dependency::Isolated,
334    ///     tool_use: ToolUse::No,
335    /// };
336    /// let mut state = DelegationState::with_config(DelegationConfig::default());
337    /// // Cold start: no data → first candidate wins by input-order tie-break.
338    /// let pick = state.rank_candidates(&["shell_executor", "planner"], SWARM_DISPATCH, b);
339    /// assert_eq!(pick, Some("shell_executor"));
340    /// ```
341    pub fn rank_candidates<'a>(
342        &self,
343        candidates: &'a [&'a str],
344        skill: &str,
345        bucket: Bucket,
346    ) -> Option<&'a str> {
347        if candidates.is_empty() {
348            return None;
349        }
350        let mut best: Option<(&str, f64)> = None;
351        for name in candidates {
352            let score = self.score(name, skill, bucket).unwrap_or(0.0);
353            match best {
354                Some((_, current)) if current >= score => {}
355                _ => best = Some((name, score)),
356            }
357        }
358        best.map(|(name, _)| name)
359    }
360
361    /// Pull at most `m_z` pseudo-counts from `neighbors` into the
362    /// posterior for `(agent, skill, bucket)` when that posterior has
363    /// no real observations yet.
364    ///
365    /// Empirical-Bayes cold-start per CADMAS-CTX Section 3.6. Bounded
366    /// by `m_z ≤ 2` so neighbour mass cannot drown real evidence.
367    pub fn shrink_cold_start(
368        &mut self,
369        agent: &str,
370        skill: &str,
371        bucket: Bucket,
372        neighbors: &[Bucket],
373        m_z: f64,
374    ) {
375        let m_z = m_z.clamp(0.0, 2.0);
376        if m_z <= 0.0 {
377            return;
378        }
379        let own_key = Self::key(agent, skill, bucket);
380        if let Some(own) = self.beliefs.get(&own_key) {
381            if own.n > 0 {
382                return;
383            }
384        }
385        let mut sum_alpha = 0.0;
386        let mut sum_beta = 0.0;
387        let mut contributors = 0.0;
388        for nb in neighbors {
389            if *nb == bucket {
390                continue;
391            }
392            let nb_key = Self::key(agent, skill, *nb);
393            if let Some(post) = self.beliefs.get(&nb_key) {
394                if post.n > 0 {
395                    sum_alpha += post.mean();
396                    sum_beta += 1.0 - post.mean();
397                    contributors += 1.0;
398                }
399            }
400        }
401        if contributors <= 0.0 {
402            return;
403        }
404        let avg_alpha = sum_alpha / contributors;
405        let avg_beta = sum_beta / contributors;
406        let kappa = self.config.kappa;
407        let post = self
408            .beliefs
409            .entry(own_key)
410            .or_insert_with(|| BetaPosterior::from_self_confidence(0.5, kappa));
411        post.alpha += avg_alpha * m_z;
412        post.beta += avg_beta * m_z;
413    }
414}
415
#[cfg(test)]
mod tests {
    use super::*;
    use crate::session::relevance::{Dependency, Difficulty, ToolUse};

    /// Canonical bucket shared by the tests below.
    fn bucket() -> Bucket {
        Bucket {
            difficulty: Difficulty::Easy,
            dependency: Dependency::Isolated,
            tool_use: ToolUse::No,
        }
    }

    #[test]
    fn beta_update_increments_success_count() {
        let mut post = BetaPosterior::from_self_confidence(0.5, 2.0);
        post.update(true, 1.0);
        assert_eq!(post.n, 1);
        // α grew from 1.0 → 2.0, β unchanged at 1.0.
        assert!((post.alpha - 2.0).abs() < 1e-9);
        assert!((post.beta - 1.0).abs() < 1e-9);
    }

    #[test]
    fn beta_score_penalises_uncertainty() {
        let mut thin = BetaPosterior::from_self_confidence(0.8, 2.0);
        let mut thick = BetaPosterior::from_self_confidence(0.5, 2.0);
        for _ in 0..100 {
            thick.update(true, 1.0);
            thick.update(false, 1.0);
        }
        // After one failure `thin` has the higher mean (≈ 0.53 vs 0.5)
        // but only a single observation, so its variance is far larger
        // and its LCB score must land below `thick`'s.
        thin.update(false, 1.0);
        let gamma = 0.5;
        assert!(thin.mean() > thick.mean());
        assert!(thin.score(gamma) < thick.score(gamma));
    }

    #[test]
    fn delegation_state_update_seeds_and_records() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        state.update("openai", "model_call", bucket(), true);
        let score = state
            .score("openai", "model_call", bucket())
            .expect("update must seed the posterior");
        assert!(score.is_finite());
    }

    #[test]
    fn delegate_to_respects_margin() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b = bucket();
        // Local has lots of evidence, mid-performance.
        for _ in 0..20 {
            state.update("local", "skill", b, true);
            state.update("local", "skill", b, false);
        }
        // Peer has the same mixed record plus two extra successes, so
        // its hit rate is only slightly better than local's.
        for _ in 0..20 {
            state.update("peer", "skill", b, true);
            state.update("peer", "skill", b, false);
        }
        for _ in 0..2 {
            state.update("peer", "skill", b, true);
        }
        // The LCB gap (≈ 0.024) is below δ = 0.05, so the margin must
        // block this trivial hand-off.
        let peers = ["peer"];
        assert_eq!(state.delegate_to("local", &peers, "skill", b), None);

        // A peer with a long unbroken success record clears the margin
        // comfortably and must be chosen.
        for _ in 0..40 {
            state.update("strong", "skill", b, true);
        }
        let peers = ["peer", "strong"];
        assert_eq!(
            state.delegate_to("local", &peers, "skill", b),
            Some("strong")
        );
    }

    #[test]
    fn shrink_cold_start_pulls_neighbour_mass() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b1 = bucket();
        let b2 = Bucket {
            difficulty: Difficulty::Medium,
            ..b1
        };
        for _ in 0..10 {
            state.update("agent", "skill", b2, true);
        }
        // b1 has no real data yet.
        assert!(
            state
                .beliefs
                .get(&DelegationState::key("agent", "skill", b1))
                .map(|p| p.n)
                .unwrap_or(0)
                == 0
        );
        state.shrink_cold_start("agent", "skill", b1, &[b2], 2.0);
        let post = state
            .beliefs
            .get(&DelegationState::key("agent", "skill", b1))
            .unwrap();
        // Pseudo-alpha should have grown toward b2's mean (≈ 0.92).
        assert!(post.alpha > post.beta);
    }

    #[test]
    fn rank_candidates_picks_first_on_cold_start() {
        let state = DelegationState::with_config(DelegationConfig::default());
        let pick = state.rank_candidates(&["a", "b", "c"], "swarm_dispatch", bucket());
        assert_eq!(pick, Some("a"));
    }

    #[test]
    fn rank_candidates_prefers_best_scoring_once_warm() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b = bucket();
        for _ in 0..5 {
            state.update("b", "swarm_dispatch", b, true);
        }
        for _ in 0..5 {
            state.update("a", "swarm_dispatch", b, false);
        }
        let pick = state.rank_candidates(&["a", "b"], "swarm_dispatch", b);
        assert_eq!(pick, Some("b"));
    }

    #[test]
    fn rank_candidates_is_none_for_empty_input() {
        let state = DelegationState::with_config(DelegationConfig::default());
        assert!(
            state
                .rank_candidates(&[], "swarm_dispatch", bucket())
                .is_none()
        );
    }

    #[test]
    fn config_defaults_match_documented_constants() {
        let cfg = DelegationConfig::default();
        assert!((cfg.gamma - DEFAULT_GAMMA).abs() < 1e-9);
        assert!((cfg.delta - DEFAULT_DELTA).abs() < 1e-9);
        assert!((cfg.kappa - DEFAULT_KAPPA).abs() < 1e-9);
        assert!((cfg.lambda - DEFAULT_LAMBDA).abs() < 1e-9);
        assert!(!cfg.enabled);
    }
}