Skip to main content

codetether_agent/session/
delegation.rs

1//! CADMAS-CTX delegation posteriors (arXiv:2604.17950).
2//!
3//! ## Role
4//!
5//! Static per-agent skill scores are provably lossy when capability is
6//! context-conditional (linear regret `Ω(ε · P(z₀) · T)`). CADMAS-CTX
7//! replaces them with a hierarchy of per-(agent, skill, bucket) Beta
8//! posteriors scored under a risk-aware LCB, achieving `O(log T)`
9//! regret. This module is the Phase C scaffolding for that replacement
10//! on codetether's internal routing surfaces (`choose_router_target`,
11//! swarm / ralph dispatch, RLM model selection, autochat persona pick).
12//!
13//! ## Scope in Phase C step 16
14//!
15//! Types + math + sidecar-compatible serialisation, with no live
16//! consumers yet. The go/no-go experiment in
17//! [`choose_router_target`](crate::session::helper::prompt) lands in a
18//! follow-up commit (Phase C step 17) once these primitives are stable.
19//!
20//! ## Invariants
21//!
22//! * State lives **only** in the sidecar — never in `DerivedContext`.
23//!   Capability history is not chat context either.
24//! * Updates are Beta-Bernoulli conjugate; no ML-style training.
25//! * Cold-start shrinkage is bounded by `m_z ≤ 2` per the paper.
26//!
27//! ## Examples
28//!
29//! ```rust
30//! use codetether_agent::session::delegation::{
31//!     BetaPosterior, DelegationConfig, DelegationState,
32//! };
33//! use codetether_agent::session::relevance::{Bucket, Dependency, Difficulty, ToolUse};
34//!
35//! let bucket = Bucket {
36//!     difficulty: Difficulty::Easy,
37//!     dependency: Dependency::Isolated,
38//!     tool_use: ToolUse::No,
39//! };
40//!
41//! let mut state = DelegationState::with_config(DelegationConfig::default());
42//! state.update("openai", "model_call", bucket, true);
43//! state.update("openai", "model_call", bucket, true);
44//! state.update("openai", "model_call", bucket, false);
45//!
46//! let score = state.score("openai", "model_call", bucket);
47//! assert!(score.is_some());
48//! ```
49
50use chrono::{DateTime, Utc};
51use serde::{Deserialize, Serialize};
52use std::collections::BTreeMap;
53use std::env;
54
55use super::relevance::Bucket;
56
/// Uncertainty penalty `γ` applied by the LCB score when no override
/// is configured.
///
/// Follows the CADMAS-CTX Section 3.4 default: `γ = 0.5` trades
/// exploration off against conservative fallback.
pub const DEFAULT_GAMMA: f64 = 0.5;

/// Delegation margin `δ` used when no override is configured.
///
/// Delegation only fires when a peer's LCB score exceeds the local
/// agent's score by at least this amount (CADMAS-CTX Eq. 8).
pub const DEFAULT_DELTA: f64 = 0.05;

/// Weak-prior strength `κ` used when no override is configured; it
/// scales the self-declared confidence that seeds a posterior.
pub const DEFAULT_KAPPA: f64 = 2.0;

/// Forgetting factor `λ` applied on each update when no override is
/// configured.
///
/// `1.0` turns decay off (the Phase C v1 default). Values in
/// `[0.9, 1.0)` let posteriors track drifting capability (CADMAS-CTX
/// §5.9 and the Phase C step 22 follow-up).
pub const DEFAULT_LAMBDA: f64 = 1.0;
79
80/// Per-(agent, skill, bucket) Beta-Bernoulli posterior.
81///
82/// Keeps `alpha` and `beta` as `f64` so the forgetting factor `λ` can
83/// apply continuous decay without losing resolution on small-count
84/// cells.
85#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct BetaPosterior {
87    /// Pseudo-count of observed successes (plus the weak prior).
88    pub alpha: f64,
89    /// Pseudo-count of observed failures (plus the weak prior).
90    pub beta: f64,
91    /// Total real observations seen so far.
92    pub n: u64,
93    /// Self-declared prior confidence in `[0, 1]`, used to seed
94    /// `alpha` / `beta` on first touch.
95    pub c_self: f64,
96    /// Weak-prior strength multiplier for [`Self::c_self`].
97    pub kappa: f64,
98    /// Timestamp of the last update, for drift diagnostics.
99    pub last_update: DateTime<Utc>,
100}
101
102impl BetaPosterior {
103    /// Seed a fresh posterior from self-declared confidence.
104    pub fn from_self_confidence(c_self: f64, kappa: f64) -> Self {
105        let c = c_self.clamp(0.0, 1.0);
106        Self {
107            alpha: kappa * c,
108            beta: kappa * (1.0 - c),
109            n: 0,
110            c_self: c,
111            kappa,
112            last_update: Utc::now(),
113        }
114    }
115
116    /// Posterior mean: `μ = α / (α + β)`.
117    pub fn mean(&self) -> f64 {
118        let total = self.alpha + self.beta;
119        if total <= 0.0 {
120            return 0.0;
121        }
122        self.alpha / total
123    }
124
125    /// Posterior variance: `u = αβ / ((α+β)² (α+β+1))`.
126    pub fn variance(&self) -> f64 {
127        let total = self.alpha + self.beta;
128        if total <= 0.0 {
129            return 0.0;
130        }
131        let denom = total * total * (total + 1.0);
132        (self.alpha * self.beta) / denom
133    }
134
135    /// LCB risk-aware score `μ − γ · √u`.
136    pub fn score(&self, gamma: f64) -> f64 {
137        self.mean() - gamma * self.variance().sqrt()
138    }
139
140    /// Apply an observed outcome. Forgetting factor `lambda ∈ [0, 1]`
141    /// multiplicatively decays prior pseudo-counts before the update.
142    pub fn update(&mut self, outcome: bool, lambda: f64) {
143        self.alpha *= lambda;
144        self.beta *= lambda;
145        if outcome {
146            self.alpha += 1.0;
147        } else {
148            self.beta += 1.0;
149        }
150        self.n += 1;
151        self.last_update = Utc::now();
152    }
153}
154
155/// Tunable knobs for [`DelegationState`].
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct DelegationConfig {
158    /// LCB uncertainty penalty (default [`DEFAULT_GAMMA`]).
159    #[serde(default = "default_gamma")]
160    pub gamma: f64,
161    /// Delegation margin (default [`DEFAULT_DELTA`]).
162    #[serde(default = "default_delta")]
163    pub delta: f64,
164    /// Weak-prior strength for self-confidence seeding (default
165    /// [`DEFAULT_KAPPA`]).
166    #[serde(default = "default_kappa")]
167    pub kappa: f64,
168    /// Forgetting factor (default [`DEFAULT_LAMBDA`] = 1.0 = disabled).
169    #[serde(default = "default_lambda")]
170    pub lambda: f64,
171    /// Feature flag gating Phase C consumers. Defaults to `false`; the
172    /// LCB swap in `choose_router_target` only activates when this is
173    /// set to `true` (or the `CODETETHER_DELEGATION_ENABLED` env var).
174    #[serde(default)]
175    pub enabled: bool,
176}
177
178fn default_gamma() -> f64 {
179    DEFAULT_GAMMA
180}
181
182fn default_delta() -> f64 {
183    DEFAULT_DELTA
184}
185
186fn default_kappa() -> f64 {
187    DEFAULT_KAPPA
188}
189
190fn default_lambda() -> f64 {
191    DEFAULT_LAMBDA
192}
193
194impl Default for DelegationConfig {
195    fn default() -> Self {
196        Self {
197            gamma: DEFAULT_GAMMA,
198            delta: DEFAULT_DELTA,
199            kappa: DEFAULT_KAPPA,
200            lambda: DEFAULT_LAMBDA,
201            enabled: false,
202        }
203    }
204}
205
206/// Key for [`DelegationState::beliefs`]. Stored as owned strings so the
207/// map serialises cleanly and survives across process boundaries.
208pub type BeliefKey = (String, String, Bucket);
209
210/// Per-session CADMAS-CTX sidecar.
211#[derive(Debug, Clone, Default, Serialize, Deserialize)]
212pub struct DelegationState {
213    /// Posteriors keyed by `(agent_id, skill, bucket)`.
214    #[serde(default)]
215    pub beliefs: BTreeMap<String, BetaPosterior>,
216    /// Runtime configuration.
217    #[serde(default)]
218    pub config: DelegationConfig,
219}
220
221impl DelegationState {
222    /// Create a fresh state seeded with the supplied config.
223    pub fn with_config(config: DelegationConfig) -> Self {
224        Self {
225            beliefs: BTreeMap::new(),
226            config,
227        }
228    }
229
230    /// Whether CADMAS-CTX routing is enabled for this session.
231    ///
232    /// `CODETETHER_DELEGATION_ENABLED` overrides the persisted config when
233    /// present so operators can toggle the feature process-wide.
234    pub fn enabled(&self) -> bool {
235        env_enabled_override().unwrap_or(self.config.enabled)
236    }
237
238    /// Serialise a `(agent, skill, bucket)` triple into the flat string
239    /// key used by the sidecar.
240    ///
241    /// The encoding is `"{agent}|{skill}|{difficulty}|{dependency}|{tool_use}"`
242    /// where each bucket field is the canonical snake_case string from
243    /// [`Difficulty::as_str`](crate::session::relevance::Difficulty::as_str),
244    /// [`Dependency::as_str`](crate::session::relevance::Dependency::as_str),
245    /// and [`ToolUse::as_str`](crate::session::relevance::ToolUse::as_str)
246    /// — matching the serde representation. Persisted keys therefore stay
247    /// stable across enum reorderings / variant renames, because the
248    /// `as_str` methods are explicitly documented as never-renamed.
249    pub fn key(agent: &str, skill: &str, bucket: Bucket) -> String {
250        format!(
251            "{agent}|{skill}|{}|{}|{}",
252            bucket.difficulty.as_str(),
253            bucket.dependency.as_str(),
254            bucket.tool_use.as_str(),
255        )
256    }
257
258    /// Look up or create the posterior for `(agent, skill, bucket)`
259    /// using `c_self` as the weak-prior seed.
260    pub fn ensure(
261        &mut self,
262        agent: &str,
263        skill: &str,
264        bucket: Bucket,
265        c_self: f64,
266    ) -> &mut BetaPosterior {
267        let key = Self::key(agent, skill, bucket);
268        let kappa = self.config.kappa;
269        self.beliefs
270            .entry(key)
271            .or_insert_with(|| BetaPosterior::from_self_confidence(c_self, kappa))
272    }
273
274    /// Current LCB score for `(agent, skill, bucket)`; `None` when the
275    /// triple has never been seeded or updated.
276    pub fn score(&self, agent: &str, skill: &str, bucket: Bucket) -> Option<f64> {
277        let key = Self::key(agent, skill, bucket);
278        self.beliefs.get(&key).map(|p| p.score(self.config.gamma))
279    }
280
281    /// Apply an observed outcome for `(agent, skill, bucket)`.
282    /// Creates the posterior with a neutral `c_self = 0.5` seed when
283    /// absent.
284    pub fn update(&mut self, agent: &str, skill: &str, bucket: Bucket, outcome: bool) {
285        let lambda = self.config.lambda;
286        let post = self.ensure(agent, skill, bucket, 0.5);
287        post.update(outcome, lambda);
288    }
289
290    /// Pick a peer to delegate to over `local`, or return `None` to
291    /// self-execute. Applies the margin rule `score(peer) > score(local) + δ`.
292    pub fn delegate_to<'a>(
293        &self,
294        local: &'a str,
295        peers: &'a [&'a str],
296        skill: &str,
297        bucket: Bucket,
298    ) -> Option<&'a str> {
299        let local_score = self.score(local, skill, bucket).unwrap_or(0.0);
300        let mut best: Option<(&str, f64)> = None;
301        for peer in peers {
302            if *peer == local {
303                continue;
304            }
305            let peer_score = self.score(peer, skill, bucket).unwrap_or(0.0);
306            if peer_score > local_score + self.config.delta {
307                match best {
308                    Some((_, current_best)) if current_best >= peer_score => {}
309                    _ => best = Some((peer, peer_score)),
310                }
311            }
312        }
313        best.map(|(peer, _)| peer)
314    }
315
316    /// Rank `candidates` by their LCB score for `(skill, bucket)` and
317    /// return the best one, or `None` when the input is empty.
318    ///
319    /// Unlike [`Self::delegate_to`] this does **not** honour a margin
320    /// δ — it's the right primitive for orchestration sites that pick
321    /// "which executor runs this subtask" (`src/swarm/orchestrator.rs`
322    /// step 28), "which persona handles this handoff"
323    /// (`src/ralph/ralph_loop.rs` step 29), and "which autochat
324    /// persona goes next" (`src/tui/app/autochat/` step 31) — there
325    /// is no "local" agent competing for the slot, so the margin rule
326    /// doesn't apply.
327    ///
328    /// Candidates with no posterior yet score 0.0 (conservative) and
329    /// are only picked when every other candidate also has no data —
330    /// i.e. the cold-start tie-break preserves the caller's input
331    /// order.
332    ///
333    /// # Examples
334    ///
335    /// ```rust
336    /// use codetether_agent::session::delegation::{DelegationConfig, DelegationState};
337    /// use codetether_agent::session::delegation_skills::SWARM_DISPATCH;
338    /// use codetether_agent::session::relevance::{Bucket, Dependency, Difficulty, ToolUse};
339    ///
340    /// let b = Bucket {
341    ///     difficulty: Difficulty::Easy,
342    ///     dependency: Dependency::Isolated,
343    ///     tool_use: ToolUse::No,
344    /// };
345    /// let mut state = DelegationState::with_config(DelegationConfig::default());
346    /// // Cold start: no data → first candidate wins by input-order tie-break.
347    /// let pick = state.rank_candidates(&["shell_executor", "planner"], SWARM_DISPATCH, b);
348    /// assert_eq!(pick, Some("shell_executor"));
349    /// ```
350    pub fn rank_candidates<'a>(
351        &self,
352        candidates: &'a [&'a str],
353        skill: &str,
354        bucket: Bucket,
355    ) -> Option<&'a str> {
356        if candidates.is_empty() {
357            return None;
358        }
359        let mut best: Option<(&str, f64)> = None;
360        for name in candidates {
361            let score = self.score(name, skill, bucket).unwrap_or(0.0);
362            match best {
363                Some((_, current)) if current >= score => {}
364                _ => best = Some((name, score)),
365            }
366        }
367        best.map(|(name, _)| name)
368    }
369
370    /// Pull at most `m_z` pseudo-counts from `neighbors` into the
371    /// posterior for `(agent, skill, bucket)` when that posterior has
372    /// no real observations yet.
373    ///
374    /// Empirical-Bayes cold-start per CADMAS-CTX Section 3.6. Bounded
375    /// by `m_z ≤ 2` so neighbour mass cannot drown real evidence.
376    pub fn shrink_cold_start(
377        &mut self,
378        agent: &str,
379        skill: &str,
380        bucket: Bucket,
381        neighbors: &[Bucket],
382        m_z: f64,
383    ) {
384        let m_z = m_z.clamp(0.0, 2.0);
385        if m_z <= 0.0 {
386            return;
387        }
388        let own_key = Self::key(agent, skill, bucket);
389        if let Some(own) = self.beliefs.get(&own_key) {
390            if own.n > 0 {
391                return;
392            }
393        }
394        let mut sum_alpha = 0.0;
395        let mut sum_beta = 0.0;
396        let mut contributors = 0.0;
397        for nb in neighbors {
398            if *nb == bucket {
399                continue;
400            }
401            let nb_key = Self::key(agent, skill, *nb);
402            if let Some(post) = self.beliefs.get(&nb_key) {
403                if post.n > 0 {
404                    sum_alpha += post.mean();
405                    sum_beta += 1.0 - post.mean();
406                    contributors += 1.0;
407                }
408            }
409        }
410        if contributors <= 0.0 {
411            return;
412        }
413        let avg_alpha = sum_alpha / contributors;
414        let avg_beta = sum_beta / contributors;
415        let kappa = self.config.kappa;
416        let post = self
417            .beliefs
418            .entry(own_key)
419            .or_insert_with(|| BetaPosterior::from_self_confidence(0.5, kappa));
420        post.alpha += avg_alpha * m_z;
421        post.beta += avg_beta * m_z;
422    }
423}
424
/// Read the `CODETETHER_DELEGATION_ENABLED` environment variable as an
/// optional boolean override.
///
/// After trimming surrounding whitespace the value is compared
/// ASCII-case-insensitively: `1`/`true`/`yes`/`on` yield `Some(true)`,
/// `0`/`false`/`no`/`off` yield `Some(false)`. An unset variable, a
/// value that cannot be read as a string, or any other text yields
/// `None`.
fn env_enabled_override() -> Option<bool> {
    const TRUTHY: &[&str] = &["1", "true", "yes", "on"];
    const FALSY: &[&str] = &["0", "false", "no", "off"];

    let value = env::var("CODETETHER_DELEGATION_ENABLED").ok()?;
    let token = value.trim();
    let is_one_of = |words: &[&str]| words.iter().any(|word| token.eq_ignore_ascii_case(word));

    if is_one_of(TRUTHY) {
        Some(true)
    } else if is_one_of(FALSY) {
        Some(false)
    } else {
        None
    }
}
433
#[cfg(test)]
mod tests {
    use super::*;
    use crate::session::relevance::{Dependency, Difficulty, ToolUse};

    /// Canonical bucket shared by the tests below.
    fn bucket() -> Bucket {
        Bucket {
            difficulty: Difficulty::Easy,
            dependency: Dependency::Isolated,
            tool_use: ToolUse::No,
        }
    }

    #[test]
    fn beta_update_increments_success_count() {
        let mut post = BetaPosterior::from_self_confidence(0.5, 2.0);
        post.update(true, 1.0);
        assert_eq!(post.n, 1);
        // α grew from 1.0 → 2.0, β unchanged at 1.0.
        assert!((post.alpha - 2.0).abs() < 1e-9);
        assert!((post.beta - 1.0).abs() < 1e-9);
    }

    #[test]
    fn beta_score_penalises_uncertainty() {
        let mut thin = BetaPosterior::from_self_confidence(0.8, 2.0);
        let mut thick = BetaPosterior::from_self_confidence(0.5, 2.0);
        for _ in 0..100 {
            thick.update(true, 1.0);
            thick.update(false, 1.0);
        }
        // After one failure thin's mean (≈ 0.53) is still above thick's
        // (0.5), but thin has far larger variance, so its LCB score
        // must land below thick's.
        thin.update(false, 1.0);
        let gamma = 0.5;
        assert!(thin.mean() > thick.mean());
        assert!(thin.score(gamma) < thick.score(gamma));
    }

    #[test]
    fn delegation_state_update_seeds_and_records() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        state.update("openai", "model_call", bucket(), true);
        let score = state
            .score("openai", "model_call", bucket())
            .expect("update must seed the posterior");
        assert!(score.is_finite());
    }

    #[test]
    fn delegate_to_respects_margin() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b = bucket();
        // Local has lots of evidence, mid-performance (α = β = 21).
        for _ in 0..20 {
            state.update("local", "skill", b, true);
            state.update("local", "skill", b, false);
        }
        // Peer has comparable evidence and a slightly better hit rate
        // (α = 23, β = 21).
        for _ in 0..20 {
            state.update("peer", "skill", b, true);
            state.update("peer", "skill", b, false);
        }
        for _ in 0..2 {
            state.update("peer", "skill", b, true);
        }
        let peers = ["peer"];

        // LCB scores are ≈ 0.462 (local) vs ≈ 0.485 (peer): the peer is
        // ahead, but by less than δ = 0.05, so the margin must block the
        // hand-off.
        assert_eq!(state.delegate_to("local", &peers, "skill", b), None);

        // Once the peer is clearly better (α = 63, β = 21, score ≈ 0.73)
        // the margin is cleared and delegation fires.
        for _ in 0..40 {
            state.update("peer", "skill", b, true);
        }
        assert_eq!(state.delegate_to("local", &peers, "skill", b), Some("peer"));
    }

    #[test]
    fn shrink_cold_start_pulls_neighbour_mass() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b1 = bucket();
        let b2 = Bucket {
            difficulty: Difficulty::Medium,
            ..b1
        };
        for _ in 0..10 {
            state.update("agent", "skill", b2, true);
        }
        // b1 has no real data yet.
        assert!(
            state
                .beliefs
                .get(&DelegationState::key("agent", "skill", b1))
                .map(|p| p.n)
                .unwrap_or(0)
                == 0
        );
        state.shrink_cold_start("agent", "skill", b1, &[b2], 2.0);
        let post = state
            .beliefs
            .get(&DelegationState::key("agent", "skill", b1))
            .unwrap();
        // Pseudo-alpha should have grown toward b2's mean (≈ 0.92).
        assert!(post.alpha > post.beta);
    }

    #[test]
    fn rank_candidates_picks_first_on_cold_start() {
        let state = DelegationState::with_config(DelegationConfig::default());
        let pick = state.rank_candidates(&["a", "b", "c"], "swarm_dispatch", bucket());
        assert_eq!(pick, Some("a"));
    }

    #[test]
    fn rank_candidates_prefers_best_scoring_once_warm() {
        let mut state = DelegationState::with_config(DelegationConfig::default());
        let b = bucket();
        for _ in 0..5 {
            state.update("b", "swarm_dispatch", b, true);
        }
        for _ in 0..5 {
            state.update("a", "swarm_dispatch", b, false);
        }
        let pick = state.rank_candidates(&["a", "b"], "swarm_dispatch", b);
        assert_eq!(pick, Some("b"));
    }

    #[test]
    fn rank_candidates_is_none_for_empty_input() {
        let state = DelegationState::with_config(DelegationConfig::default());
        assert!(
            state
                .rank_candidates(&[], "swarm_dispatch", bucket())
                .is_none()
        );
    }

    #[test]
    fn config_defaults_match_documented_constants() {
        let cfg = DelegationConfig::default();
        assert!((cfg.gamma - DEFAULT_GAMMA).abs() < 1e-9);
        assert!((cfg.delta - DEFAULT_DELTA).abs() < 1e-9);
        assert!((cfg.kappa - DEFAULT_KAPPA).abs() < 1e-9);
        assert!((cfg.lambda - DEFAULT_LAMBDA).abs() < 1e-9);
        assert!(!cfg.enabled);
    }
}