awaken_runtime_contract/registry_spec/model_pool_spec.rs
1//! Serializable model pool: a named set of model offerings that behaves like
2//! a single model to agents, with sticky per-session routing and failover.
3//!
4//! Carved out of `registry_spec/mod.rs` so the file stays under the
5//! repository's per-file line cap. Public types are re-exported from
6//! `registry_spec` so import paths remain unchanged.
7//!
8//! A pool is referenced by `AgentSpec.model_id` exactly where a `ModelSpec`
9//! id would be. To the runtime it resolves to a single `LlmExecutor`, so the
10//! run loop, streaming, retry, and context-window clamping all treat it
11//! identically to a plain model. Each agent is routed to one stable "home"
12//! member (prompt-cache affinity); the active member is held for the duration
13//! of a session and only changes on sustained failure or quota pressure.
14
15use serde::{Deserialize, Serialize};
16
17/// A named pool of member models, addressable like a single model.
18#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, schemars::JsonSchema)]
19#[serde(deny_unknown_fields)]
20pub struct ModelPoolSpec {
21 /// Stable id, unique across the combined model + pool id namespace.
22 pub id: String,
23 /// Ordered set of member models. Must be non-empty.
24 pub members: Vec<PoolMemberSpec>,
25 /// Home-selection and stickiness policy.
26 #[serde(default)]
27 pub routing: PoolRoutingPolicy,
28 /// When the pool abandons the active member for another.
29 #[serde(default)]
30 pub switch: PoolSwitchPolicy,
31}
32
33/// One member of a [`ModelPoolSpec`], referencing a `ModelSpec` by id.
34#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, schemars::JsonSchema)]
35#[serde(deny_unknown_fields)]
36pub struct PoolMemberSpec {
37 /// References a `ModelSpec.id` in the same registry.
38 pub model_id: String,
39 /// Relative selection weight for home distribution. `None` is treated
40 /// as `1`. Must be greater than zero when present.
41 #[serde(default, skip_serializing_if = "Option::is_none")]
42 pub weight: Option<u32>,
43 /// Whether the member is a home candidate or a failover-only target.
44 #[serde(default)]
45 pub role: PoolMemberRole,
46}
47
48/// Eligibility of a pool member for initial home selection.
49#[derive(
50 Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize, schemars::JsonSchema,
51)]
52#[serde(rename_all = "snake_case")]
53pub enum PoolMemberRole {
54 /// Eligible as both a home target and a failover target.
55 #[default]
56 Member,
57 /// Never selected as home; used only after failover from other members.
58 FailoverOnly,
59}
60
61/// How a session picks its initial member and how long that choice is held.
62#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize, schemars::JsonSchema)]
63#[serde(deny_unknown_fields)]
64pub struct PoolRoutingPolicy {
65 /// Strategy for choosing the home member at session start.
66 #[serde(default)]
67 pub home: HomeStrategy,
68 /// Lifetime over which the active member is held.
69 #[serde(default)]
70 pub sticky_scope: StickyScope,
71}
72
73/// Strategy for choosing a session's home member.
74#[derive(
75 Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize, schemars::JsonSchema,
76)]
77#[serde(rename_all = "snake_case")]
78pub enum HomeStrategy {
79 /// Stable hash of the routing key over healthy members: the same agent
80 /// always homes to the same member (prompt-cache affinity) while
81 /// different agents spread across the pool.
82 #[default]
83 Deterministic,
84 /// Assign homes round-robin as sessions start. Spreads load but provides
85 /// no cache affinity across process restarts.
86 RoundRobin,
87 /// Always home to the first healthy member in declaration order.
88 FirstHealthy,
89}
90
91/// Lifetime over which a session holds its active member.
92#[derive(
93 Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize, schemars::JsonSchema,
94)]
95#[serde(rename_all = "snake_case")]
96pub enum StickyScope {
97 /// Hold routing for the lifetime of a thread (conversation), maximizing
98 /// within-conversation prompt-cache reuse across runs.
99 #[default]
100 Thread,
101 /// Hold routing only for a single run.
102 Run,
103}
104
105/// When the pool abandons the active member for another one.
106#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, schemars::JsonSchema)]
107#[serde(deny_unknown_fields)]
108pub struct PoolSwitchPolicy {
109 /// Switch when the active member's circuit breaker is open (sustained
110 /// failure). Transient single-request errors are absorbed by the
111 /// member's own retry policy and never trigger a switch.
112 #[serde(default = "default_true")]
113 pub on_circuit_open: bool,
114 /// Switch on rate-limit / overload (quota) signals.
115 #[serde(default = "default_true")]
116 pub on_quota: bool,
117 /// Only treat a quota signal as switch-worthy when the provider's
118 /// retry-after hint meets or exceeds this many seconds. `None` switches
119 /// on any quota signal.
120 #[serde(default, skip_serializing_if = "Option::is_none")]
121 pub quota_retry_after_threshold_secs: Option<u64>,
122 /// Switch on permanent member errors (unauthorized, model-not-found).
123 #[serde(default = "default_true")]
124 pub on_permanent: bool,
125 /// Cap on consecutive member switches within one failure incident for a
126 /// session. A successful request or cleanly drained stream resets this
127 /// budget so long-lived threads can recover from future independent
128 /// incidents. `None` is unbounded (still bounded by the number of members
129 /// for any single call).
130 #[serde(default, skip_serializing_if = "Option::is_none")]
131 pub max_switches_per_session: Option<u32>,
132}
133
134impl Default for PoolSwitchPolicy {
135 fn default() -> Self {
136 Self {
137 on_circuit_open: true,
138 on_quota: true,
139 quota_retry_after_threshold_secs: None,
140 on_permanent: true,
141 max_switches_per_session: None,
142 }
143 }
144}
145
/// Serde default provider for boolean fields that should be `true` when
/// omitted.
///
/// `#[serde(default = "...")]` takes the path of a function, not a literal,
/// which is why this exists as a named function.
fn default_true() -> bool {
    true
}
149
150impl ModelPoolSpec {
151 /// Convenience constructor for tests and bootstrap code. Routing and
152 /// switch policies default; members are taken as `Member`-role with no
153 /// explicit weight.
154 pub fn new<I, S>(id: impl Into<String>, member_model_ids: I) -> Self
155 where
156 I: IntoIterator<Item = S>,
157 S: Into<String>,
158 {
159 Self {
160 id: id.into(),
161 members: member_model_ids
162 .into_iter()
163 .map(|model_id| PoolMemberSpec {
164 model_id: model_id.into(),
165 weight: None,
166 role: PoolMemberRole::Member,
167 })
168 .collect(),
169 routing: PoolRoutingPolicy::default(),
170 switch: PoolSwitchPolicy::default(),
171 }
172 }
173}