lmm_agent/cognition/learning/config.rs
// Copyright 2026 Mahmoud Harmouch.
//
// Licensed under the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! # `LearningConfig` - HELM hyperparameter configuration.
//!
//! All hyperparameters for the HELM learning engine are consolidated here so
//! callers can tune the system with a single struct rather than passing
//! individual values to every sub-module.
//!
//! ## Defaults
//!
//! | Field                  | Default | Description                                        |
//! |------------------------|---------|----------------------------------------------------|
//! | `alpha`                | 0.1     | Q-learning rate                                    |
//! | `gamma`                | 0.9     | Discount factor                                    |
//! | `epsilon`              | 0.3     | Initial ε-greedy exploration probability           |
//! | `epsilon_decay`        | 0.99    | Per-episode ε decay multiplier                     |
//! | `epsilon_min`          | 0.01    | Minimum ε floor                                    |
//! | `distill_top_k`        | 8       | Cold entries ingested per distillation pass        |
//! | `distill_threshold`    | 0.4     | Minimum reward for distillation eligibility        |
//! | `elastic_lambda`       | 0.5     | Elastic penalty strength                           |
//! | `elastic_pin_count`    | 3       | Activation count before an entry is pinned         |
//! | `federated_blend`      | 0.5     | Local weight in federated merge: 0=remote, 1=local |
//! | `meta_top_k`           | 3       | Prototypes considered for meta-adaptation          |
//! | `pmi_min_count`        | 2       | Minimum co-occurrence count for PMI pairs          |
//! | `active_modes`         | all     | Set of enabled `LearningMode`s                     |
//!
//! ## Examples
//!
//! ```rust
//! use lmm_agent::cognition::learning::config::LearningConfig;
//!
//! let cfg = LearningConfig::default();
//! assert_eq!(cfg.alpha, 0.1);
//! assert_eq!(cfg.gamma, 0.9);
//! assert!(cfg.epsilon > 0.0);
//! ```
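//!
//! The three ε fields work together: the field docs imply a multiplicative
//! per-episode decay with a floor, i.e. ε ← max(ε_min, ε · epsilon_decay).
//! A sketch of that schedule (the actual update lives in the learning
//! engine, not in this module):
//!
//! ```rust
//! use lmm_agent::cognition::learning::config::LearningConfig;
//!
//! let cfg = LearningConfig::default();
//! let mut eps = cfg.epsilon;
//! for _ in 0..10 {
//!     eps = (eps * cfg.epsilon_decay).max(cfg.epsilon_min);
//! }
//! // After ten episodes ε has decayed but never drops below the floor.
//! assert!(eps < cfg.epsilon && eps >= cfg.epsilon_min);
//! ```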

use crate::types::LearningMode;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;

/// Full HELM hyperparameter configuration.
///
/// # Examples
///
/// ```rust
/// use lmm_agent::cognition::learning::config::LearningConfig;
///
/// let cfg = LearningConfig::builder()
///     .alpha(0.05)
///     .gamma(0.95)
///     .epsilon(0.2)
///     .build();
///
/// assert_eq!(cfg.alpha, 0.05);
/// ```
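///
/// The struct derives `Serialize`/`Deserialize`, so it can be persisted and
/// reloaded. A minimal sketch, assuming a JSON serializer such as
/// `serde_json` is available to the caller (it is not a dependency of this
/// module, hence the example is not compiled):
///
/// ```rust,ignore
/// let json = serde_json::to_string(&LearningConfig::default()).unwrap();
/// let back: LearningConfig = serde_json::from_str(&json).unwrap();
/// assert_eq!(back, LearningConfig::default());
/// ```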
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct LearningConfig {
    /// Temporal-difference learning rate α ∈ (0, 1].
    pub alpha: f64,

    /// Discount factor γ ∈ [0, 1].
    pub gamma: f64,

    /// Initial ε-greedy exploration probability ∈ (0, 1].
    pub epsilon: f64,

    /// Per-episode ε decay multiplier ∈ (0, 1].
    pub epsilon_decay: f64,

    /// Minimum exploration probability floor.
    pub epsilon_min: f64,

    /// Number of top cold entries promoted to the knowledge index per distillation.
    pub distill_top_k: usize,

    /// Minimum reward score required for a cold entry to be distillation-eligible.
    pub distill_threshold: f64,

    /// Elastic penalty strength λ.
    pub elastic_lambda: f64,

    /// Number of recall activations before an entry is considered "pinned."
    pub elastic_pin_count: usize,

    /// Blend weight for federated merge: 1.0 = keep local only, 0.0 = remote only.
    pub federated_blend: f64,

    /// Number of task prototypes considered when computing meta-adaptation offsets.
    pub meta_top_k: usize,

    /// Minimum token-pair co-occurrence count required before PMI is computed.
    pub pmi_min_count: usize,

    /// Set of learning modes that are active.
    pub active_modes: HashSet<LearningMode>,
}

impl Default for LearningConfig {
    fn default() -> Self {
        Self {
            alpha: 0.1,
            gamma: 0.9,
            epsilon: 0.3,
            epsilon_decay: 0.99,
            epsilon_min: 0.01,
            distill_top_k: 8,
            distill_threshold: 0.4,
            elastic_lambda: 0.5,
            elastic_pin_count: 3,
            federated_blend: 0.5,
            meta_top_k: 3,
            pmi_min_count: 2,
            active_modes: LearningMode::all(),
        }
    }
}

impl LearningConfig {
    /// Returns a [`LearningConfigBuilder`] for ergonomic construction.
    pub fn builder() -> LearningConfigBuilder {
        LearningConfigBuilder::default()
    }

    /// Returns `true` when the given `mode` is active.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    /// use lmm_agent::types::LearningMode;
    ///
    /// let cfg = LearningConfig::default();
    /// assert!(cfg.is_mode_active(LearningMode::QTable));
    /// ```
    pub fn is_mode_active(&self, mode: LearningMode) -> bool {
        self.active_modes.contains(&mode)
    }
}

/// Fluent builder for [`LearningConfig`].
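///
/// Setter arguments are clamped into sane ranges rather than rejected, so
/// `build` never fails. For example, an out-of-range learning rate is pulled
/// back into (0, 1]:
///
/// ```rust
/// use lmm_agent::cognition::learning::config::LearningConfig;
///
/// let cfg = LearningConfig::builder().alpha(5.0).build();
/// assert_eq!(cfg.alpha, 1.0); // clamped from 5.0
/// ```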
#[derive(Debug, Default)]
pub struct LearningConfigBuilder {
    alpha: Option<f64>,
    gamma: Option<f64>,
    epsilon: Option<f64>,
    epsilon_decay: Option<f64>,
    epsilon_min: Option<f64>,
    distill_top_k: Option<usize>,
    distill_threshold: Option<f64>,
    elastic_lambda: Option<f64>,
    elastic_pin_count: Option<usize>,
    federated_blend: Option<f64>,
    meta_top_k: Option<usize>,
    pmi_min_count: Option<usize>,
    active_modes: Option<HashSet<LearningMode>>,
}

impl LearningConfigBuilder {
    /// Sets the TD learning rate α.
    pub fn alpha(mut self, v: f64) -> Self {
        self.alpha = Some(v.clamp(1e-6, 1.0));
        self
    }

    /// Sets the discount factor γ.
    pub fn gamma(mut self, v: f64) -> Self {
        self.gamma = Some(v.clamp(0.0, 1.0));
        self
    }

    /// Sets the initial exploration probability ε.
    pub fn epsilon(mut self, v: f64) -> Self {
        self.epsilon = Some(v.clamp(0.0, 1.0));
        self
    }

    /// Sets the per-episode ε decay multiplier.
    pub fn epsilon_decay(mut self, v: f64) -> Self {
        self.epsilon_decay = Some(v.clamp(0.5, 1.0));
        self
    }

    /// Sets the minimum ε floor.
    pub fn epsilon_min(mut self, v: f64) -> Self {
        self.epsilon_min = Some(v.clamp(0.0, 0.5));
        self
    }

    /// Sets how many cold entries are promoted per distillation pass.
    pub fn distill_top_k(mut self, k: usize) -> Self {
        self.distill_top_k = Some(k.max(1));
        self
    }

    /// Sets the distillation reward threshold.
    pub fn distill_threshold(mut self, v: f64) -> Self {
        self.distill_threshold = Some(v.clamp(0.0, 1.0));
        self
    }

    /// Sets the elastic penalty λ.
    pub fn elastic_lambda(mut self, v: f64) -> Self {
        self.elastic_lambda = Some(v.clamp(0.0, 10.0));
        self
    }

    /// Sets the activation count threshold for pinning.
    pub fn elastic_pin_count(mut self, n: usize) -> Self {
        self.elastic_pin_count = Some(n.max(1));
        self
    }

    /// Sets the federated blend weight.
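    ///
    /// `1.0` keeps only the local table; `0.0` adopts the remote one
    /// wholesale. Assuming the linear merge those endpoints suggest, each
    /// merged value is `blend * local + (1.0 - blend) * remote`:
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// let cfg = LearningConfig::builder().federated_blend(0.25).build();
    /// let (local, remote) = (0.8_f64, 0.4_f64);
    /// let merged = cfg.federated_blend * local + (1.0 - cfg.federated_blend) * remote;
    /// assert!((merged - 0.5).abs() < 1e-12);
    /// ```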
    pub fn federated_blend(mut self, v: f64) -> Self {
        self.federated_blend = Some(v.clamp(0.0, 1.0));
        self
    }

    /// Sets the number of meta-prototypes considered per lookup.
    pub fn meta_top_k(mut self, k: usize) -> Self {
        self.meta_top_k = Some(k.max(1));
        self
    }

    /// Sets the minimum PMI co-occurrence count.
    pub fn pmi_min_count(mut self, n: usize) -> Self {
        self.pmi_min_count = Some(n.max(1));
        self
    }

    /// Sets the enabled learning modes.
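    ///
    /// Replaces the default set (all modes) wholesale. Only
    /// `LearningMode::QTable` is shown here because it is the one variant
    /// this module's docs reference; build the set from whichever variants
    /// your deployment enables.
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    /// use lmm_agent::types::LearningMode;
    /// use std::collections::HashSet;
    ///
    /// let cfg = LearningConfig::builder()
    ///     .active_modes(HashSet::from([LearningMode::QTable]))
    ///     .build();
    /// assert!(cfg.is_mode_active(LearningMode::QTable));
    /// ```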
    pub fn active_modes(mut self, modes: HashSet<LearningMode>) -> Self {
        self.active_modes = Some(modes);
        self
    }

    /// Builds the [`LearningConfig`] by applying all overrides on top of defaults.
    pub fn build(self) -> LearningConfig {
        let defaults = LearningConfig::default();
        LearningConfig {
            alpha: self.alpha.unwrap_or(defaults.alpha),
            gamma: self.gamma.unwrap_or(defaults.gamma),
            epsilon: self.epsilon.unwrap_or(defaults.epsilon),
            epsilon_decay: self.epsilon_decay.unwrap_or(defaults.epsilon_decay),
            epsilon_min: self.epsilon_min.unwrap_or(defaults.epsilon_min),
            distill_top_k: self.distill_top_k.unwrap_or(defaults.distill_top_k),
            distill_threshold: self.distill_threshold.unwrap_or(defaults.distill_threshold),
            elastic_lambda: self.elastic_lambda.unwrap_or(defaults.elastic_lambda),
            elastic_pin_count: self.elastic_pin_count.unwrap_or(defaults.elastic_pin_count),
            federated_blend: self.federated_blend.unwrap_or(defaults.federated_blend),
            meta_top_k: self.meta_top_k.unwrap_or(defaults.meta_top_k),
            pmi_min_count: self.pmi_min_count.unwrap_or(defaults.pmi_min_count),
            active_modes: self.active_modes.unwrap_or(defaults.active_modes),
        }
    }
}
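
// Sanity tests for the defaults table above and for the clamping behavior of
// the builder setters; the expected values mirror the bounds used in each
// setter.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn defaults_match_documented_table() {
        let cfg = LearningConfig::default();
        assert_eq!(cfg.alpha, 0.1);
        assert_eq!(cfg.gamma, 0.9);
        assert_eq!(cfg.epsilon, 0.3);
        assert_eq!(cfg.distill_top_k, 8);
        assert_eq!(cfg.pmi_min_count, 2);
    }

    #[test]
    fn builder_clamps_out_of_range_values() {
        let cfg = LearningConfig::builder()
            .alpha(-1.0) // floored at 1e-6
            .gamma(2.0) // capped at 1.0
            .epsilon_decay(0.1) // floored at 0.5
            .distill_top_k(0) // floored at 1
            .build();
        assert_eq!(cfg.alpha, 1e-6);
        assert_eq!(cfg.gamma, 1.0);
        assert_eq!(cfg.epsilon_decay, 0.5);
        assert_eq!(cfg.distill_top_k, 1);
    }
}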