
lmm_agent/cognition/learning/config.rs

// Copyright 2026 Mahmoud Harmouch.
//
// Licensed under the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! # `LearningConfig` - HELM hyperparameter configuration.
//!
//! All hyperparameters for the HELM learning engine are consolidated here so
//! callers can tune the system with a single struct rather than passing
//! individual values to every sub-module.
//!
//! ## Defaults
//!
//! | Field                  | Default | Description                                       |
//! |------------------------|---------|---------------------------------------------------|
//! | `alpha`                | 0.1     | Q-learning rate                                   |
//! | `gamma`                | 0.9     | Discount factor                                   |
//! | `epsilon`              | 0.3     | Initial ε-greedy exploration probability          |
//! | `epsilon_decay`        | 0.99    | Per-episode ε decay multiplier                    |
//! | `epsilon_min`          | 0.01    | Minimum ε floor                                   |
//! | `distill_top_k`        | 8       | Cold entries promoted per distillation pass       |
//! | `distill_threshold`    | 0.4     | Minimum reward for distillation eligibility       |
//! | `elastic_lambda`       | 0.5     | Elastic penalty strength                          |
//! | `elastic_pin_count`    | 3       | Activation count before an entry is pinned        |
//! | `federated_blend`      | 0.5     | Local weight in federated merge: 0=remote, 1=local|
//! | `meta_top_k`           | 3       | Prototypes considered for meta-adaptation         |
//! | `pmi_min_count`        | 2       | Minimum co-occurrence count for PMI pairs         |
//! | `active_modes`         | all     | Set of enabled `LearningMode`s                    |
//!
//! ## Examples
//!
//! ```rust
//! use lmm_agent::cognition::learning::config::LearningConfig;
//!
//! let cfg = LearningConfig::default();
//! assert_eq!(cfg.alpha, 0.1);
//! assert_eq!(cfg.gamma, 0.9);
//! assert!(cfg.epsilon > 0.0);
//! ```
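//!
//! Restricting the engine to a subset of modes goes through the builder's
//! `active_modes` setter. A minimal sketch using the `QTable` mode that also
//! appears in the examples below (any `LearningMode` works the same way):
//!
//! ```rust
//! use std::collections::HashSet;
//!
//! use lmm_agent::cognition::learning::config::LearningConfig;
//! use lmm_agent::types::LearningMode;
//!
//! // Enable only Q-table learning; every other mode becomes inactive.
//! let modes: HashSet<LearningMode> = [LearningMode::QTable].into_iter().collect();
//! let cfg = LearningConfig::builder().active_modes(modes).build();
//!
//! assert!(cfg.is_mode_active(LearningMode::QTable));
//! ```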

use crate::types::LearningMode;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;

/// Full HELM hyperparameter configuration.
///
/// # Examples
///
/// ```rust
/// use lmm_agent::cognition::learning::config::LearningConfig;
///
/// let cfg = LearningConfig::builder()
///     .alpha(0.05)
///     .gamma(0.95)
///     .epsilon(0.2)
///     .build();
///
/// assert_eq!(cfg.alpha, 0.05);
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct LearningConfig {
    /// Temporal-difference learning rate α ∈ (0, 1].
    pub alpha: f64,

    /// Discount factor γ ∈ [0, 1].
    pub gamma: f64,

    /// Initial ε-greedy exploration probability ∈ (0, 1].
    pub epsilon: f64,

    /// Per-episode ε decay multiplier ∈ (0, 1].
    pub epsilon_decay: f64,

    /// Minimum exploration probability floor.
    pub epsilon_min: f64,

    /// Number of top cold entries promoted to the knowledge index per distillation.
    pub distill_top_k: usize,

    /// Minimum reward score required for a cold entry to be distillation-eligible.
    pub distill_threshold: f64,

    /// Elastic penalty strength λ.
    pub elastic_lambda: f64,

    /// Number of recall activations before an entry is considered "pinned."
    pub elastic_pin_count: usize,

    /// Blend weight for federated merge: 1.0 = keep local only, 0.0 = remote only.
    pub federated_blend: f64,

    /// Number of task prototypes considered when computing meta-adaptation offsets.
    pub meta_top_k: usize,

    /// Minimum token-pair co-occurrence count required before PMI is computed.
    pub pmi_min_count: usize,

    /// Set of learning modes that are active.
    pub active_modes: HashSet<LearningMode>,
}

impl Default for LearningConfig {
    fn default() -> Self {
        Self {
            alpha: 0.1,
            gamma: 0.9,
            epsilon: 0.3,
            epsilon_decay: 0.99,
            epsilon_min: 0.01,
            distill_top_k: 8,
            distill_threshold: 0.4,
            elastic_lambda: 0.5,
            elastic_pin_count: 3,
            federated_blend: 0.5,
            meta_top_k: 3,
            pmi_min_count: 2,
            active_modes: LearningMode::all(),
        }
    }
}

impl LearningConfig {
    /// Returns a [`LearningConfigBuilder`] for ergonomic construction.
    pub fn builder() -> LearningConfigBuilder {
        LearningConfigBuilder::default()
    }

    /// Returns `true` when the given `mode` is active.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    /// use lmm_agent::types::LearningMode;
    ///
    /// let cfg = LearningConfig::default();
    /// assert!(cfg.is_mode_active(LearningMode::QTable));
    /// ```
    pub fn is_mode_active(&self, mode: LearningMode) -> bool {
        self.active_modes.contains(&mode)
    }
}

/// Fluent builder for [`LearningConfig`].
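///
/// Setters clamp out-of-range inputs instead of rejecting them, so `build`
/// always succeeds. A small sketch of that behaviour:
///
/// ```rust
/// use lmm_agent::cognition::learning::config::LearningConfig;
///
/// // 2.0 lies outside the valid α range, so the setter clamps it to 1.0.
/// let cfg = LearningConfig::builder().alpha(2.0).build();
/// assert_eq!(cfg.alpha, 1.0);
/// ```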
#[derive(Debug, Default)]
pub struct LearningConfigBuilder {
    alpha: Option<f64>,
    gamma: Option<f64>,
    epsilon: Option<f64>,
    epsilon_decay: Option<f64>,
    epsilon_min: Option<f64>,
    distill_top_k: Option<usize>,
    distill_threshold: Option<f64>,
    elastic_lambda: Option<f64>,
    elastic_pin_count: Option<usize>,
    federated_blend: Option<f64>,
    meta_top_k: Option<usize>,
    pmi_min_count: Option<usize>,
    active_modes: Option<HashSet<LearningMode>>,
}

impl LearningConfigBuilder {
    /// Sets the TD learning rate α, clamped to `[1e-6, 1.0]`.
    pub fn alpha(mut self, v: f64) -> Self {
        self.alpha = Some(v.clamp(1e-6, 1.0));
        self
    }

    /// Sets the discount factor γ, clamped to `[0.0, 1.0]`.
    pub fn gamma(mut self, v: f64) -> Self {
        self.gamma = Some(v.clamp(0.0, 1.0));
        self
    }

    /// Sets the initial exploration probability ε, clamped to `[0.0, 1.0]`.
    pub fn epsilon(mut self, v: f64) -> Self {
        self.epsilon = Some(v.clamp(0.0, 1.0));
        self
    }

    /// Sets the per-episode ε decay multiplier, clamped to `[0.5, 1.0]`.
    pub fn epsilon_decay(mut self, v: f64) -> Self {
        self.epsilon_decay = Some(v.clamp(0.5, 1.0));
        self
    }

    /// Sets the minimum ε floor, clamped to `[0.0, 0.5]`.
    pub fn epsilon_min(mut self, v: f64) -> Self {
        self.epsilon_min = Some(v.clamp(0.0, 0.5));
        self
    }

    /// Sets how many cold entries are promoted per distillation pass (at least 1).
    pub fn distill_top_k(mut self, k: usize) -> Self {
        self.distill_top_k = Some(k.max(1));
        self
    }

    /// Sets the distillation reward threshold, clamped to `[0.0, 1.0]`.
    pub fn distill_threshold(mut self, v: f64) -> Self {
        self.distill_threshold = Some(v.clamp(0.0, 1.0));
        self
    }

    /// Sets the elastic penalty λ, clamped to `[0.0, 10.0]`.
    pub fn elastic_lambda(mut self, v: f64) -> Self {
        self.elastic_lambda = Some(v.clamp(0.0, 10.0));
        self
    }

    /// Sets the activation count threshold for pinning (at least 1).
    pub fn elastic_pin_count(mut self, n: usize) -> Self {
        self.elastic_pin_count = Some(n.max(1));
        self
    }

    /// Sets the federated blend weight, clamped to `[0.0, 1.0]`.
    pub fn federated_blend(mut self, v: f64) -> Self {
        self.federated_blend = Some(v.clamp(0.0, 1.0));
        self
    }

    /// Sets the number of meta-prototypes considered per lookup (at least 1).
    pub fn meta_top_k(mut self, k: usize) -> Self {
        self.meta_top_k = Some(k.max(1));
        self
    }

    /// Sets the minimum PMI co-occurrence count (at least 1).
    pub fn pmi_min_count(mut self, n: usize) -> Self {
        self.pmi_min_count = Some(n.max(1));
        self
    }

    /// Sets the enabled learning modes.
    pub fn active_modes(mut self, modes: HashSet<LearningMode>) -> Self {
        self.active_modes = Some(modes);
        self
    }

    /// Builds the [`LearningConfig`] by applying all overrides on top of defaults.
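    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// // Fields left unset fall back to `LearningConfig::default()`.
    /// let cfg = LearningConfig::builder().meta_top_k(5).build();
    /// assert_eq!(cfg.meta_top_k, 5);
    /// assert_eq!(cfg.gamma, 0.9);
    /// ```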
    pub fn build(self) -> LearningConfig {
        let defaults = LearningConfig::default();
        LearningConfig {
            alpha: self.alpha.unwrap_or(defaults.alpha),
            gamma: self.gamma.unwrap_or(defaults.gamma),
            epsilon: self.epsilon.unwrap_or(defaults.epsilon),
            epsilon_decay: self.epsilon_decay.unwrap_or(defaults.epsilon_decay),
            epsilon_min: self.epsilon_min.unwrap_or(defaults.epsilon_min),
            distill_top_k: self.distill_top_k.unwrap_or(defaults.distill_top_k),
            distill_threshold: self.distill_threshold.unwrap_or(defaults.distill_threshold),
            elastic_lambda: self.elastic_lambda.unwrap_or(defaults.elastic_lambda),
            elastic_pin_count: self.elastic_pin_count.unwrap_or(defaults.elastic_pin_count),
            federated_blend: self.federated_blend.unwrap_or(defaults.federated_blend),
            meta_top_k: self.meta_top_k.unwrap_or(defaults.meta_top_k),
            pmi_min_count: self.pmi_min_count.unwrap_or(defaults.pmi_min_count),
            active_modes: self.active_modes.unwrap_or(defaults.active_modes),
        }
    }
}
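
// Illustrative sanity checks mirroring the clamp ranges documented on each
// setter above; a sketch rather than an exhaustive suite.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn builder_without_overrides_matches_default() {
        assert_eq!(LearningConfig::builder().build(), LearningConfig::default());
    }

    #[test]
    fn setters_clamp_out_of_range_values() {
        let cfg = LearningConfig::builder()
            .alpha(5.0) // above 1.0: clamped down
            .gamma(-0.2) // below 0.0: clamped up
            .epsilon_decay(0.1) // below the 0.5 floor
            .distill_top_k(0) // raised to the minimum of 1
            .build();

        assert_eq!(cfg.alpha, 1.0);
        assert_eq!(cfg.gamma, 0.0);
        assert_eq!(cfg.epsilon_decay, 0.5);
        assert_eq!(cfg.distill_top_k, 1);
    }
}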