lmm_agent/cognition/learning/engine.rs
// Copyright 2026 Mahmoud Harmouch.
//
// Licensed under the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! # `LearningEngine` - HELM orchestration layer.
//!
//! `LearningEngine` coordinates all six HELM sub-modules into a unified
//! per-agent learning controller. It is designed to plug into the existing
//! `ThinkLoop` lifecycle with two call-site hooks:
//!
//! 1. **`record_step`** - called once per ThinkLoop iteration with the
//!    `CognitionSignal` from that step.
//! 2. **`end_of_episode`** - called after `ThinkLoop::run` completes to trigger
//!    distillation, meta-prototype storage, PMI fact synthesis, and ε decay.
//!
//! Optional federated exchange is performed explicitly via `federate`.
//!
//! ## Examples
//!
//! ```rust
//! use lmm_agent::cognition::learning::engine::LearningEngine;
//! use lmm_agent::cognition::learning::config::LearningConfig;
//! use lmm_agent::cognition::learning::q_table::{ActionKey, QTable};
//! use lmm_agent::cognition::memory::ColdStore;
//! use lmm_agent::cognition::knowledge::KnowledgeIndex;
//! use lmm_agent::cognition::signal::CognitionSignal;
//!
//! let mut engine = LearningEngine::new(LearningConfig::default());
//! let sig = CognitionSignal::new(0, "rust memory".into(), "rust uses ownership".into(), 1.0, 0.0);
//! let state = QTable::state_key("rust memory");
//! let next_state = QTable::state_key("rust uses ownership");
//! engine.record_step(&sig, state, ActionKey::Narrow, next_state);
//!
//! let cold = ColdStore::default();
//! let mut idx = KnowledgeIndex::new();
//! engine.end_of_episode(&cold, &mut idx, "rust memory safety", 0.8);
//! assert!(!engine.q_table().is_empty());
//! ```
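//!
//! A hypothetical `ThinkLoop` integration sketch; the loop shape and the
//! `step_with_action` helper below are assumptions for illustration, not
//! APIs defined in this crate:
//!
//! ```ignore
//! // Inside ThinkLoop::run, once per iteration:
//! let state = QTable::state_key(&query);
//! let action = engine.recommend_action(state, &goal, step);
//! let signal = step_with_action(action); // hypothetical: executes the action
//! let next_state = QTable::state_key(&signal.observation);
//! engine.record_step(&signal, state, action, next_state);
//!
//! // After ThinkLoop::run completes:
//! engine.end_of_episode(&cold, &mut index, &goal, avg_reward);
//! ```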

use crate::cognition::knowledge::KnowledgeIndex;
use crate::cognition::learning::config::LearningConfig;
use crate::cognition::learning::distill::KnowledgeDistiller;
use crate::cognition::learning::elastic::ElasticMemoryGuard;
use crate::cognition::learning::federated::FederatedAggregator;
use crate::cognition::learning::informal::InformalLearner;
use crate::cognition::learning::meta::MetaAdapter;
use crate::cognition::learning::q_table::{ActionKey, QTable};
use crate::cognition::memory::ColdStore;
use crate::cognition::signal::CognitionSignal;
use crate::types::{AgentSnapshot, ExperienceRecord, LearningMode};
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::collections::HashMap;

/// The HELM orchestration hub that bundles all six learning sub-modules.
///
/// # Examples
///
/// ```rust
/// use lmm_agent::cognition::learning::engine::LearningEngine;
/// use lmm_agent::cognition::learning::config::LearningConfig;
///
/// let engine = LearningEngine::new(LearningConfig::default());
/// assert!(engine.q_table().is_empty());
/// assert_eq!(engine.episode_count(), 0);
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LearningEngine {
    config: LearningConfig,
    q_table: QTable,
    meta: MetaAdapter,
    distiller: KnowledgeDistiller,
    aggregator: FederatedAggregator,
    elastic: ElasticMemoryGuard,
    informal: InformalLearner,

    /// Accumulated total reward across all episodes.
    total_reward: f64,

    /// Number of completed episodes.
    episode_count: usize,

    /// Experience buffer for current episode (cleared at `end_of_episode`).
    experience_buffer: Vec<ExperienceRecord>,
}

impl LearningEngine {
    /// Constructs a new `LearningEngine` from a [`LearningConfig`].
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// let engine = LearningEngine::new(LearningConfig::default());
    /// assert_eq!(engine.episode_count(), 0);
    /// ```
    pub fn new(config: LearningConfig) -> Self {
        let q_table = QTable::new(
            config.alpha,
            config.gamma,
            config.epsilon,
            config.epsilon_decay,
            config.epsilon_min,
        );
        let meta = MetaAdapter::new(config.meta_top_k);
        let distiller = KnowledgeDistiller::new(config.distill_threshold, config.distill_top_k);
        let aggregator = FederatedAggregator::new(config.federated_blend);
        let elastic = ElasticMemoryGuard::new(config.elastic_pin_count, config.elastic_lambda);
        let informal = InformalLearner::new(config.distill_threshold, config.pmi_min_count, 0.5);

        Self {
            config,
            q_table,
            meta,
            distiller,
            aggregator,
            elastic,
            informal,
            total_reward: 0.0,
            episode_count: 0,
            experience_buffer: Vec::new(),
        }
    }

    /// Returns a reference to the current configuration.
    pub fn config(&self) -> &LearningConfig {
        &self.config
    }

    /// Returns a reference to the Q-table.
    pub fn q_table(&self) -> &QTable {
        &self.q_table
    }

    /// Resets the Q-table exploration epsilon.
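    ///
    /// A minimal sketch of restoring exploration, e.g. before tackling a new
    /// task family; the value `0.5` is illustrative:
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// let mut engine = LearningEngine::new(LearningConfig::default());
    /// engine.reset_epsilon(0.5);
    /// ```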
    pub fn reset_epsilon(&mut self, epsilon: f64) {
        self.q_table.reset_epsilon(epsilon);
    }

    /// Returns the total number of completed episodes.
    pub fn episode_count(&self) -> usize {
        self.episode_count
    }

    /// Returns the accumulated total reward.
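    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// let engine = LearningEngine::new(LearningConfig::default());
    /// assert_eq!(engine.total_reward(), 0.0);
    /// ```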
    pub fn total_reward(&self) -> f64 {
        self.total_reward
    }

    /// Records one ThinkLoop step into all active learning sub-modules.
    ///
    /// This should be called inside `ThinkLoop::run` after each step produces
    /// a `CognitionSignal`.
    ///
    /// # Arguments
    ///
    /// * `signal` - The signal produced by this ThinkLoop iteration.
    /// * `state` - FNV-1a state key for the current query.
    /// * `action` - The action applied to produce `signal.query`.
    /// * `next_state` - FNV-1a state key for the observation after the action.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    /// use lmm_agent::cognition::learning::q_table::{ActionKey, QTable};
    /// use lmm_agent::cognition::signal::CognitionSignal;
    ///
    /// let mut engine = LearningEngine::new(LearningConfig::default());
    /// let sig = CognitionSignal::new(0, "a b".into(), "a b c".into(), 1.0, 0.0);
    /// let s = QTable::state_key("a b");
    /// let s2 = QTable::state_key("a b c");
    /// engine.record_step(&sig, s, ActionKey::Narrow, s2);
    /// assert!(!engine.q_table().is_empty());
    /// ```
    pub fn record_step(
        &mut self,
        signal: &CognitionSignal,
        state: u64,
        action: ActionKey,
        next_state: u64,
    ) {
        if self.config.is_mode_active(LearningMode::QTable) {
            self.q_table
                .update(state, action, signal.reward, next_state);
        }

        if self.config.is_mode_active(LearningMode::Informal) {
            self.informal.observe(&signal.observation, signal.reward);
        }

        if self.config.is_mode_active(LearningMode::Elastic) && !signal.observation.is_empty() {
            self.elastic.observe_activation(&signal.observation);
        }

        // Buffered unconditionally (regardless of active modes); consumed by
        // `compute_episode_offsets` at `end_of_episode`.
        self.experience_buffer.push(ExperienceRecord {
            state,
            action,
            reward: signal.reward,
            next_state,
        });
        self.total_reward += signal.reward;
    }

    /// Selects the recommended action for `state` using the Q-table and meta priors.
    ///
    /// If the Q-table has not yet seen `state` and the meta-adapter offers a
    /// warm-start, the highest-prior meta action is returned directly;
    /// otherwise selection falls through to the Q-table's ε-greedy policy.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    /// use lmm_agent::cognition::learning::q_table::{ActionKey, QTable};
    ///
    /// let mut engine = LearningEngine::new(LearningConfig::default());
    /// let s = QTable::state_key("hello world");
    /// let action = engine.recommend_action(s, "hello world", 0);
    /// assert!(ActionKey::all().contains(&action));
    /// ```
    pub fn recommend_action(&mut self, state: u64, goal: &str, step: usize) -> ActionKey {
        if self.config.is_mode_active(LearningMode::MetaAdapt) {
            let offsets = self.meta.adapt(goal);
            if let Some((&best_action, _)) = offsets
                .iter()
                .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(Ordering::Equal))
            {
                // Warm-start: only trust the meta prior when the Q-table has
                // no data for this state yet.
                if self.q_table.best_action(state).is_none() {
                    return best_action;
                }
            }
        }
        self.q_table.select_action(state, step)
    }

    /// Finalises a completed episode: distils knowledge, stores meta-prototype,
    /// synthesises PMI facts, and decays ε.
    ///
    /// # Arguments
    ///
    /// * `cold` - The agent's cold store after `drain_to_cold`.
    /// * `index` - The agent's knowledge index for distillation output.
    /// * `goal` - The natural-language goal that was pursued.
    /// * `avg_reward` - Mean reward across all steps in this episode.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    /// use lmm_agent::cognition::memory::ColdStore;
    /// use lmm_agent::cognition::knowledge::KnowledgeIndex;
    ///
    /// let mut engine = LearningEngine::new(LearningConfig::default());
    /// let cold = ColdStore::default();
    /// let mut idx = KnowledgeIndex::new();
    /// engine.end_of_episode(&cold, &mut idx, "test goal", 0.5);
    /// assert_eq!(engine.episode_count(), 1);
    /// ```
    pub fn end_of_episode(
        &mut self,
        cold: &ColdStore,
        index: &mut KnowledgeIndex,
        goal: &str,
        avg_reward: f64,
    ) {
        if self.config.is_mode_active(LearningMode::Distill) {
            self.distiller.distill(cold, index);
        }

        if self.config.is_mode_active(LearningMode::MetaAdapt) {
            let offsets = self.compute_episode_offsets();
            self.meta.record_episode(goal, offsets, avg_reward);
        }

        if self.config.is_mode_active(LearningMode::Informal) {
            self.informal.synthesise_into(index, 5, 0.3);
        }

        if self.config.is_mode_active(LearningMode::QTable) {
            self.q_table.decay_epsilon();
        }

        self.experience_buffer.clear();
        self.episode_count += 1;
    }

    /// Merges a remote [`AgentSnapshot`] into the local Q-table (federated step).
    ///
    /// This is a no-op unless [`LearningMode::Federated`] is enabled.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    /// use lmm_agent::cognition::learning::q_table::QTable;
    /// use lmm_agent::types::AgentSnapshot;
    ///
    /// let mut engine = LearningEngine::new(LearningConfig::default());
    /// let remote_qt = QTable::new(0.1, 0.9, 0.0, 1.0, 0.0);
    /// let snap = AgentSnapshot { agent_id: "remote".into(), q_table: remote_qt, total_reward: 1.0 };
    /// engine.federate(&snap);
    /// assert_eq!(engine.aggregator().merge_count, 1);
    /// ```
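    ///
    /// A round-trip sketch between two local engines, assuming the default
    /// config leaves federated mode active (as in the example above):
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// let engine_a = LearningEngine::new(LearningConfig::default());
    /// let mut engine_b = LearningEngine::new(LearningConfig::default());
    ///
    /// // Agent A shares its Q-values; agent B blends them into its own table.
    /// let snap = engine_a.export_snapshot("agent-a");
    /// engine_b.federate(&snap);
    /// assert_eq!(engine_b.aggregator().merge_count, 1);
    /// ```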
    pub fn federate(&mut self, snapshot: &AgentSnapshot) {
        if self.config.is_mode_active(LearningMode::Federated) {
            self.aggregator.merge(&mut self.q_table, snapshot);
        }
    }

    /// Exports an [`AgentSnapshot`] for federated sharing with other agents.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// let engine = LearningEngine::new(LearningConfig::default());
    /// let snap = engine.export_snapshot("agent-1");
    /// assert_eq!(snap.agent_id, "agent-1");
    /// ```
    pub fn export_snapshot(&self, agent_id: impl Into<String>) -> AgentSnapshot {
        AgentSnapshot {
            agent_id: agent_id.into(),
            q_table: self.q_table.clone(),
            total_reward: self.total_reward,
        }
    }

    /// Returns a reference to the federated aggregator.
    pub fn aggregator(&self) -> &FederatedAggregator {
        &self.aggregator
    }

    /// Returns a reference to the elastic memory guard.
    pub fn elastic(&self) -> &ElasticMemoryGuard {
        &self.elastic
    }

    /// Returns a reference to the informal learner.
    pub fn informal(&self) -> &InformalLearner {
        &self.informal
    }

    /// Returns a reference to the meta-adapter.
    pub fn meta(&self) -> &MetaAdapter {
        &self.meta
    }

    /// Returns a reference to the knowledge distiller.
    pub fn distiller(&self) -> &KnowledgeDistiller {
        &self.distiller
    }

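    /// Computes the per-action mean reward over the current episode's
    /// experience buffer; `end_of_episode` feeds the result to the
    /// meta-adapter as warm-start offsets.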
    fn compute_episode_offsets(&self) -> HashMap<ActionKey, f64> {
        let mut totals: HashMap<ActionKey, (f64, usize)> = HashMap::new();
        for xp in &self.experience_buffer {
            let e = totals.entry(xp.action).or_insert((0.0, 0));
            e.0 += xp.reward;
            e.1 += 1;
        }
        totals
            .into_iter()
            .map(|(action, (total, count))| (action, total / count as f64))
            .collect()
    }
}