lmm_agent/cognition/learning/engine.rs

// Copyright 2026 Mahmoud Harmouch.
//
// Licensed under the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! # `LearningEngine` - HELM orchestration layer.
//!
//! `LearningEngine` coordinates all six HELM sub-modules into a unified
//! per-agent learning controller. It is designed to plug into the existing
//! `ThinkLoop` lifecycle with two call-site hooks:
//!
//! 1. **`record_step`** - called once per ThinkLoop iteration with the
//!    `CognitionSignal` from that step.
//! 2. **`end_of_episode`** - called after `ThinkLoop::run` completes to trigger
//!    distillation, meta-prototype storage, elastic guard update, and ε decay.
//!
//! Optional federated exchange is performed explicitly via `federate`.
//!
//! ## Examples
//!
//! ```rust
//! use lmm_agent::cognition::learning::engine::LearningEngine;
//! use lmm_agent::cognition::learning::config::LearningConfig;
//! use lmm_agent::cognition::learning::q_table::{ActionKey, QTable};
//! use lmm_agent::cognition::memory::ColdStore;
//! use lmm_agent::cognition::knowledge::KnowledgeIndex;
//! use lmm_agent::cognition::signal::CognitionSignal;
//!
//! let mut engine = LearningEngine::new(LearningConfig::default());
//! let sig = CognitionSignal::new(0, "rust memory".into(), "rust uses ownership".into(), 1.0, 0.0);
//! let state = QTable::state_key("rust memory");
//! let next_state = QTable::state_key("rust uses ownership");
//! engine.record_step(&sig, state, ActionKey::Narrow, next_state);
//!
//! let cold = ColdStore::default();
//! let mut idx = KnowledgeIndex::new();
//! engine.end_of_episode(&cold, &mut idx, "rust memory safety", 0.8);
//! assert!(!engine.q_table().is_empty());
//! ```

use crate::cognition::knowledge::KnowledgeIndex;
use crate::cognition::learning::config::LearningConfig;
use crate::cognition::learning::distill::KnowledgeDistiller;
use crate::cognition::learning::elastic::ElasticMemoryGuard;
use crate::cognition::learning::federated::FederatedAggregator;
use crate::cognition::learning::informal::InformalLearner;
use crate::cognition::learning::meta::MetaAdapter;
use crate::cognition::learning::q_table::{ActionKey, QTable};
use crate::cognition::memory::ColdStore;
use crate::cognition::signal::CognitionSignal;
use crate::types::{AgentSnapshot, ExperienceRecord, LearningMode};
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::collections::HashMap;

/// The HELM orchestration hub that bundles all six learning sub-modules.
///
/// # Examples
///
/// ```rust
/// use lmm_agent::cognition::learning::engine::LearningEngine;
/// use lmm_agent::cognition::learning::config::LearningConfig;
///
/// let engine = LearningEngine::new(LearningConfig::default());
/// assert!(engine.q_table().is_empty());
/// assert_eq!(engine.episode_count(), 0);
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LearningEngine {
    config: LearningConfig,
    q_table: QTable,
    meta: MetaAdapter,
    distiller: KnowledgeDistiller,
    aggregator: FederatedAggregator,
    elastic: ElasticMemoryGuard,
    informal: InformalLearner,

    /// Accumulated total reward across all episodes.
    total_reward: f64,

    /// Number of completed episodes.
    episode_count: usize,

    /// Experience buffer for current episode (cleared at `end_of_episode`).
    experience_buffer: Vec<ExperienceRecord>,
}

impl LearningEngine {
    /// Constructs a new `LearningEngine` from a [`LearningConfig`].
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// let engine = LearningEngine::new(LearningConfig::default());
    /// assert_eq!(engine.episode_count(), 0);
    /// ```
    pub fn new(config: LearningConfig) -> Self {
        let q_table = QTable::new(
            config.alpha,
            config.gamma,
            config.epsilon,
            config.epsilon_decay,
            config.epsilon_min,
        );
        let meta = MetaAdapter::new(config.meta_top_k);
        let distiller = KnowledgeDistiller::new(config.distill_threshold, config.distill_top_k);
        let aggregator = FederatedAggregator::new(config.federated_blend);
        let elastic = ElasticMemoryGuard::new(config.elastic_pin_count, config.elastic_lambda);
        let informal = InformalLearner::new(config.distill_threshold, config.pmi_min_count, 0.5);

        Self {
            config,
            q_table,
            meta,
            distiller,
            aggregator,
            elastic,
            informal,
            total_reward: 0.0,
            episode_count: 0,
            experience_buffer: Vec::new(),
        }
    }

    /// Returns a reference to the current configuration.
    pub fn config(&self) -> &LearningConfig {
        &self.config
    }

    /// Returns a reference to the Q-table.
    pub fn q_table(&self) -> &QTable {
        &self.q_table
    }

    /// Resets the Q-table exploration epsilon.
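    ///
    /// # Examples
    ///
    /// A minimal sketch of overriding ε after construction:
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// let mut engine = LearningEngine::new(LearningConfig::default());
    /// engine.reset_epsilon(0.5);
    /// ```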
    pub fn reset_epsilon(&mut self, epsilon: f64) {
        self.q_table.reset_epsilon(epsilon);
    }

    /// Returns the total number of completed episodes.
    pub fn episode_count(&self) -> usize {
        self.episode_count
    }

    /// Returns the accumulated total reward.
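    ///
    /// # Examples
    ///
    /// A fresh engine starts with zero accumulated reward:
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// let engine = LearningEngine::new(LearningConfig::default());
    /// assert_eq!(engine.total_reward(), 0.0);
    /// ```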
    pub fn total_reward(&self) -> f64 {
        self.total_reward
    }

    /// Records one ThinkLoop step into all active learning sub-modules.
    ///
    /// This should be called inside `ThinkLoop::run` after each step produces
    /// a `CognitionSignal`.
    ///
    /// # Arguments
    ///
    /// * `signal`     - The signal produced by this ThinkLoop iteration.
    /// * `state`      - FNV-1a state key for the current query.
    /// * `action`     - The action applied to produce `signal.query`.
    /// * `next_state` - FNV-1a state key for the observation after the action.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    /// use lmm_agent::cognition::learning::q_table::{ActionKey, QTable};
    /// use lmm_agent::cognition::signal::CognitionSignal;
    ///
    /// let mut engine = LearningEngine::new(LearningConfig::default());
    /// let sig = CognitionSignal::new(0, "a b".into(), "a b c".into(), 1.0, 0.0);
    /// let s = QTable::state_key("a b");
    /// let s2 = QTable::state_key("a b c");
    /// engine.record_step(&sig, s, ActionKey::Narrow, s2);
    /// assert!(!engine.q_table().is_empty());
    /// ```
    pub fn record_step(
        &mut self,
        signal: &CognitionSignal,
        state: u64,
        action: ActionKey,
        next_state: u64,
    ) {
        if self.config.is_mode_active(LearningMode::QTable) {
            self.q_table
                .update(state, action, signal.reward, next_state);
        }

        if self.config.is_mode_active(LearningMode::Informal) {
            self.informal.observe(&signal.observation, signal.reward);
        }

        if self.config.is_mode_active(LearningMode::Elastic) && !signal.observation.is_empty() {
            self.elastic.observe_activation(&signal.observation);
        }

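        // The transition is buffered and the reward accumulated regardless of
        // which learning modes are active.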
        self.experience_buffer.push(ExperienceRecord {
            state,
            action,
            reward: signal.reward,
            next_state,
        });
        self.total_reward += signal.reward;
    }

    /// Selects the recommended action for `state` using the Q-table and meta priors.
    ///
    /// If the meta-adapter has a warm-start prior for `goal` and the Q-table
    /// has no entry yet for `state`, the prior's best action is returned
    /// directly; otherwise standard ε-greedy selection applies.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    /// use lmm_agent::cognition::learning::q_table::{ActionKey, QTable};
    ///
    /// let mut engine = LearningEngine::new(LearningConfig::default());
    /// let s = QTable::state_key("hello world");
    /// let action = engine.recommend_action(s, "hello world", 0);
    /// assert!(ActionKey::all().contains(&action));
    /// ```
    pub fn recommend_action(&mut self, state: u64, goal: &str, step: usize) -> ActionKey {
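        // Warm start: if the meta-adapter has a prior for this goal and the
        // Q-table has never scored `state`, return the prior's best action
        // instead of exploring from scratch.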
        if self.config.is_mode_active(LearningMode::MetaAdapt) {
            let offsets = self.meta.adapt(goal);
            if let Some((&best_action, _)) = offsets
                .iter()
                .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(Ordering::Equal))
            {
                let q_best = self.q_table.best_action(state);
                if q_best.is_none() {
                    return best_action;
                }
            }
        }
        self.q_table.select_action(state, step)
    }

    /// Finalises a completed episode: distils knowledge, stores meta-prototype,
    /// synthesises PMI facts, and decays ε.
    ///
    /// # Arguments
    ///
    /// * `cold`       - The agent's cold store after `drain_to_cold`.
    /// * `index`      - The agent's knowledge index for distillation output.
    /// * `goal`       - The natural-language goal that was pursued.
    /// * `avg_reward` - Mean reward across all steps in this episode.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    /// use lmm_agent::cognition::memory::ColdStore;
    /// use lmm_agent::cognition::knowledge::KnowledgeIndex;
    ///
    /// let mut engine = LearningEngine::new(LearningConfig::default());
    /// let cold = ColdStore::default();
    /// let mut idx = KnowledgeIndex::new();
    /// engine.end_of_episode(&cold, &mut idx, "test goal", 0.5);
    /// assert_eq!(engine.episode_count(), 1);
    /// ```
    pub fn end_of_episode(
        &mut self,
        cold: &ColdStore,
        index: &mut KnowledgeIndex,
        goal: &str,
        avg_reward: f64,
    ) {
        if self.config.is_mode_active(LearningMode::Distill) {
            self.distiller.distill(cold, index);
        }

        if self.config.is_mode_active(LearningMode::MetaAdapt) {
            let offsets = self.compute_episode_offsets();
            self.meta.record_episode(goal, offsets, avg_reward);
        }

        if self.config.is_mode_active(LearningMode::Informal) {
            self.informal.synthesise_into(index, 5, 0.3);
        }

        if self.config.is_mode_active(LearningMode::QTable) {
            self.q_table.decay_epsilon();
        }

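        // Per-episode bookkeeping runs unconditionally.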
        self.experience_buffer.clear();
        self.episode_count += 1;
    }

    /// Merges a remote [`AgentSnapshot`] into the local Q-table (federated step).
    ///
    /// Only active when [`LearningMode::Federated`] is enabled.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    /// use lmm_agent::cognition::learning::q_table::QTable;
    /// use lmm_agent::types::AgentSnapshot;
    ///
    /// let mut engine = LearningEngine::new(LearningConfig::default());
    /// let remote_qt = QTable::new(0.1, 0.9, 0.0, 1.0, 0.0);
    /// let snap = AgentSnapshot { agent_id: "remote".into(), q_table: remote_qt, total_reward: 1.0 };
    /// engine.federate(&snap);
    /// assert_eq!(engine.aggregator().merge_count, 1);
    /// ```
    pub fn federate(&mut self, snapshot: &AgentSnapshot) {
        if self.config.is_mode_active(LearningMode::Federated) {
            self.aggregator.merge(&mut self.q_table, snapshot);
        }
    }

    /// Exports an [`AgentSnapshot`] for federated sharing with other agents.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// let engine = LearningEngine::new(LearningConfig::default());
    /// let snap = engine.export_snapshot("agent-1");
    /// assert_eq!(snap.agent_id, "agent-1");
    /// ```
    pub fn export_snapshot(&self, agent_id: impl Into<String>) -> AgentSnapshot {
        AgentSnapshot {
            agent_id: agent_id.into(),
            q_table: self.q_table.clone(),
            total_reward: self.total_reward,
        }
    }

    /// Returns a reference to the federated aggregator.
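    ///
    /// # Examples
    ///
    /// A fresh engine has merged no snapshots yet (a minimal sketch, assuming
    /// the merge counter starts at zero):
    ///
    /// ```rust
    /// use lmm_agent::cognition::learning::engine::LearningEngine;
    /// use lmm_agent::cognition::learning::config::LearningConfig;
    ///
    /// let engine = LearningEngine::new(LearningConfig::default());
    /// assert_eq!(engine.aggregator().merge_count, 0);
    /// ```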
    pub fn aggregator(&self) -> &FederatedAggregator {
        &self.aggregator
    }

    /// Returns a reference to the elastic memory guard.
    pub fn elastic(&self) -> &ElasticMemoryGuard {
        &self.elastic
    }

    /// Returns a reference to the informal learner.
    pub fn informal(&self) -> &InformalLearner {
        &self.informal
    }

    /// Returns a reference to the meta-adapter.
    pub fn meta(&self) -> &MetaAdapter {
        &self.meta
    }

    /// Returns a reference to the knowledge distiller.
    pub fn distiller(&self) -> &KnowledgeDistiller {
        &self.distiller
    }

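    /// Computes the per-action mean reward over the current experience buffer.
    ///
    /// The resulting map is passed to the meta-adapter in `end_of_episode` as
    /// this episode's warm-start offsets.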
    fn compute_episode_offsets(&self) -> HashMap<ActionKey, f64> {
        let mut totals: HashMap<ActionKey, (f64, usize)> = HashMap::new();
        for xp in &self.experience_buffer {
            let e = totals.entry(xp.action).or_insert((0.0, 0));
            e.0 += xp.reward;
            e.1 += 1;
        }
        totals
            .into_iter()
            .map(|(action, (total, count))| (action, total / count as f64))
            .collect()
    }
}