//! rl_traits/agent.rs
use crate::environment::Environment;
use crate::experience::Experience;
use crate::policy::Policy;
4
5/// An agent that can act in an environment and learn from experience.
6///
7/// An `Agent` is a `Policy` that also knows how to update itself from
8/// batches of experience. The update logic is algorithm-specific and
9/// lives in ember-rl — this trait defines only the interface.
10///
11/// # Separation of concerns
12///
13/// - **`Policy`**: pure observation → action mapping. Stateless from the
14/// caller's perspective (though the implementation may be stateful internally).
15///
16/// - **`Agent`**: owns a policy and can improve it. Has mutable state.
17///
18/// Training loops in ember-rl work entirely in terms of `Agent<E>`:
19/// collect experience from `E`, push to a buffer, call `update()`.
20///
21/// # Generic over the environment
22///
23/// `Agent<E: Environment>` binds the agent to a specific environment's
24/// observation and action types. This prevents accidentally feeding a
25/// CartPole observation to a MuJoCo agent at compile time, not runtime.
26pub trait Agent<E: Environment>: Policy<E::Observation, E::Action> {
27 /// Update the agent's parameters from a batch of experience.
28 ///
29 /// The batch may be a random sample from a replay buffer (off-policy),
30 /// or a full trajectory (on-policy). The agent decides how to use it.
31 ///
32 /// This is called once per training step in the outer loop.
33 fn update(&mut self, batch: &[Experience<E::Observation, E::Action>]);
34}