pub struct DqnTrainer<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> { /* private fields */ }

Expand description
The imperative training driver for DQN.
A thin convenience wrapper around TrainingSession that adds an
environment and drives the training loop. Use TrainingSession directly
when your loop is owned externally (e.g. Bevy’s ECS).
§Usage – iterator style
ⓘ (Illustrative example; not compiled as a doctest.)
let mut trainer = DqnTrainer::new(env, agent);
for step in trainer.steps().take(50_000) {
if step.episode_done {
println!("Episode {} reward: {}", step.episode, step.episode_reward);
}
}

§Usage – imperative style with run tracking
ⓘ (Illustrative example; not compiled as a doctest.)
let mut trainer = DqnTrainer::new(env, agent)
.with_run(TrainingRun::create("cartpole", "v1")?)
.with_max_steps(200_000);
trainer.train();
let report = trainer.eval(20);
report.print();

Implementations§
Source§
impl<E, Enc, Act, B, Buf> DqnTrainer<E, Enc, Act, B, Buf> where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
impl<E, Enc, Act, B, Buf> DqnTrainer<E, Enc, Act, B, Buf> where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
pub fn new(env: E, agent: DqnAgent<E, Enc, Act, B, Buf>) -> Self
Source§
pub fn with_run(self, run: TrainingRun) -> Self
pub fn with_run(self, run: TrainingRun) -> Self
Attach a TrainingRun for checkpoint saving and JSONL logging.
Source§
pub fn with_max_steps(self, n: usize) -> Self
pub fn with_max_steps(self, n: usize) -> Self
Maximum training steps. train() stops when this is reached.
Source§
pub fn with_checkpoint_freq(self, freq: usize) -> Self
pub fn with_checkpoint_freq(self, freq: usize) -> Self
Checkpoint frequency in steps. Default: 10_000.
Source§
pub fn with_keep_checkpoints(self, keep: usize) -> Self
pub fn with_keep_checkpoints(self, keep: usize) -> Self
Number of numbered checkpoints to keep on disk. Default: 3.
Source§
pub fn with_stats(self, stats: StatsTracker) -> Self
pub fn with_stats(self, stats: StatsTracker) -> Self
Replace the default stats tracker.
Source§
pub fn steps(&mut self) -> TrainIter<'_, E, Enc, Act, B, Buf> ⓘ
pub fn steps(&mut self) -> TrainIter<'_, E, Enc, Act, B, Buf> ⓘ
Returns an iterator that yields StepMetrics after each environment step.
Source§
pub fn eval(&mut self, n_episodes: usize) -> EvalReport
pub fn eval(&mut self, n_episodes: usize) -> EvalReport
Run n_episodes of greedy evaluation and return an EvalReport.
Source§
pub fn session(&self) -> &TrainingSession<E, DqnAgent<E, Enc, Act, B, Buf>>
pub fn session(&self) -> &TrainingSession<E, DqnAgent<E, Enc, Act, B, Buf>>
Read-only access to the underlying session.
Source§
pub fn into_agent(self) -> DqnAgent<E, Enc, Act, B, Buf>
pub fn into_agent(self) -> DqnAgent<E, Enc, Act, B, Buf>
Consume the trainer and return the inner agent.
Auto Trait Implementations§
impl<E, Enc, Act, B, Buf> Freeze for DqnTrainer<E, Enc, Act, B, Buf> where
E: Freeze,
<E as Environment>::Observation: Freeze,
Buf: Freeze,
Enc: Freeze,
Act: Freeze,
<B as Backend>::Device: Freeze,
impl<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> !RefUnwindSafe for DqnTrainer<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf> Send for DqnTrainer<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> !Sync for DqnTrainer<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf> Unpin for DqnTrainer<E, Enc, Act, B, Buf> where
E: Unpin,
<E as Environment>::Observation: Unpin,
Buf: Unpin,
Enc: Unpin,
Act: Unpin,
<B as Backend>::Device: Unpin,
<B as Backend>::FloatTensorPrimitive: Unpin,
<B as Backend>::QuantizedTensorPrimitive: Unpin,
<<B as AutodiffBackend>::InnerBackend as Backend>::FloatTensorPrimitive: Unpin,
<<B as AutodiffBackend>::InnerBackend as Backend>::QuantizedTensorPrimitive: Unpin,
impl<E, Enc, Act, B, Buf> UnsafeUnpin for DqnTrainer<E, Enc, Act, B, Buf> where
E: UnsafeUnpin,
<E as Environment>::Observation: UnsafeUnpin,
Buf: UnsafeUnpin,
Enc: UnsafeUnpin,
Act: UnsafeUnpin,
<B as Backend>::Device: UnsafeUnpin,
impl<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> !UnwindSafe for DqnTrainer<E, Enc, Act, B, Buf>
Blanket Implementations§
Source§
impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§
fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§
impl<T> Instrument for T
impl<T> Instrument for T
Source§
fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§
fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§
impl<T> IntoEither for T
impl<T> IntoEither for T
Source§
fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more