pub struct ValuesOptConfig<MB, OC = AdamConfig> {
pub state_value_fn_config: MB,
pub optimizer_config: OC,
pub advantage_fn: AdvantageFn,
pub target: StepValueTarget,
pub opt_steps_per_update: u64,
pub max_discount_factor: f64,
}
Expand description
Configuration for `ValuesOpt`.
Fields
state_value_fn_config: MB
Configuration for the state value function module.
optimizer_config: OC
Configuration for the state value function module optimizer.
advantage_fn: AdvantageFn
Strategy for calculating advantage estimates given a state value function module.
target: StepValueTarget
Strategy for calculating state value target values.
The state value module is updated to minimize its mean-squared-error to these targets.
opt_steps_per_update: u64
Number of optimization steps per update.
Design Note
Could be called `num_epochs` by analogy to supervised learning, where an epoch is one pass
through the dataset; here the dataset is the experience collected since the last agent
update. However, the term “epoch” is used inconsistently in reinforcement learning,
sometimes referring instead to one iteration of the collect-data-then-update-agent loop.
max_discount_factor: f64
Upper bound on the environment discount factor.
Effectively sets a maximum horizon on the number of steps of future reward considered. Low values bias the value estimates but reduce variance.
Trait Implementations
sourceimpl<MB, OC> BuildCritic for ValuesOptConfig<MB, OC> where
MB: BuildModule,
MB::Module: SeqPacked,
OC: BuildOptimizer,
OC::Optimizer: Optimizer,
impl<MB, OC> BuildCritic for ValuesOptConfig<MB, OC> where
MB: BuildModule,
MB::Module: SeqPacked,
OC: BuildOptimizer,
OC::Optimizer: Optimizer,
type Critic = ValuesOpt<<MB as BuildModule>::Module, <OC as BuildOptimizer>::Optimizer>
fn build_critic(
&self,
in_dim: usize,
discount_factor: f64,
device: Device
) -> Self::Critic
sourceimpl<MB: Clone, OC: Clone> Clone for ValuesOptConfig<MB, OC>
impl<MB: Clone, OC: Clone> Clone for ValuesOptConfig<MB, OC>
sourcefn clone(&self) -> ValuesOptConfig<MB, OC>
fn clone(&self) -> ValuesOptConfig<MB, OC>
Returns a copy of the value. Read more
1.0.0 · sourcefn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
sourceimpl<MB: Debug, OC: Debug> Debug for ValuesOptConfig<MB, OC>
impl<MB: Debug, OC: Debug> Debug for ValuesOptConfig<MB, OC>
sourceimpl<MB: Default, OC: Default> Default for ValuesOptConfig<MB, OC>
impl<MB: Default, OC: Default> Default for ValuesOptConfig<MB, OC>
sourceimpl<'de, MB, OC> Deserialize<'de> for ValuesOptConfig<MB, OC> where
MB: Deserialize<'de>,
OC: Deserialize<'de>,
impl<'de, MB, OC> Deserialize<'de> for ValuesOptConfig<MB, OC> where
MB: Deserialize<'de>,
OC: Deserialize<'de>,
sourcefn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
sourceimpl<MB: PartialEq, OC: PartialEq> PartialEq<ValuesOptConfig<MB, OC>> for ValuesOptConfig<MB, OC>
impl<MB: PartialEq, OC: PartialEq> PartialEq<ValuesOptConfig<MB, OC>> for ValuesOptConfig<MB, OC>
sourcefn eq(&self, other: &ValuesOptConfig<MB, OC>) -> bool
fn eq(&self, other: &ValuesOptConfig<MB, OC>) -> bool
This method tests for `self` and `other` values to be equal, and is used by `==`. Read more
sourcefn ne(&self, other: &ValuesOptConfig<MB, OC>) -> bool
fn ne(&self, other: &ValuesOptConfig<MB, OC>) -> bool
This method tests for `!=`.
sourceimpl<MB, OC> Serialize for ValuesOptConfig<MB, OC> where
MB: Serialize,
OC: Serialize,
impl<MB, OC> Serialize for ValuesOptConfig<MB, OC> where
MB: Serialize,
OC: Serialize,
impl<MB: Copy, OC: Copy> Copy for ValuesOptConfig<MB, OC>
impl<MB, OC> StructuralPartialEq for ValuesOptConfig<MB, OC>
Auto Trait Implementations
impl<MB, OC> RefUnwindSafe for ValuesOptConfig<MB, OC> where
MB: RefUnwindSafe,
OC: RefUnwindSafe,
impl<MB, OC> Send for ValuesOptConfig<MB, OC> where
MB: Send,
OC: Send,
impl<MB, OC> Sync for ValuesOptConfig<MB, OC> where
MB: Sync,
OC: Sync,
impl<MB, OC> Unpin for ValuesOptConfig<MB, OC> where
MB: Unpin,
OC: Unpin,
impl<MB, OC> UnwindSafe for ValuesOptConfig<MB, OC> where
MB: UnwindSafe,
OC: UnwindSafe,
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more