Struct relearn::torch::agents::critics::ValuesOptConfig

source · [−]

pub struct ValuesOptConfig<MB, OC = AdamConfig> {
    pub state_value_fn_config: MB,
    pub optimizer_config: OC,
    pub advantage_fn: AdvantageFn,
    pub target: StepValueTarget,
    pub opt_steps_per_update: u64,
    pub max_discount_factor: f64,
}

Expand description

Configuration for ValuesOpt.

Fields

state_value_fn_config: MB

Configuration for the state value function module.

optimizer_config: OC

Configuration for the state value function module optimizer.

advantage_fn: AdvantageFn

Strategy for calculating advantage estimates given a state value function module.

target: StepValueTarget

Strategy for calculating state value target values.

The state value module is updated to minimize the mean squared error between its outputs and these targets.

opt_steps_per_update: u64

Number of optimization steps per update.

Design Note

This could be called num_epochs by analogy to supervised learning, where an epoch is one pass through the dataset; here the dataset is the experience collected since the last agent update. However, the term “epoch” is used inconsistently in reinforcement learning, sometimes referring to an iteration of the collect-data-then-update-agent loop.

max_discount_factor: f64

Upper bound on the environment discount factor.

Effectively sets a maximum horizon on the number of steps of future reward considered. Low values bias the value estimates but reduce variance.

Struct relearn::torch::agents::critics::ValuesOptConfig

Fields

Design Note

Trait Implementations

impl<MB, OC> BuildCritic for ValuesOptConfig<MB, OC> where MB: BuildModule, MB::Module: SeqPacked, OC: BuildOptimizer, OC::Optimizer: Optimizer,

type Critic = ValuesOpt<<MB as BuildModule>::Module, <OC as BuildOptimizer>::Optimizer>

fn build_critic( &self, in_dim: usize, discount_factor: f64, device: Device) -> Self::Critic

impl<MB: Clone, OC: Clone> Clone for ValuesOptConfig<MB, OC>

fn clone(&self) -> ValuesOptConfig<MB, OC>

fn clone_from(&mut self, source: &Self)

impl<MB: Debug, OC: Debug> Debug for ValuesOptConfig<MB, OC>

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl<MB: Default, OC: Default> Default for ValuesOptConfig<MB, OC>

fn default() -> Self

impl<'de, MB, OC> Deserialize<'de> for ValuesOptConfig<MB, OC> where MB: Deserialize<'de>, OC: Deserialize<'de>,

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>,

impl<MB: PartialEq, OC: PartialEq> PartialEq<ValuesOptConfig<MB, OC>> for ValuesOptConfig<MB, OC>

fn eq(&self, other: &ValuesOptConfig<MB, OC>) -> bool

fn ne(&self, other: &ValuesOptConfig<MB, OC>) -> bool

impl<MB, OC> Serialize for ValuesOptConfig<MB, OC> where MB: Serialize, OC: Serialize,

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where __S: Serializer,

impl<MB: Copy, OC: Copy> Copy for ValuesOptConfig<MB, OC>

impl<MB, OC> StructuralPartialEq for ValuesOptConfig<MB, OC>

Auto Trait Implementations

impl<MB, OC> RefUnwindSafe for ValuesOptConfig<MB, OC> where MB: RefUnwindSafe, OC: RefUnwindSafe,

impl<MB, OC> Send for ValuesOptConfig<MB, OC> where MB: Send, OC: Send,

impl<MB, OC> Sync for ValuesOptConfig<MB, OC> where MB: Sync, OC: Sync,

impl<MB, OC> Unpin for ValuesOptConfig<MB, OC> where MB: Unpin, OC: Unpin,

impl<MB, OC> UnwindSafe for ValuesOptConfig<MB, OC> where MB: UnwindSafe, OC: UnwindSafe,

Blanket Implementations

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> AsAny for T where T: Any,

fn as_any(&self) -> &(dyn Any + 'static)

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> Pointable for T

const ALIGN: usize = mem::align_of::<T>()

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> Same<T> for T

type Output = T

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

impl<MB, OC> BuildCritic for ValuesOptConfig<MB, OC> where
MB: BuildModule,
MB::Module: SeqPacked,
OC: BuildOptimizer,
OC::Optimizer: Optimizer,

fn build_critic(
&self,
in_dim: usize,
discount_factor: f64,
device: Device
) -> Self::Critic

impl<'de, MB, OC> Deserialize<'de> for ValuesOptConfig<MB, OC> where
MB: Deserialize<'de>,
OC: Deserialize<'de>,

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,

impl<MB, OC> Serialize for ValuesOptConfig<MB, OC> where
MB: Serialize,
OC: Serialize,

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where
__S: Serializer,

impl<MB, OC> RefUnwindSafe for ValuesOptConfig<MB, OC> where
MB: RefUnwindSafe,
OC: RefUnwindSafe,

impl<MB, OC> Send for ValuesOptConfig<MB, OC> where
MB: Send,
OC: Send,

impl<MB, OC> Sync for ValuesOptConfig<MB, OC> where
MB: Sync,
OC: Sync,

impl<MB, OC> Unpin for ValuesOptConfig<MB, OC> where
MB: Unpin,
OC: Unpin,

impl<MB, OC> UnwindSafe for ValuesOptConfig<MB, OC> where
MB: UnwindSafe,
OC: UnwindSafe,

impl<T> Any for T where
T: 'static + ?Sized,

impl<T> AsAny for T where
T: Any,

impl<T> Borrow<T> for T where
T: ?Sized,

impl<T> BorrowMut<T> for T where
T: ?Sized,

impl<T, U> Into<U> for T where
U: From<T>,

impl<T> ToOwned for T where
T: Clone,

impl<T, U> TryFrom<U> for T where
U: Into<T>,

impl<T, U> TryInto<U> for T where
U: TryFrom<T>,

impl<V, T> VZip<V> for T where
V: MultiLane<T>,

impl<T> DeserializeOwned for T where
T: for<'de> Deserialize<'de>,