Trait border::core::base::Policy [−][src]
Represents a policy on an environment. It is based on a mapping from an observation to an action. The mapping can be either deterministic or stochastic.
Required methods
Loading content...
Implementors
impl<E, M, O, A> Policy<E> for DQN<E, M, O, A> where
E: Env,
M: Model1<Input = Tensor, Output = Tensor> + Clone,
E::Obs: Into<M::Input>,
E::Act: From<Tensor>,
O: TchBuffer<Item = E::Obs, SubBatch = M::Input>,
A: TchBuffer<Item = E::Act, SubBatch = Tensor>, [src]
E: Env,
M: Model1<Input = Tensor, Output = Tensor> + Clone,
E::Obs: Into<M::Input>,
E::Act: From<Tensor>,
O: TchBuffer<Item = E::Obs, SubBatch = M::Input>,
A: TchBuffer<Item = E::Act, SubBatch = Tensor>,
impl<E, M, O, A> Policy<E> for PGDiscrete<E, M, O, A> where
E: Env,
M: Model1<Input = Tensor, Output = Tensor>,
E::Obs: Into<M::Input> + Clone,
E::Act: From<Tensor>,
O: TchBuffer<Item = E::Obs, SubBatch = M::Input>,
A: TchBuffer<Item = E::Act, SubBatch = Tensor>, [src]
E: Env,
M: Model1<Input = Tensor, Output = Tensor>,
E::Obs: Into<M::Input> + Clone,
E::Act: From<Tensor>,
O: TchBuffer<Item = E::Obs, SubBatch = M::Input>,
A: TchBuffer<Item = E::Act, SubBatch = Tensor>,
impl<E, M, O, A> Policy<E> for PPODiscrete<E, M, O, A> where
E: Env,
M: Model1<Input = Tensor, Output = (Tensor, Tensor)>,
E::Obs: Into<M::Input> + Clone,
E::Act: From<Tensor>,
O: TchBuffer<Item = E::Obs, SubBatch = M::Input>,
A: TchBuffer<Item = E::Act, SubBatch = Tensor>, [src]
E: Env,
M: Model1<Input = Tensor, Output = (Tensor, Tensor)>,
E::Obs: Into<M::Input> + Clone,
E::Act: From<Tensor>,
O: TchBuffer<Item = E::Obs, SubBatch = M::Input>,
A: TchBuffer<Item = E::Act, SubBatch = Tensor>,
impl<E, Q, P, O, A> Policy<E> for DDPG<E, Q, P, O, A> where
E: Env,
P: Model1<Output = A::SubBatch> + Clone,
E::Obs: Into<O::SubBatch>,
E::Act: From<Tensor>,
O: TchBuffer<Item = E::Obs, SubBatch = P::Input>,
A: TchBuffer<Item = E::Act, SubBatch = Tensor>, [src]
E: Env,
P: Model1<Output = A::SubBatch> + Clone,
E::Obs: Into<O::SubBatch>,
E::Act: From<Tensor>,
O: TchBuffer<Item = E::Obs, SubBatch = P::Input>,
A: TchBuffer<Item = E::Act, SubBatch = Tensor>,
impl<E, Q, P, O, A> Policy<E> for SAC<E, Q, P, O, A> where
E: Env,
Q: Model2<Input1 = O::SubBatch, Input2 = A::SubBatch, Output = Tensor> + Clone,
P: Model1<Input = Tensor, Output = (Tensor, Tensor)> + Clone,
E::Obs: Into<O::SubBatch>,
E::Act: From<Tensor>,
O: TchBuffer<Item = E::Obs, SubBatch = P::Input>,
A: TchBuffer<Item = E::Act, SubBatch = Tensor>, [src]
E: Env,
Q: Model2<Input1 = O::SubBatch, Input2 = A::SubBatch, Output = Tensor> + Clone,
P: Model1<Input = Tensor, Output = (Tensor, Tensor)> + Clone,
E::Obs: Into<O::SubBatch>,
E::Act: From<Tensor>,
O: TchBuffer<Item = E::Obs, SubBatch = P::Input>,
A: TchBuffer<Item = E::Act, SubBatch = Tensor>,