use crate::{Differentiable, Enumerable, Function, OutputOf, Shared};
use ndarray::Array2;
use rand::{thread_rng, Rng};
mod greedy;
mod random;
mod epsilon_greedy;
pub use self::greedy::Greedy;
pub use self::random::Random;
pub use self::epsilon_greedy::EpsilonGreedy;
mod beta;
mod gaussian;
mod softmax;
pub use self::beta::Beta;
pub use self::gaussian::Gaussian;
pub use self::softmax::{Gibbs, Softmax};
mod ipp;
mod point;
pub use self::ipp::IPP;
pub use self::point::Point;
/// Samples an index from `probabilities` using the thread-local generator.
///
/// Convenience wrapper: obtains a generator from `thread_rng()` and forwards
/// it, together with `probabilities`, to `sample_probs_with_rng`.
// `dead_code` is allowed here — presumably not every configuration of this
// module calls the helper (NOTE(review): confirm it still has a caller).
#[allow(dead_code)]
#[inline]
pub(self) fn sample_probs(probabilities: &[f64]) -> usize {
    let mut rng = thread_rng();

    sample_probs_with_rng(&mut rng, probabilities)
}
/// Samples an index from `probabilities` using the supplied generator.
///
/// A single `f64` threshold is drawn with `rng.gen::<f64>()`. The entries of
/// `probabilities` are then accumulated left to right, and the index of the
/// first entry whose running total strictly exceeds the threshold is
/// returned.
///
/// If no running total exceeds the threshold (for example because the
/// entries sum to less than the drawn value, or a `NaN` poisons the running
/// total), the last index is returned.
///
/// # Panics
///
/// Panics if `probabilities` is empty, because there is no valid index to
/// return. Without this check the fallback `len() - 1` would underflow:
/// an overflow panic in debug builds and `usize::MAX` in release builds.
#[inline]
pub(self) fn sample_probs_with_rng<R: Rng + ?Sized>(rng: &mut R, probabilities: &[f64]) -> usize {
    assert!(
        !probabilities.is_empty(),
        "cannot sample an index from an empty probability slice"
    );

    let threshold = rng.gen::<f64>();

    probabilities
        .iter()
        // Turn the entries into their running (cumulative) totals.
        .scan(0.0, |cumulative, &p| {
            *cumulative += p;
            Some(*cumulative)
        })
        .position(|cumulative| cumulative > threshold)
        // Non-empty is guaranteed above, so `len() - 1` cannot underflow.
        .unwrap_or(probabilities.len() - 1)
}
/// A policy over states of type `S`.
///
/// Besides the methods declared here, every implementor must also be a
/// `Function` of a `(state, action)` pair, with the action passed either by
/// value or by reference, producing an `f64` in both cases.
/// NOTE(review): that `f64` is presumably the probability (or density) the
/// policy assigns to the action in the given state; confirm against the
/// implementors.
pub trait Policy<S>:
Function<(S, <Self as Policy<S>>::Action), Output = f64>
+ for<'a> Function<(S, &'a <Self as Policy<S>>::Action), Output = f64>
{
/// The type of action produced by this policy.
type Action: Sized;
/// Produces an action for `state`, using `rng` as the source of randomness.
fn sample<R: Rng + ?Sized>(&self, rng: &mut R, state: S) -> Self::Action;
/// Produces an action for `state` without a random generator
/// (presumably the most likely action under the policy; verify per implementor).
fn mode(&self, state: S) -> Self::Action;
}
/// `Shared<T>` is a policy whenever `T` is: both methods obtain the wrapped
/// policy through `borrow()` and delegate to it, reusing its action type.
impl<S, T: Policy<S>> Policy<S> for Shared<T> {
    type Action = T::Action;

    /// Delegates to the wrapped policy's `sample`.
    fn sample<R: Rng + ?Sized>(&self, rng: &mut R, state: S) -> Self::Action {
        let inner = self.borrow();

        inner.sample(rng, state)
    }

    /// Delegates to the wrapped policy's `mode`.
    fn mode(&self, state: S) -> Self::Action {
        let inner = self.borrow();

        inner.mode(state)
    }
}
/// Marker trait for policies whose actions are `usize` indices and which are
/// `Enumerable` over a `(state,)` input.
///
/// The enumerated output must be indexable by `usize` yielding `f64`, and
/// must be consumable as an exact-size iterator of `f64` values.
/// The trait declares no methods of its own; it only bundles these bounds.
pub trait EnumerablePolicy<S>: Policy<S, Action = usize> + Enumerable<(S,)>
where
OutputOf<Self, (S,)>: std::ops::Index<usize, Output = f64> + IntoIterator<Item = f64>,
<OutputOf<Self, (S,)> as IntoIterator>::IntoIter: ExactSizeIterator,
{
}
/// Blanket implementation: any `P` that is a `Policy` with `usize` actions,
/// is `Enumerable` over `(S,)`, and whose enumerated output satisfies the
/// indexing and exact-size iteration bounds below is an `EnumerablePolicy`.
impl<S, P> EnumerablePolicy<S> for P
where
P: Policy<S, Action = usize> + Enumerable<(S,)>,
OutputOf<Self, (S,)>: std::ops::Index<usize, Output = f64> + IntoIterator<Item = f64>,
<OutputOf<Self, (S,)> as IntoIterator>::IntoIter: ExactSizeIterator,
{
}
/// Marker trait for policies that are also `Differentiable` with respect to
/// a `(state, action)` input, with the action passed either by value or by
/// reference, and whose `Jacobian` type is `Array2<f64>` in both cases.
/// The trait declares no methods of its own; it only bundles these bounds.
pub trait DifferentiablePolicy<S>:
Policy<S>
+ Differentiable<(S, <Self as Policy<S>>::Action), Jacobian = Array2<f64>>
+ for<'a> Differentiable<(S, &'a <Self as Policy<S>>::Action), Jacobian = Array2<f64>>
{
}
/// Blanket implementation: any `Policy` that is `Differentiable` over both
/// the by-value and by-reference `(state, action)` inputs, with an
/// `Array2<f64>` Jacobian, is a `DifferentiablePolicy`.
impl<S, P> DifferentiablePolicy<S> for P where P: Policy<S>
+ Differentiable<(S, <Self as Policy<S>>::Action), Jacobian = Array2<f64>>
+ for<'a> Differentiable<(S, &'a <Self as Policy<S>>::Action), Jacobian = Array2<f64>>
{
}