//! # Circuit
//!
//! A high-performance Rust implementation of the OpenAI Gym interface
//! for reinforcement learning.
//!
//! ```
//! let x = 3;
//!
//! assert_eq!(x, 3)
//! ```
//!
//!
/// Environments: the `State` returned by each step, the `Environment` trait,
/// and the bundled example games.
pub mod env {
use crate::space::Space;
/// The outcome of a single environment step, handed back to the agent.
///
/// `OSample` is the type of one observation produced by the environment.
#[derive(Debug, Clone, PartialEq)]
pub struct State<OSample> {
    /// What the agent gets to observe after the step.
    pub observation: OSample,
    /// Reward assigned to the action that produced this state.
    pub reward: f64,
    /// Whether the episode has finished.
    pub done: bool,
}
/// A world an agent interacts with by submitting actions and receiving
/// observations in return.
///
/// Type parameters:
///
/// * `Action` – the action space; a single action has type `ASample`.
/// * `Observation` – the observation space; a single observation has type
///   `OSample`.
pub trait Environment<Action, ASample, Observation, OSample>
where
    Action: Space<ASample>,
    Observation: Space<OSample>,
{
    /// Wipes all environment state and re-initializes it, analogous to
    /// starting a new game.
    ///
    /// Returns an observation from the freshly initialized environment.
    fn reset(&mut self) -> OSample;

    /// Applies an agent `action`, (probably) mutating the environment, and
    /// returns the resulting `State` to the agent.
    fn step(&mut self, action: ASample) -> State<OSample>;
}
pub mod rock_paper_scissors {
use crate::space::{Discrete, Space};
use super::Environment;
use super::State;
/// A simple two-player game; see
/// <https://en.wikipedia.org/wiki/Rock_paper_scissors>.
pub struct RockPaperScissors {
    /// Number of rounds that make up one game.
    pub rounds: u8,
    /// Plays recorded so far as `(computer, agent)` pairs. The agent's slot
    /// is `None` while the computer's move is still waiting for an answer.
    pub plays: Vec<(Option<u32>, Option<u32>)>,
    /// Space the moves are drawn from.
    pub action_space: Discrete,
    /// Space the observations handed to the agent belong to.
    pub observation_space: Discrete,
}
impl RockPaperScissors {
    /// Plays the computer's turn: samples a move from the action space,
    /// prints it, and records it as a pending play (agent slot left `None`).
    fn play(&mut self) {
        let action = self.action_space.sample();
        // `u32` is `Copy` and `println!` only borrows it, so no clone is needed.
        println!("Computer plays action {}", action);
        self.plays.push((Some(action), None));
    }

    /// Builds the default game: best out of 3, where the possible actions
    /// are Rock, Paper, or Scissors.
    ///
    /// The observation space is the opponent's last play.
    pub fn default() -> RockPaperScissors {
        RockPaperScissors {
            rounds: 3,
            plays: Vec::new(),
            action_space: Discrete(3),
            observation_space: Discrete(4),
        }
    }
}
/// Rock-paper-scissors played against a computer opponent that picks its
/// moves at random.
///
/// Moves are encoded as `0` = Scissors, `1` = Rock, `2` = Paper.
///
/// # Examples
///
/// ```rust
/// use coliseum::env;
/// use env::{Environment, State};
/// use env::rock_paper_scissors::RockPaperScissors;
///
/// let mut game = RockPaperScissors::default();
/// game.reset();
///
/// // our player loves rock
/// let agent = || 1;
///
/// loop {
///   let State { reward, done, .. } = game.step(agent());
///
///   if done {
///     break;
///   }
/// }
///
/// ```
///
impl Environment<Discrete, u32, Discrete, u32> for RockPaperScissors {
    /// Starts a new game: clears the history and lets the computer make the
    /// first (random) move.
    ///
    /// Returns `0`, meaning the last play is undefined.
    fn reset(&mut self) -> u32 {
        // computer plays first, chooses at random
        self.plays.clear();
        self.play();
        // 0 means last play is undefined
        0
    }

    /// Answers the computer's pending move with `action` and scores the round.
    ///
    /// The returned `State` carries the computer's move for this round as the
    /// observation, a reward of `1.0` (agent wins), `0.0` (draw) or `-1.0`
    /// (agent loses), and `done == true` once `rounds` rounds have been
    /// completed.
    ///
    /// NOTE(review): `reset` uses observation `0` for "undefined", while the
    /// observation returned here is the raw move, where `0` is Scissors —
    /// confirm which encoding the observation space is meant to use.
    ///
    /// # Panics
    ///
    /// Panics if called before `reset`, or if `action` is not `0`, `1` or `2`.
    fn step(&mut self, action: u32) -> State<u32> {
        // The last entry is always the computer's pending, unanswered move.
        let turn = self
            .plays
            .pop()
            .expect("`reset` must be called before `step`");
        let computer = turn
            .0
            .expect("a recorded play always holds the computer's move");

        // Scissors = 0
        // Rock = 1
        // Paper = 2
        //
        // Scissors > Paper
        // Rock > Scissors
        // Paper > Rock
        let reward = match (computer, action) {
            (0, 0) | (1, 1) | (2, 2) => 0.0,
            (0, 1) | (1, 2) | (2, 0) => 1.0,
            (0, 2) | (1, 0) | (2, 1) => -1.0,
            _ => panic!("Players entered undefined inputs"),
        };

        // append this turn
        self.plays.push((Some(computer), Some(action)));

        // Count finished rounds *before* queueing the computer's next move;
        // counting afterwards includes the pending play and ends the game one
        // round early.
        let rounds_played = self.plays.len();
        println!("Game has gone for {}/{} rounds", rounds_played, self.rounds);

        // start the next round
        self.play();

        State {
            observation: computer,
            reward,
            done: rounds_played >= usize::from(self.rounds),
        }
    }
}
}
}
pub mod space {
use rand::rngs::SmallRng;
use rand::{thread_rng, SeedableRng, Rng};
use ndarray_rand::RandomExt;
use ndarray::{Array, Dimension, ShapeBuilder};
use ndarray_rand::rand_distr::Uniform;
/// A set of values of type `Sample` that can be drawn from and tested for
/// membership.
///
/// Both methods take the space by value.
pub trait Space<Sample> {
    /// Draws one value from the space.
    fn sample(self) -> Sample;

    /// Reports whether `sample` belongs to the space.
    fn contains(self, sample: Sample) -> bool;
}
/// A finite space of non-negative integers: `Discrete(N)` holds the `N`
/// values `0, 1, ..., N - 1`.
///
/// # Examples
///
/// `Discrete(N)` contains the actions `0..N`:
///
/// ```rust
/// use coliseum::space::{Discrete, Space};
///
/// let n = 10;
/// let discrete = Discrete(n);
///
/// // `Discrete` is `Copy`, so the by-value `Space` methods work on a copy
/// // and `discrete` stays usable without an explicit clone.
/// assert!(discrete.sample() < 10);
/// assert!(!discrete.contains(10));
/// ```
///
/// A sample from `Discrete(N)` can be one-hot encoded over its `N` values:
///
/// ```rust
/// use coliseum::space::{Discrete, Space};
/// let discrete = Discrete(3);
///
/// let sample: (u32, u32, u32) = match discrete.sample() {
///     0 => (1, 0, 0),
///     1 => (0, 1, 0),
///     2 => (0, 0, 1),
///     _ => panic!("This is out of bounds!")
/// };
///
/// assert_eq!(sample.0 + sample.1 + sample.2, 1)
///
/// ```
///
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Discrete(pub u32);
impl Space<u32> for Discrete {
/// Draws a random item from [0, u32]
fn sample(self) -> u32 {
rand::thread_rng().gen_range(0, self.0)
}
/// Checks if the sample is part of the set
fn contains(self, sample: u32) -> bool {
sample < self.0
}
}
/// A (possibly unbounded) box in R^n. Specifically, a Box represents the
/// Cartesian product of n closed intervals. Each interval has the form of one
/// of [a, b], (-oo, b], [a, oo), or (-oo, oo). E.g. low = [-oo, -oo], high=[oo,oo]
/// is a 2D Cartesian plane.
///
/// This struct stores a single scalar `low` and a single scalar `high`, so
/// the same bounds apply to every element of an array of shape `shape`.
///
/// Note that inside this module the name `Box` refers to this type rather
/// than the standard library's `Box`.
pub struct Box<D, Shape>
where
    D: Dimension,
    Shape: ShapeBuilder<Dim = D>,
{
    /// Lower bound shared by every element.
    low: f64,
    /// Upper bound shared by every element.
    high: f64,
    /// Shape of the arrays this space produces.
    shape: Shape,
}
impl<D, Shape> Space<Array<f64, D>> for Box<D, Shape>
where
    D: Dimension,
    Shape: ShapeBuilder<Dim = D>,
{
    // https://github.com/openai/gym/blob/master/gym/spaces/box.py#L83
    // sample = np.empty(self.shape)
    /// Samples an array of shape `self.shape` using a uniform distribution
    /// built from `self.low` and `self.high`.
    ///
    /// We should be able to return a different distribution based
    /// on the bounds in Box, e.g. Gym uses:
    ///
    /// ```python
    ///  unbounded   = ~self.bounded_below & ~self.bounded_above
    ///  upp_bounded = ~self.bounded_below &  self.bounded_above
    ///  low_bounded =  self.bounded_below & ~self.bounded_above
    ///  bounded     =  self.bounded_below &  self.bounded_above
    ///
    ///  #### Vectorized sampling by interval type
    ///
    ///  sample[unbounded] = self.np_random.normal(
    ///    size=unbounded[unbounded].shape)
    ///
    ///  sample[low_bounded] = self.np_random.exponential(
    ///     size=low_bounded[low_bounded].shape) + self.low[low_bounded]
    ///  ```
    ///  ....
    ///
    /// # Panics
    ///
    /// Panics if a `SmallRng` cannot be seeded from the thread-local RNG.
    /// NOTE(review): `Uniform::new` is documented to panic when
    /// `low >= high` — confirm against the `rand` version in use.
    fn sample(self) -> Array<f64, D> {
        let distribution = Uniform::new(self.low, self.high);
        let mut rng = SmallRng::from_rng(thread_rng())
            .expect("create SmallRng from thread_rng failed");
        Array::random_using(self.shape, distribution, &mut rng)
    }

    /// Whether every element of `sample` lies within `[low, high]`.
    ///
    /// An empty array is trivially contained, and an array holding a NaN is
    /// never contained. Only the element values are checked; the shape of
    /// `sample` is not compared against the shape of the box.
    fn contains(self, sample: Array<f64, D>) -> bool {
        let (low, high) = (self.low, self.high);
        // One short-circuiting pass. Every comparison with NaN is false, so
        // a NaN element yields `false` rather than a panic.
        sample.iter().all(|&value| low <= value && value <= high)
    }
}
// impl<Shape, D> Box<Shape, D>
// where Shape: ShapeBuilder<Dim=D>,
// D: Dimension
// {
// /// Create a box with the given lower, upper bounds.
// /// Array index i corresponds to dimension d, i.e.
// /// low = [-∞, -∞], high = [∞, ∞] is a 2D Cartesian plane
// fn new(low: f64, high: f64, shape: Shape) -> Box<Shape, D> {
// Box { low, high, shape }
// }
// }
}
#[cfg(test)]
mod tests {
    /// Placeholder check that the test harness itself runs; it does not
    /// exercise any code from this crate.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}