// coliseum 0.1.0 - Docs.rs

//! # Coliseum
//!
//! A high-performance implementation of OpenAI's Gym
//! for reinforcement learning.
//!
//! ```
//! let x = 3;
//!
//! assert_eq!(x, 3)
//! ```
//!
//!
/// Environments: the `Environment` trait, the `State` returned by each
/// step, and a sample rock-paper-scissors game.
pub mod env {

    use crate::space::Space;


    /// The outcome of a single call to [`Environment::step`].
    ///
    /// `OSample` is the type of one observation handed back to the agent.
    #[derive(Debug, Clone, PartialEq)]
    pub struct State<OSample> {
        /// What the agent observes after the step.
        pub observation: OSample,
        /// Reward earned by the action that was just taken.
        pub reward: f64,
        /// `true` once the environment reports that the episode is over.
        pub done: bool,
    }

    /// A world that an agent interacts with one action at a time.
    ///
    /// * `Action` is the space actions are taken from and `ASample` is the
    ///   type of a single action.
    /// * `Observation` is the space observations belong to and `OSample` is
    ///   the type of a single observation.
    pub trait Environment<Action, ASample, Observation, OSample>
    where
        Action: Space<ASample>,
        Observation: Space<OSample>,
    {
        /// Wipes all environment state and re-initializes it, analogous to
        /// starting a new game.
        ///
        /// Returns an observation from the freshly initialized environment.
        fn reset(&mut self) -> OSample;

        /// Applies the agent's `action`, (probably) mutating the environment,
        /// and hands the resulting [`State`] back to the agent.
        fn step(&mut self, action: ASample) -> State<OSample>;
    }

    pub mod rock_paper_scissors {

        use crate::space::{Discrete, Space};
        use super::Environment;
        use super::State;

        /// A simple two-player game; see
        /// <https://en.wikipedia.org/wiki/Rock_paper_scissors>.
        pub struct RockPaperScissors {
            /// Number of rounds that make up one game.
            pub rounds: u8,
            /// Turn history as `(computer, agent)` pairs. The agent's slot is
            /// `None` while a round is still waiting for the agent's move.
            pub plays: Vec<(Option<u32>, Option<u32>)>,
            /// Space the computer's moves are sampled from.
            pub action_space: Discrete,
            /// Declared observation space (not consulted by the game logic
            /// visible in this module).
            pub observation_space: Discrete,
        }

        impl Default for RockPaperScissors {
            /// The default game is best out of 3, where the possible actions
            /// are Rock, Paper, or Scissors (`Discrete(3)`).
            /// The observation space is the opponent's last play
            /// (`Discrete(4)`).
            fn default() -> Self {
                RockPaperScissors {
                    rounds: 3,
                    plays: Vec::new(),
                    action_space: Discrete(3),
                    observation_space: Discrete(4),
                }
            }
        }

        impl RockPaperScissors {
            /// Computer plays its turn: samples a move from the action space,
            /// announces it on stdout and records it as a pending round whose
            /// agent slot is still `None`.
            fn play(&mut self) {
                let action = self.action_space.sample();
                println!("Computer plays action {}", action);
                self.plays.push((Some(action), None));
            }

            /// Builds the default game; see the [`Default`] implementation.
            ///
            /// Kept as an inherent function so that existing
            /// `RockPaperScissors::default()` calls resolve exactly as before.
            pub fn default() -> RockPaperScissors {
                <Self as Default>::default()
            }
        }

        /// Rock-paper-scissors against a computer opponent that picks its
        /// moves at random.
        ///
        /// Moves are encoded as `0` = Scissors, `1` = Rock, `2` = Paper.
        ///
        /// # Examples
        ///
        /// ```rust
        /// use coliseum::env;
        /// use env::{Environment, State};
        /// use env::rock_paper_scissors::RockPaperScissors;
        ///
        /// let mut game = RockPaperScissors::default();
        /// game.reset();
        ///
        /// // our player loves rock
        /// let agent = || 1;
        ///
        /// loop {
        ///     let State { reward, done, .. } = game.step(agent());
        ///
        ///     if done {
        ///         break;
        ///     }
        /// }
        ///
        /// ```
        ///
        impl Environment<Discrete, u32, Discrete, u32> for RockPaperScissors {
            /// Starts a new game: clears the turn history and lets the
            /// computer make the first move (chosen at random).
            ///
            /// Always returns `0`, meaning "the last play is undefined".
            fn reset(&mut self) -> u32 {
                self.plays = Vec::new();
                self.play();
                // 0 means last play is undefined
                0
            }

            /// Resolves the pending round with the agent's `action`, then
            /// queues the computer's move for the following round.
            ///
            /// The returned [`State`] carries the computer's move as the
            /// observation, a reward of `1.0` (agent wins), `0.0` (draw) or
            /// `-1.0` (agent loses), and `done == true` once at least
            /// `rounds` rounds have been completed.
            ///
            /// # Panics
            ///
            /// Panics if no round is pending (i.e. `reset` was never called)
            /// or if either move is not one of `0`, `1`, `2`.
            fn step(&mut self, action: u32) -> State<u32> {
                let turn = self
                    .plays
                    .pop()
                    .expect("step() called before reset(): no round is in progress");
                // Scissors = 0
                // Rock = 1
                // Paper = 2
                //
                // Scissors > Paper
                // Rock > Scissors
                // Paper > Rock
                let computer = turn
                    .0
                    .expect("every recorded turn carries the computer's move");

                // Reward from the agent's point of view.
                let reward: i32 = match (computer, action) {
                    (0, 0) => 0,
                    (0, 1) => 1,
                    (0, 2) => -1,
                    (1, 0) => -1,
                    (1, 1) => 0,
                    (1, 2) => 1,
                    (2, 0) => 1,
                    (2, 1) => -1,
                    (2, 2) => 0,
                    _ => panic!("Players entered undefined inputs")
                };

                // append this turn
                self.plays.push((Some(computer), Some(action)));

                // Count the finished rounds *before* the next pending round is
                // queued; counting afterwards would include the unplayed round
                // and end a 3-round game after only 2 steps.
                let completed = self.plays.len();
                println!("Game has gone for {}/{} rounds", completed, self.rounds);

                // start the next round
                self.play();

                // NOTE(review): `reset` uses observation 0 for "undefined",
                // which collides with Scissors (0) here; the 4-value
                // observation space suggests `computer + 1` may have been
                // intended -- confirm before changing.
                State {
                    observation: computer,
                    reward: f64::from(reward),
                    // Compared in `usize` so long histories cannot be
                    // truncated by a narrowing cast.
                    done: completed >= usize::from(self.rounds),
                }
            }
        }

    }

}

pub mod space {

    use rand::rngs::SmallRng;
    use rand::{thread_rng, SeedableRng, Rng};
    use ndarray_rand::RandomExt;
    use ndarray::{Array, Dimension, ShapeBuilder};
    use ndarray_rand::rand_distr::Uniform;


    /// A set of values that can be sampled from and tested for membership.
    ///
    /// Both methods take `self` by value, so a space that is not `Copy` is
    /// consumed by each call.
    pub trait Space<Sample> {
        /// Draws one value from the space.
        fn sample(self) -> Sample;
        /// Reports whether `sample` belongs to the space.
        fn contains(self, sample: Sample) -> bool;
    }

    /// The Discrete space allows a fixed range of non-negative numbers:
    /// `Discrete(N)` holds the integers `0..N` (`N` excluded), so for
    /// `Discrete(2)` the valid actions are either 0 or 1.
    ///
    /// # Examples
    ///
    /// `Discrete(N)` contains the actions `0..=N-1`:
    ///
    /// ```rust
    /// use coliseum::space::{Discrete, Space};
    ///
    /// let n = 10;
    /// let discrete = Discrete(n);
    ///
    /// // `Discrete` is `Copy`, so the by-value `Space` methods do not use
    /// // it up and no `clone()` is needed.
    /// assert!(discrete.sample() < 10);
    /// assert!(!discrete.contains(10));
    /// ```
    ///
    /// A sample from `Discrete(N)` can be expanded into a one-hot encoding
    /// with `N` slots:
    ///
    /// ```rust
    /// use coliseum::space::{Discrete, Space};
    /// let discrete = Discrete(3);
    ///
    /// let sample: (u32, u32, u32) = match discrete.sample() {
    ///     0 => (1, 0, 0),
    ///     1 => (0, 1, 0),
    ///     2 => (0, 0, 1),
    ///     _ => panic!("This is out of bounds!")
    /// };
    ///
    /// assert_eq!(sample.0 + sample.1 + sample.2, 1)
    ///
    /// ```
    ///
    #[derive(Debug, Copy, Clone, PartialEq, Eq)]
    pub struct Discrete(pub u32);


    impl Space<u32> for Discrete {
        /// Draws a random item from [0, u32]
        fn sample(self) -> u32 {
            rand::thread_rng().gen_range(0, self.0)
        }

        /// Checks if the sample is part of the set
        fn contains(self, sample: u32) -> bool {
            sample < self.0
        }
    }


    /// A (possibly unbounded) box in R^n. Conceptually, a Box represents the
    /// Cartesian product of n closed intervals, each of the form
    /// [a, b], (-oo, b], [a, oo), or (-oo, oo). E.g. low = [-oo, -oo],
    /// high = [oo, oo] is a 2D Cartesian plane.
    ///
    /// This type stores one scalar `low` and one scalar `high` that apply to
    /// every element, together with the `shape` of the arrays in the space.
    pub struct Box<D, Shape>
    where
        D: Dimension,
        Shape: ShapeBuilder<Dim = D>,
    {
        /// Lower bound shared by every element.
        low: f64,
        /// Upper bound shared by every element.
        high: f64,
        /// Shape of the arrays produced by sampling.
        shape: Shape,
    }


    impl<D, Shape> Space<ndarray::ArrayBase<ndarray::OwnedRepr<f64>, D>> for Box<D, Shape>
        where
            D: Dimension,
            Shape: ShapeBuilder<Dim=D>
    {

        // Reference implementation:
        // https://github.com/openai/gym/blob/master/gym/spaces/box.py#L83

        // sample = np.empty(self.shape)


        /// Samples an array of the box's shape using a uniform distribution
        /// built from `self.low` and `self.high`.
        ///
        /// We should be able to return a different distribution based
        /// on the bounds in Box, e.g. Gym uses:
        ///
        /// ```python
        /// unbounded   = ~self.bounded_below & ~self.bounded_above
        /// upp_bounded = ~self.bounded_below &  self.bounded_above
        /// low_bounded =  self.bounded_below & ~self.bounded_above
        /// bounded     =  self.bounded_below &  self.bounded_above
        ///
        /// #### Vectorized sampling by interval type
        ///
        /// sample[unbounded] = self.np_random.normal(
        ///     size=unbounded[unbounded].shape)
        ///
        /// sample[low_bounded] = self.np_random.exponential(
        ///     size=low_bounded[low_bounded].shape) + self.low[low_bounded]
        /// ```
        /// ....
        ///
        /// # Panics
        ///
        /// Panics if a `SmallRng` cannot be seeded from the thread RNG.
        ///
        /// NOTE(review): `Uniform::new` is documented to panic when
        /// `low >= high`, so degenerate or unbounded boxes presumably cannot
        /// be sampled yet -- confirm against the `rand_distr` version in use.
        fn sample(self) -> ndarray::ArrayBase<ndarray::OwnedRepr<f64>, D>
        {
            let distribution = Uniform::new(self.low, self.high);

            let mut rng = SmallRng::from_rng(thread_rng())
                .expect("create SmallRng from thread_rng failed");

            Array::random_using(self.shape, distribution, &mut rng)
        }

        /// Whether the sample exists in the Box, i.e. every element lies in
        /// the closed interval `[low, high]`.
        ///
        /// A NaN element is never inside the box, so it yields `false`
        /// instead of panicking. An empty array yields `true`. The shape of
        /// `sample` is not compared with the box's shape.
        fn contains(self, sample: Array<f64, D>) -> bool {
            // Copy the bounds out so the closure does not borrow `self`.
            let (low, high) = (self.low, self.high);

            // Single pass; any comparison with NaN is false, which rejects
            // the sample.
            sample.iter().all(|&value| low <= value && value <= high)
        }
    }

// impl<Shape, D> Box<Shape, D>
//     where Shape: ShapeBuilder<Dim=D>,
//           D: Dimension
// {
//     /// Create a box with the given lower, upper bounds.
//     /// Array index i corresponds to dimension d, i.e.
//     /// low = [-∞, -∞], high = [∞, ∞] is a 2D Cartesian plane
//     fn new(low: f64, high: f64, shape: Shape) -> Box<Shape, D> {
//         Box { low, high, shape }
//     }
// }
}

#[cfg(test)]
mod tests {

    /// Placeholder sanity check; it does not exercise any code from this
    /// crate.
    #[test]
    fn it_works() {
        assert_eq!(2 + 2, 4);
    }

}