//! Partition game: an environment in which an agent sorts boolean feature vectors into
//! `Left` or `Right` and is rewarded for agreeing with a hidden supervisor.
use super::{CloneBuild, EnvStructure, Environment, Successor};
use crate::feedback::Reward;
use crate::logging::StatsLogger;
use crate::spaces::{
    BooleanSpace, Indexed, IndexedTypeSpace, IntervalSpace, OptionSpace, PowerSpace, TupleSpace2,
};
use crate::Prng;
use rand::Rng;
use serde::{Deserialize, Serialize};

/// Number of boolean features in each [`Element`].
pub const NUM_FEATURES: usize = 10;
/// Elements to partition: a fixed-length array of [`NUM_FEATURES`] boolean features.
pub type Element = [bool; NUM_FEATURES];

/// Environment where the goal is to partition vectors based on supervision
///
/// Each state holds a hidden [`Supervisor`] and a random [`Element`]. The agent answers with an
/// [`Action`] and receives a reward of `1.0` when the answer matches the supervisor's
/// [`Classification`] of the element and `-1.0` otherwise. The following observation carries
/// the previous element together with its true classification as feedback.
///
/// The struct itself has no fields; all per-episode data lives in [`PartitionGameState`].
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct PartitionGame;

// Empty impl: no items are overridden, so everything comes from `CloneBuild` itself.
// NOTE(review): presumably this lets the environment act as its own configuration by being
// cloned when built — confirm against the `CloneBuild` definition in the parent module.
impl CloneBuild for PartitionGame {}

/// Rule that determines the true [`Classification`] of every [`Element`].
///
/// The supervisor is stored in [`PartitionGameState`] but is not part of the observation.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Supervisor {
    /// Classify by a single feature; the payload is the index of that feature in the element.
    ///
    /// The environment draws this index from `0..NUM_FEATURES`.
    AxisAligned(usize),
}

impl Supervisor {
    /// Returns this supervisor's label for `element`.
    ///
    /// For [`Supervisor::AxisAligned`] the label is [`Classification::Right`] when the feature
    /// at the stored axis is `true` and [`Classification::Left`] when it is `false`.
    ///
    /// # Panics
    ///
    /// Panics if the stored axis is not a valid index into `element`.
    const fn classify(self, element: &Element) -> Classification {
        match self {
            Self::AxisAligned(axis) => match element[axis] {
                true => Classification::Right,
                false => Classification::Left,
            },
        }
    }
}

/// Side of the partition that a [`Supervisor`] assigns to an element.
///
/// NOTE(review): the `Indexed` derive presumably provides the variant-to-index mapping needed
/// by `IndexedTypeSpace<Classification>` — confirm against the derive's definition.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Indexed)]
pub enum Classification {
    /// Label given by an axis-aligned supervisor when the selected feature is `false`.
    Left,
    /// Label given by an axis-aligned supervisor when the selected feature is `true`.
    Right,
}

/// The agent's answer for the current element.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Indexed)]
pub enum Action {
    /// Guess that the current element is labelled [`Classification::Left`].
    ClassifyLeft,
    /// Guess that the current element is labelled [`Classification::Right`].
    ClassifyRight,
    // Query,
    // NOTE(review): `Query` is a disabled variant that nothing in this file handles —
    // either implement it or delete this line.
}

/// Full state of a [`PartitionGame`] episode.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct PartitionGameState {
    /// Hidden labelling rule; chosen in the initial state and copied unchanged by every step.
    supervisor: Supervisor,
    /// The element the agent must classify next.
    element: Element,
    /// (prev_element, classification)
    ///
    /// `None` in the initial state. After a step it holds the element that was just classified
    /// together with the supervisor's label for it.
    feedback: Option<(Element, Classification)>,
}

/// Space type used for [`Element`] values: [`NUM_FEATURES`] copies of `BooleanSpace`.
pub type ElementSpace = PowerSpace<BooleanSpace, NUM_FEATURES>;
/// Space type for the `(element, classification)` feedback pair carried in observations.
///
/// Not to be confused with the `EnvStructure::FeedbackSpace` associated type, which this file
/// sets to the reward interval.
pub type FeedbackSpace = TupleSpace2<ElementSpace, IndexedTypeSpace<Classification>>;

/// Static description of the partition game's observation, action, and reward spaces.
impl EnvStructure for PartitionGame {
    /// Current element paired with optional feedback about the previous element.
    type ObservationSpace = TupleSpace2<ElementSpace, OptionSpace<FeedbackSpace>>;
    /// Space over the [`Action`] enum.
    type ActionSpace = IndexedTypeSpace<Action>;
    /// Interval containing every reward the environment can emit.
    type FeedbackSpace = IntervalSpace<Reward>;

    /// Returns the default-constructed observation space.
    fn observation_space(&self) -> Self::ObservationSpace {
        <Self::ObservationSpace as Default>::default()
    }

    /// Returns the default-constructed action space.
    fn action_space(&self) -> Self::ActionSpace {
        <Self::ActionSpace as Default>::default()
    }

    /// Returns the reward interval `[-1.0, 1.0]`.
    fn feedback_space(&self) -> Self::FeedbackSpace {
        // A wrong answer earns -1.0 and a correct answer earns 1.0.
        let lowest = Reward(-1.0);
        let highest = Reward(1.0);
        IntervalSpace::new(lowest, highest)
    }

    /// Returns the fixed discount factor of `0.999`.
    fn discount_factor(&self) -> f64 {
        const DISCOUNT_FACTOR: f64 = 0.999;
        DISCOUNT_FACTOR
    }
}

/// Dynamics of the partition game: random elements, a fixed hidden supervisor, ±1 rewards.
impl Environment for PartitionGame {
    type State = PartitionGameState;
    /// The element to classify now, plus feedback about the previous element (if any).
    type Observation = (Element, Option<(Element, Classification)>);
    type Action = Action;
    type Feedback = Reward;

    /// Samples a supervisor axis and a first element; no feedback exists yet.
    fn initial_state(&self, rng: &mut Prng) -> Self::State {
        // Keep the draw order (axis, then element) so the random stream is consumed
        // in the same sequence on every call.
        let axis = rng.gen_range(0..NUM_FEATURES);
        let element: Element = rng.gen();
        PartitionGameState {
            supervisor: Supervisor::AxisAligned(axis),
            element,
            feedback: None,
        }
    }

    /// Exposes the current element and the stored feedback; the supervisor stays hidden.
    fn observe(&self, state: &Self::State, _: &mut Prng) -> Self::Observation {
        let &PartitionGameState {
            element, feedback, ..
        } = state;
        (element, feedback)
    }

    /// Scores `action` against the supervisor's label and advances to a fresh random element.
    ///
    /// The reward is `1.0` when the action names the same side as the supervisor's label for
    /// the current element and `-1.0` otherwise. The successor is always
    /// `Successor::Continue`; it keeps the same supervisor and records the just-judged
    /// element with its true label as feedback.
    fn step(
        &self,
        state: Self::State,
        action: &Self::Action,
        rng: &mut Prng,
        _: &mut dyn StatsLogger,
    ) -> (Successor<Self::State>, Self::Feedback) {
        let PartitionGameState {
            supervisor,
            element: judged,
            ..
        } = state;
        let label = supervisor.classify(&judged);

        // Spell out all four combinations so a new variant forces this to be revisited.
        let reward = match (label, *action) {
            (Classification::Left, Action::ClassifyLeft)
            | (Classification::Right, Action::ClassifyRight) => Reward(1.0),
            (Classification::Left, Action::ClassifyRight)
            | (Classification::Right, Action::ClassifyLeft) => Reward(-1.0),
        };

        let next_state = PartitionGameState {
            supervisor,
            element: rng.gen(),
            // Feedback on the element and action that were just judged.
            feedback: Some((judged, label)),
        };
        (Successor::Continue(next_state), reward)
    }
}

#[cfg(test)]
mod tests {
    use super::super::testing::check_structured_env;
    use super::PartitionGame;

    /// Runs the shared structured-environment check on a default `PartitionGame`.
    ///
    /// NOTE(review): the meaning of `1000` and `0` is defined by `check_structured_env`
    /// (presumably a step count and a seed) — confirm against the `testing` module.
    #[test]
    fn run_default() {
        let env = PartitionGame::default();
        check_structured_env(&env, 1000, 0);
    }
}