use std::fmt::Debug;

use ordered_float::OrderedFloat;
use rand_pcg::Pcg64;
use serde::Serialize;

use crate::{
    spaces::BoxR,
    utils::{
        custom::{structs::Metadata, traits::Sample, types::O64},
        renderer::{RenderMode, Renders},
    },
};
/// Defines the default range of reward values that a given environment can produce.
const DEFAULT_REWARD_RANGE: &RewardRange = &RewardRange {
    lower_bound: OrderedFloat(f64::NEG_INFINITY),
    upper_bound: OrderedFloat(f64::INFINITY),
};
/// Defines the render mode used by default environment instances.
const DEFAULT_RENDER_MODE: &RenderMode = &RenderMode::None;
/// Defines a common set of operations available to different environments.
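///
/// # Example
///
/// A minimal sketch of a typical interaction loop. `MyEnv` and `action` are
/// hypothetical stand-ins for a concrete implementation, so the example is
/// not compiled:
///
/// ```ignore
/// let mut env = MyEnv::default();
/// // Seed the environment and obtain the initial observation.
/// let (mut observation, _reset_info) = env.reset(Some(42), false, None);
/// loop {
///     let result = env.step(action);
///     observation = result.observation;
///     if result.done || result.truncated {
///         break;
///     }
/// }
/// env.close();
/// ```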
pub trait Env: Clone + Debug + Serialize + EnvProperties
where
    Self::Observation: Sample + Into<Vec<f64>>,
{
    /// The type of action supported.
    type Action;
    /// The type of the observation produced after an action has been applied.
    type Observation;
    /// The type of the metadata object produced by acting on the environment.
    type Info;
    /// The type of the object produced when an environment is reset.
    type ResetInfo;
    /// Acts on the environment using the given action, producing a reward.
    fn step(&mut self, action: Self::Action) -> ActionReward<Self::Observation, Self::Info>;
    /// Resets the environment to an initial random state.
    fn reset(
        &mut self,
        seed: Option<u64>,
        return_info: bool,
        options: Option<BoxR<Self::Observation>>,
    ) -> (Self::Observation, Option<Self::ResetInfo>);
    /// Produces the renders, if any, associated with the given mode.
    fn render(&mut self, mode: RenderMode) -> Renders;
    /// Closes any open resources associated with the internal rendering service.
    fn close(&mut self);
}
/// Defines a set of properties that should be accessible in all environments.
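///
/// # Example
///
/// A sketch of how the provided defaults behave. `MyEnv` is a hypothetical
/// implementer that overrides neither `render_mode` nor `reward_range`, and
/// the assertions assume both returned types support equality comparison:
///
/// ```ignore
/// let env = MyEnv::default();
/// // Both accessors fall back to the module-level default constants.
/// assert_eq!(env.render_mode(), &RenderMode::None);
/// assert_eq!(env.reward_range(), &RewardRange::default());
/// ```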
pub trait EnvProperties
where
    Self: Sized,
{
    /// The type of the object describing the actions that can be taken.
    type ActionSpace;
    /// The type of the object describing the observations that can be produced.
    type ObservationSpace;
    /// Provides an object describing additional details about this environment.
    fn metadata(&self) -> &Metadata<Self>;
    /// Provides the random number generator responsible for seeding states.
    fn rand_random(&self) -> &Pcg64;
    /// Provides the current render mode.
    fn render_mode(&self) -> &RenderMode {
        DEFAULT_RENDER_MODE
    }
    /// Provides the range of reward values that this environment can produce.
    fn reward_range(&self) -> &RewardRange {
        DEFAULT_REWARD_RANGE
    }
    /// Provides the object describing the actions that can be taken in this environment.
    fn action_space(&self) -> &Self::ActionSpace;
    /// Provides the object describing the states that can be observed in this environment.
    fn observation_space(&self) -> &Self::ObservationSpace;
}
/// Encapsulates and describes the state update experienced by an environment
/// after an action has been applied.
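///
/// # Example
///
/// A sketch of consuming a step result; `env` and `action` are assumed to
/// come from a concrete [`Env`] implementation, so the example is not
/// compiled:
///
/// ```ignore
/// let ActionReward { reward, done, truncated, .. } = env.step(action);
/// if done || truncated {
///     // The episode is over; call `reset` before stepping again.
/// }
/// ```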
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ActionReward<T, E> {
    /// The current observable state.
    pub observation: T,
    /// The value of the reward produced.
    pub reward: O64,
    /// Indicates whether the episode has terminated.
    pub done: bool,
    /// Indicates whether the episode was truncated, i.e. terminated early.
    pub truncated: bool,
    /// Additional info implementations may provide for purposes beyond classical RL.
    pub info: Option<E>,
}
/// Defines the bounds for the reward value that can be observed.
#[derive(Clone, Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)]
pub struct RewardRange {
    /// The smallest possible reward that can be observed.
    lower_bound: O64,
    /// The largest possible reward that can be observed.
    upper_bound: O64,
}
/// Implements a default, unbounded reward range.
impl Default for RewardRange {
    fn default() -> Self {
        DEFAULT_REWARD_RANGE.clone()
    }
}
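
// A minimal sanity check for the `Default` implementation above; a sketch
// that assumes `O64` is `OrderedFloat<f64>`, as its use in
// `DEFAULT_REWARD_RANGE` suggests.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_reward_range_is_unbounded() {
        let range = RewardRange::default();
        // The default range clones the unbounded module-level constant.
        assert_eq!(range, *DEFAULT_REWARD_RANGE);
        assert_eq!(range.lower_bound, OrderedFloat(f64::NEG_INFINITY));
        assert_eq!(range.upper_bound, OrderedFloat(f64::INFINITY));
    }
}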