Struct AgentTrainer

pub struct AgentTrainer<S>
where S: State,
{ /* private fields */ }

An AgentTrainer can be trained using a certain Agent. After training, the AgentTrainer contains learned knowledge about the process and can be queried for it. For example, you can ask the AgentTrainer for the expected values of all possible actions in a given state.
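
A minimal, self-contained sketch of that workflow (the state, action, and agent types here are hypothetical, and the module paths assume the usual rurel layout: rurel::mdp::{Agent, State} and rurel::strategy::{learn, explore, terminate}):

use rurel::mdp::{Agent, State};
use rurel::strategy::explore::RandomExploration;
use rurel::strategy::learn::QLearning;
use rurel::strategy::terminate::FixedIterations;
use rurel::AgentTrainer;

// Toy problem: walk along the line segment [0, 20], aiming for x = 10.
#[derive(PartialEq, Eq, Hash, Clone)]
struct LineState { x: i32 }

#[derive(PartialEq, Eq, Hash, Clone)]
struct LineAction { dx: i32 }

impl State for LineState {
    type A = LineAction;
    // Reward grows as the agent approaches the goal.
    fn reward(&self) -> f64 {
        -((10 - self.x).abs() as f64)
    }
    fn actions(&self) -> Vec<LineAction> {
        vec![LineAction { dx: -1 }, LineAction { dx: 1 }]
    }
}

struct LineAgent { state: LineState }

impl Agent<LineState> for LineAgent {
    fn current_state(&self) -> &LineState {
        &self.state
    }
    fn take_action(&mut self, action: &LineAction) {
        // Apply the move, clamping so the state space stays finite.
        self.state = LineState { x: (self.state.x + action.dx).clamp(0, 20) };
    }
}

fn main() {
    let mut trainer = AgentTrainer::new();
    let mut agent = LineAgent { state: LineState { x: 0 } };
    trainer.train(
        &mut agent,
        &QLearning::new(0.2, 0.01, 2.),
        &mut FixedIterations::new(100_000),
        &RandomExploration::new(),
    );
    // Query the learned knowledge after training.
    let state = LineState { x: 5 };
    if let Some(action) = trainer.best_action(&state) {
        println!("best dx at x = 5: {}", action.dx);
        println!("Q-value: {:?}", trainer.expected_value(&state, &action));
    }
}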

Implementations

impl<S> AgentTrainer<S>
where S: State,

pub fn new() -> AgentTrainer<S>

Examples found in repository
src/examples/weightedcoin.rs (line 71)
69 fn main() {
70    const TRIALS: i32 = 100000;
71    let mut trainer = AgentTrainer::new();
72    for trial in 0..TRIALS {
73        let mut agent = CoinAgent {
74            state: CoinState {
75                balance: 1 + trial % 98,
76            },
77        };
78        trainer.train(
79            &mut agent,
80            &QLearning::new(0.2, 1.0, 0.0),
81            &mut SinkStates {},
82            &RandomExploration::new(),
83        );
84    }
85
86    println!("Balance\tBet\tQ-value");
87    for balance in 1..TARGET {
88        let state = CoinState { balance };
89        let action = trainer.best_action(&state).unwrap();
90        println!(
91            "{}\t{}\t{}",
92            balance,
93            action.bet,
94            trainer.expected_value(&state, &action).unwrap(),
95        );
96    }
97 }
More examples
src/examples/eucdist.rs (line 76)
69 fn main() {
70    let initial_state = MyState {
71        x: 0,
72        y: 0,
73        maxx: 21,
74        maxy: 21,
75    };
76    let mut trainer = AgentTrainer::new();
77    let mut agent = MyAgent {
78        state: initial_state.clone(),
79    };
80    trainer.train(
81        &mut agent,
82        &QLearning::new(0.2, 0.01, 2.),
83        &mut FixedIterations::new(100000),
84        &RandomExploration::new(),
85    );
86    for j in 0..21 {
87        for i in 0..21 {
88            let entry: &HashMap<MyAction, f64> = trainer
89                .expected_values(&MyState {
90                    x: i,
91                    y: j,
92                    ..initial_state
93                })
94                .unwrap();
95            let best_action = entry
96                .iter()
97                .max_by(|(_, v1), (_, v2)| v1.partial_cmp(v2).unwrap())
98                .map(|(v, _)| v)
99                .unwrap();
100            match best_action {
101                MyAction::Move { dx: -1, dy: 0 } => print!("<"),
102                MyAction::Move { dx: 1, dy: 0 } => print!(">"),
103                MyAction::Move { dx: 0, dy: -1 } => print!("^"),
104                MyAction::Move { dx: 0, dy: 1 } => print!("v"),
105                _ => unreachable!(),
106            };
107        }
108        println!();
109    }
110 }

pub fn expected_values(&self, state: &S) -> Option<&HashMap<S::A, f64>>

Fetches the learned values for the given state, keyed by Action, or None if no values were learned.

Examples found in repository
src/examples/eucdist.rs (lines 89-93)
69 fn main() {
70    let initial_state = MyState {
71        x: 0,
72        y: 0,
73        maxx: 21,
74        maxy: 21,
75    };
76    let mut trainer = AgentTrainer::new();
77    let mut agent = MyAgent {
78        state: initial_state.clone(),
79    };
80    trainer.train(
81        &mut agent,
82        &QLearning::new(0.2, 0.01, 2.),
83        &mut FixedIterations::new(100000),
84        &RandomExploration::new(),
85    );
86    for j in 0..21 {
87        for i in 0..21 {
88            let entry: &HashMap<MyAction, f64> = trainer
89                .expected_values(&MyState {
90                    x: i,
91                    y: j,
92                    ..initial_state
93                })
94                .unwrap();
95            let best_action = entry
96                .iter()
97                .max_by(|(_, v1), (_, v2)| v1.partial_cmp(v2).unwrap())
98                .map(|(v, _)| v)
99                .unwrap();
100            match best_action {
101                MyAction::Move { dx: -1, dy: 0 } => print!("<"),
102                MyAction::Move { dx: 1, dy: 0 } => print!(">"),
103                MyAction::Move { dx: 0, dy: -1 } => print!("^"),
104                MyAction::Move { dx: 0, dy: 1 } => print!("v"),
105                _ => unreachable!(),
106            };
107        }
108        println!();
109    }
110 }

pub fn expected_value(&self, state: &S, action: &S::A) -> Option<f64>

Fetches the learned value for the given Action in the given State, or None if no value was learned.

Examples found in repository
src/examples/weightedcoin.rs (line 94)
69 fn main() {
70    const TRIALS: i32 = 100000;
71    let mut trainer = AgentTrainer::new();
72    for trial in 0..TRIALS {
73        let mut agent = CoinAgent {
74            state: CoinState {
75                balance: 1 + trial % 98,
76            },
77        };
78        trainer.train(
79            &mut agent,
80            &QLearning::new(0.2, 1.0, 0.0),
81            &mut SinkStates {},
82            &RandomExploration::new(),
83        );
84    }
85
86    println!("Balance\tBet\tQ-value");
87    for balance in 1..TARGET {
88        let state = CoinState { balance };
89        let action = trainer.best_action(&state).unwrap();
90        println!(
91            "{}\t{}\t{}",
92            balance,
93            action.bet,
94            trainer.expected_value(&state, &action).unwrap(),
95        );
96    }
97 }

pub fn export_learned_values(&self) -> HashMap<S, HashMap<S::A, f64>>

Returns a clone of the entire learned state to be saved or used elsewhere.
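
For instance, a snapshot of the learned table can be taken after training and inspected or persisted by other code (a sketch; summarize is a hypothetical helper, and the rurel::mdp::State path is assumed):

use std::collections::HashMap;

use rurel::mdp::State;
use rurel::AgentTrainer;

// Hypothetical helper: count how many state/action pairs have learned values.
fn summarize<S: State>(trainer: &AgentTrainer<S>) -> usize {
    let snapshot: HashMap<S, HashMap<S::A, f64>> = trainer.export_learned_values();
    snapshot.values().map(|actions| actions.len()).sum()
}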

pub fn learned_values(&self) -> &HashMap<S, HashMap<S::A, f64>>

Returns a reference to the entire learned state.

pub fn import_state(&mut self, q: HashMap<S, HashMap<S::A, f64>>)

Imports a state, completely replacing any previously learned progress.
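
A sketch of a warm start built on this: values exported from a trained AgentTrainer seed a fresh one (warm_start is a hypothetical helper, and the rurel::mdp::State path is assumed):

use rurel::mdp::State;
use rurel::AgentTrainer;

// Hypothetical helper: copy the learned table out of `trained` and load it
// into a brand-new trainer, discarding anything the new trainer had learned.
fn warm_start<S: State>(trained: &AgentTrainer<S>) -> AgentTrainer<S> {
    let mut fresh = AgentTrainer::new();
    fresh.import_state(trained.export_learned_values());
    fresh
}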

pub fn best_action(&self, state: &S) -> Option<S::A>

Returns the best action for the given State, or None if no values were learned.

Examples found in repository
src/examples/weightedcoin.rs (line 89)
69 fn main() {
70    const TRIALS: i32 = 100000;
71    let mut trainer = AgentTrainer::new();
72    for trial in 0..TRIALS {
73        let mut agent = CoinAgent {
74            state: CoinState {
75                balance: 1 + trial % 98,
76            },
77        };
78        trainer.train(
79            &mut agent,
80            &QLearning::new(0.2, 1.0, 0.0),
81            &mut SinkStates {},
82            &RandomExploration::new(),
83        );
84    }
85
86    println!("Balance\tBet\tQ-value");
87    for balance in 1..TARGET {
88        let state = CoinState { balance };
89        let action = trainer.best_action(&state).unwrap();
90        println!(
91            "{}\t{}\t{}",
92            balance,
93            action.bet,
94            trainer.expected_value(&state, &action).unwrap(),
95        );
96    }
97 }

pub fn train(
    &mut self,
    agent: &mut dyn Agent<S>,
    learning_strategy: &dyn LearningStrategy<S>,
    termination_strategy: &mut dyn TerminationStrategy<S>,
    exploration_strategy: &dyn ExplorationStrategy<S>,
)

Trains this AgentTrainer using the given ExplorationStrategy, LearningStrategy and Agent until the TerminationStrategy decides to stop.

Examples found in repository
src/examples/weightedcoin.rs (lines 78-83)
69 fn main() {
70    const TRIALS: i32 = 100000;
71    let mut trainer = AgentTrainer::new();
72    for trial in 0..TRIALS {
73        let mut agent = CoinAgent {
74            state: CoinState {
75                balance: 1 + trial % 98,
76            },
77        };
78        trainer.train(
79            &mut agent,
80            &QLearning::new(0.2, 1.0, 0.0),
81            &mut SinkStates {},
82            &RandomExploration::new(),
83        );
84    }
85
86    println!("Balance\tBet\tQ-value");
87    for balance in 1..TARGET {
88        let state = CoinState { balance };
89        let action = trainer.best_action(&state).unwrap();
90        println!(
91            "{}\t{}\t{}",
92            balance,
93            action.bet,
94            trainer.expected_value(&state, &action).unwrap(),
95        );
96    }
97 }
More examples
src/examples/eucdist.rs (lines 80-85)
69 fn main() {
70    let initial_state = MyState {
71        x: 0,
72        y: 0,
73        maxx: 21,
74        maxy: 21,
75    };
76    let mut trainer = AgentTrainer::new();
77    let mut agent = MyAgent {
78        state: initial_state.clone(),
79    };
80    trainer.train(
81        &mut agent,
82        &QLearning::new(0.2, 0.01, 2.),
83        &mut FixedIterations::new(100000),
84        &RandomExploration::new(),
85    );
86    for j in 0..21 {
87        for i in 0..21 {
88            let entry: &HashMap<MyAction, f64> = trainer
89                .expected_values(&MyState {
90                    x: i,
91                    y: j,
92                    ..initial_state
93                })
94                .unwrap();
95            let best_action = entry
96                .iter()
97                .max_by(|(_, v1), (_, v2)| v1.partial_cmp(v2).unwrap())
98                .map(|(v, _)| v)
99                .unwrap();
100            match best_action {
101                MyAction::Move { dx: -1, dy: 0 } => print!("<"),
102                MyAction::Move { dx: 1, dy: 0 } => print!(">"),
103                MyAction::Move { dx: 0, dy: -1 } => print!("^"),
104                MyAction::Move { dx: 0, dy: 1 } => print!("v"),
105                _ => unreachable!(),
106            };
107        }
108        println!();
109    }
110 }

Trait Implementations

impl<S: State> Default for AgentTrainer<S>

fn default() -> Self

Returns the “default value” for a type.

Auto Trait Implementations

impl<S> Freeze for AgentTrainer<S>

impl<S> RefUnwindSafe for AgentTrainer<S>
where S: RefUnwindSafe, <S as State>::A: RefUnwindSafe,

impl<S> Send for AgentTrainer<S>
where S: Send, <S as State>::A: Send,

impl<S> Sync for AgentTrainer<S>
where S: Sync, <S as State>::A: Sync,

impl<S> Unpin for AgentTrainer<S>
where S: Unpin, <S as State>::A: Unpin,

impl<S> UnwindSafe for AgentTrainer<S>
where S: UnwindSafe, <S as State>::A: UnwindSafe,

Blanket Implementations

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

Gets the TypeId of self.

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

Immutably borrows from an owned value.

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value.

impl<T> From<T> for T

fn from(t: T) -> T

Returns the argument unchanged.

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

The type returned in the event of a conversion error.

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn vzip(self) -> V