pub struct AgentTrainer<S>
where
    S: State,
{ /* private fields */ }
An AgentTrainer can be trained using a certain Agent. After training, the AgentTrainer contains learned knowledge about the process and can be queried for it. For example, you can ask the AgentTrainer for the expected values of all possible actions in a given state.
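A minimal sketch of that workflow, reusing the MyState/MyAgent types and the QLearning parameters from the crate's grid-world example shown further below; the use paths assume the crate's strategy::{learn, explore, terminate} module layout:

use rurel::AgentTrainer;
use rurel::strategy::explore::RandomExploration;
use rurel::strategy::learn::QLearning;
use rurel::strategy::terminate::FixedIterations;

let mut trainer = AgentTrainer::new();
let mut agent = MyAgent { state: initial_state.clone() };

// Learn Q-values for a fixed number of steps with random exploration.
trainer.train(
    &mut agent,
    &QLearning::new(0.2, 0.01, 2.),
    &mut FixedIterations::new(100_000),
    &RandomExploration::new(),
);

// Afterwards the trainer can be queried.
let best = trainer.best_action(&initial_state);
let values = trainer.expected_values(&initial_state);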
Implementations

impl<S> AgentTrainer<S>
where
    S: State,

pub fn new() -> AgentTrainer<S>
Examples found in repository

fn main() {
    // CoinAgent, CoinState and TARGET are defined elsewhere in this example.
    const TRIALS: i32 = 100000;
    let mut trainer = AgentTrainer::new();
    for trial in 0..TRIALS {
        // Each trial starts a fresh agent whose balance cycles through 1..=98.
        let mut agent = CoinAgent {
            state: CoinState {
                balance: 1 + trial % 98,
            },
        };
        trainer.train(
            &mut agent,
            &QLearning::new(0.2, 1.0, 0.0),
            &mut SinkStates {},
            &RandomExploration::new(),
        );
    }

    // Query the learned policy: best bet and its Q-value for every balance below the target.
    println!("Balance\tBet\tQ-value");
    for balance in 1..TARGET {
        let state = CoinState { balance };
        let action = trainer.best_action(&state).unwrap();
        println!(
            "{}\t{}\t{}",
            balance,
            action.bet,
            trainer.expected_value(&state, &action).unwrap(),
        );
    }
}
More examples
fn main() {
    // MyState, MyAgent and MyAction are defined elsewhere in this example.
    let initial_state = MyState {
        x: 0,
        y: 0,
        maxx: 21,
        maxy: 21,
    };
    let mut trainer = AgentTrainer::new();
    let mut agent = MyAgent {
        state: initial_state.clone(),
    };
    trainer.train(
        &mut agent,
        &QLearning::new(0.2, 0.01, 2.),
        &mut FixedIterations::new(100000),
        &RandomExploration::new(),
    );
    // Print the best learned move for every cell of the 21x21 grid.
    for j in 0..21 {
        for i in 0..21 {
            let entry: &HashMap<MyAction, f64> = trainer
                .expected_values(&MyState {
                    x: i,
                    y: j,
                    ..initial_state
                })
                .unwrap();
            // Pick the action with the highest learned Q-value for this cell.
            let best_action = entry
                .iter()
                .max_by(|(_, v1), (_, v2)| v1.partial_cmp(v2).unwrap())
                .map(|(v, _)| v)
                .unwrap();
            match best_action {
                MyAction::Move { dx: -1, dy: 0 } => print!("<"),
                MyAction::Move { dx: 1, dy: 0 } => print!(">"),
                MyAction::Move { dx: 0, dy: -1 } => print!("^"),
                MyAction::Move { dx: 0, dy: 1 } => print!("v"),
                _ => unreachable!(),
            };
        }
        println!();
    }
}
pub fn expected_values(&self, state: &S) -> Option<&HashMap<S::A, f64>>

Fetches the learned values for the given state, keyed by Action, or None if no values were learned for that state.
Examples found in repository

See the grid-world example under new above; it calls expected_values for every cell of the 21x21 grid.
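A smaller, hedged sketch of querying this map directly while avoiding unwrap for states the trainer never visited (assuming, as in the crate's examples, that the action type is Clone):

// Best (action, value) pair for a state, or None if the state was never seen in training.
let best = trainer.expected_values(&state).and_then(|values| {
    values
        .iter()
        .max_by(|(_, v1), (_, v2)| v1.partial_cmp(v2).unwrap())
        .map(|(action, value)| (action.clone(), *value))
});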
pub fn expected_value(&self, state: &S, action: &S::A) -> Option<f64>

Fetches the learned value for the given Action in the given State, or None if no value was learned.
Examples found in repository

See the betting example under new above; it prints expected_value for the best action at every balance below the target.
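A hedged one-liner for callers that prefer a default over Option handling, treating never-visited state/action pairs as having value 0.0:

let q = trainer.expected_value(&state, &action).unwrap_or(0.0);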
pub fn export_learned_values(&self) -> HashMap<S, HashMap<S::A, f64>>

Returns a clone of the entire learned state to be saved or used elsewhere.

pub fn learned_values(&self) -> &HashMap<S, HashMap<S::A, f64>>

Returns a reference to the entire learned state, for read-only access without cloning.
pub fn import_state(&mut self, q: HashMap<S, HashMap<S::A, f64>>)

Imports a learned state, completely replacing any existing learned progress.
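Together with export_learned_values, this allows a simple save-and-restore round trip. A hedged sketch (actually persisting the map to disk, e.g. via serde, would additionally require the state and action types to support serialization and is omitted here):

// Snapshot the learned Q-values from a trained trainer...
let snapshot = trainer.export_learned_values();

// ...and load them into a fresh trainer, replacing anything it had learned.
let mut restored = AgentTrainer::new();
restored.import_state(snapshot);

// `restored` can now be queried, or trained further starting from the imported values.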
pub fn best_action(&self, state: &S) -> Option<S::A>

Returns the best action for the given State, or None if no values were learned.
Examples found in repository

See the betting example under new above; it calls best_action for every balance below the target.
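A hedged sketch of handling the None case explicitly instead of unwrapping, useful for states the trainer may never have visited (assumes the action type implements Debug):

match trainer.best_action(&state) {
    Some(action) => {
        let q = trainer.expected_value(&state, &action);
        println!("best action: {:?} (Q = {:?})", action, q);
    }
    None => println!("no learned values for this state"),
}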
pub fn train(
    &mut self,
    agent: &mut dyn Agent<S>,
    learning_strategy: &dyn LearningStrategy<S>,
    termination_strategy: &mut dyn TerminationStrategy<S>,
    exploration_strategy: &dyn ExplorationStrategy<S>,
)

Trains this AgentTrainer using the given ExplorationStrategy, LearningStrategy and Agent until the TerminationStrategy decides to stop.
Examples found in repository

See the betting and grid-world examples under new above; both revolve around this train call.
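Learned values persist on the trainer across calls to train, which is what the betting example relies on by training once per trial on the same trainer. A hedged sketch of splitting training into two runs, reusing the grid-world strategies from above:

// A short first run...
trainer.train(
    &mut agent,
    &QLearning::new(0.2, 0.01, 2.),
    &mut FixedIterations::new(10_000),
    &RandomExploration::new(),
);

// ...then resume on the same trainer; the learned Q-values carry over.
trainer.train(
    &mut agent,
    &QLearning::new(0.2, 0.01, 2.),
    &mut FixedIterations::new(90_000),
    &RandomExploration::new(),
);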