use mctrust::{Environment, Heuristic, Outcome, Reward, SearchConfig, TreeSearch};
/// Search state for a one-dimensional minimisation toy problem.
///
/// The state is a single coordinate that each action moves by a fixed amount;
/// the reward computed from it is highest at `x == 0`.
///
/// `Debug` and `PartialEq` are derived so states can be logged and compared
/// in assertions.
#[derive(Clone, Debug, PartialEq)]
struct SphereMinimizer {
    /// Current position on the real line.
    x: f64,
    /// Amount added to or subtracted from `x` by a single action.
    step: f64,
}
/// A single move available to the search: shift `x` one `step` down or up.
///
/// The enum carries no data, so it is `Copy`, and it derives `Eq` + `Hash`
/// so actions can serve as keys in hash-based collections.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum Action {
    /// Subtract `step` from `x`.
    Dec,
    /// Add `step` to `x`.
    Inc,
}
impl SphereMinimizer {
fn reward_for(&self) -> f64 {
1.0 / (1.0 + self.x.abs())
}
}
/// Hooks the one-dimensional walk into the search via the `Environment` trait.
impl Environment for SphereMinimizer {
    type Action = Action;

    /// Both directions are offered from every position.
    fn legal_actions(&self) -> Vec<Action> {
        Vec::from([Action::Dec, Action::Inc])
    }

    /// Moves `x` by exactly one `step` in the direction named by `action`.
    fn apply(&mut self, action: &Action) {
        self.x = match action {
            Action::Dec => self.x - self.step,
            Action::Inc => self.x + self.step,
        };
    }

    /// Classifies the current position by its distance from the origin:
    /// success within `1e-3`, failure at `12.0` or beyond, ongoing otherwise.
    fn evaluate(&self) -> Outcome {
        match self.x.abs() {
            distance if distance <= 1e-3 => Outcome::Success(Reward::WIN),
            distance if distance >= 12.0 => Outcome::Failure,
            _ => Outcome::Ongoing,
        }
    }

    /// Wraps the `reward_for` score of the current position as a heuristic.
    fn heuristic(&self) -> Heuristic {
        let reward = Reward::new(self.reward_for());
        Heuristic::from_reward(reward)
    }
}
/// Runs a tree search starting at `x = 4.0` with a step of `0.5` and reports
/// the action the search selects.
///
/// The chosen action is printed to stdout. If the search yields no action, a
/// note is written to stderr so the program never exits without output.
fn main() {
    let state = SphereMinimizer { x: 4.0, step: 0.5 };

    let config = SearchConfig::builder()
        .iterations(4_000)
        .max_depth(200)
        .tree_policy(mctrust::TreePolicy::Uct)
        .heuristic_weight(0.7)
        .build();

    let mut search = TreeSearch::new(state, config);
    match search.run() {
        Some(best) => println!("best action toward minimum: {best:?}"),
        // Report the empty result explicitly rather than dropping it.
        None => eprintln!("search returned no action"),
    }
}