pub struct PendulumEnvironment { /* private fields */ }
Pendulum environment for continuous control
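A minimal construction-and-stepping sketch, patterned on the repository example shown under new() below. The import paths, the crate's Result alias, and the function name are assumptions; the [cos θ, sin θ, θ̇] observation layout and the single torque action are taken from that example:

// Assumed imports: PendulumEnvironment, ContinuousEnvironment, and Result from this
// crate, plus Array1 from ndarray. The function name is illustrative.
fn pendulum_smoke_test() -> Result<()> {
    let mut env = PendulumEnvironment::new();
    println!("state:  {:?}", env.state());         // [cos θ, sin θ, θ̇]
    println!("bounds: {:?}", env.action_bounds()); // torque limits per action dimension
    // Apply zero torque for one step and inspect the transition.
    let (_next_state, reward, done) = env.step(Array1::from_vec(vec![0.0]))?;
    println!("reward: {reward:.3}, done: {done}");
    Ok(())
}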
Implementations§
impl PendulumEnvironment
pub fn new() -> Self
Creates a new pendulum environment.
Examples found in repository
examples/continuous_rl.rs (line 38)

fn test_pendulum_dynamics() -> Result<()> {
    let mut env = PendulumEnvironment::new();

    println!(" Initial state: {:?}", env.state());
    println!(" Action bounds: {:?}", env.action_bounds());

    // Run a few steps with different actions
    let actions = vec![
        Array1::from_vec(vec![0.0]),  // No torque
        Array1::from_vec(vec![2.0]),  // Max positive torque
        Array1::from_vec(vec![-2.0]), // Max negative torque
    ];

    for (i, action) in actions.iter().enumerate() {
        let state = env.reset();
        let (next_state, reward, done) = env.step(action.clone())?;

        println!("\n Step {} with action {:.1}:", i + 1, action[0]);
        println!(
            " State: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
            state[0], state[1], state[2]
        );
        println!(
            " Next: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
            next_state[0], next_state[1], next_state[2]
        );
        println!(" Reward: {reward:.3}, Done: {done}");
    }

    Ok(())
}

/// Train QDDPG on pendulum control
fn train_qddpg_pendulum() -> Result<()> {
    let state_dim = 3;
    let action_dim = 1;
    let action_bounds = vec![(-2.0, 2.0)];
    let num_qubits = 4;
    let buffer_capacity = 10000;

    // Create QDDPG agent
    let mut agent = QuantumDDPG::new(
        state_dim,
        action_dim,
        action_bounds,
        num_qubits,
        buffer_capacity,
    )?;

    // Create environment
    let mut env = PendulumEnvironment::new();

    // Create optimizers
    let mut actor_optimizer = Adam::new(0.001);
    let mut critic_optimizer = Adam::new(0.001);

    // Train for a few episodes (reduced for demo)
    let episodes = 50;
    println!(" Training QDDPG for {episodes} episodes...");

    let rewards = agent.train(
        &mut env,
        episodes,
        &mut actor_optimizer,
        &mut critic_optimizer,
    )?;

    // Print training statistics
    let avg_initial = rewards[..10].iter().sum::<f64>() / 10.0;
    let avg_final = rewards[rewards.len() - 10..].iter().sum::<f64>() / 10.0;

    println!("\n Training Statistics:");
    println!(" - Average initial reward: {avg_initial:.2}");
    println!(" - Average final reward: {avg_final:.2}");
    println!(" - Improvement: {:.2}", avg_final - avg_initial);

    // Test trained agent
    println!("\n Testing trained agent...");
    test_trained_agent(&agent, &mut env)?;

    Ok(())
}

/// Test a trained agent
fn test_trained_agent(agent: &QuantumDDPG, env: &mut dyn ContinuousEnvironment) -> Result<()> {
    let test_episodes = 5;
    let mut test_rewards = Vec::new();

    for episode in 0..test_episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;
        let mut steps = 0;

        while !done && steps < 200 {
            let action = agent.get_action(&state, false)?; // No exploration
            let (next_state, reward, is_done) = env.step(action.clone())?;

            state = next_state;
            episode_reward += reward;
            done = is_done;
            steps += 1;
        }

        test_rewards.push(episode_reward);
        println!(
            " Test episode {}: Reward = {:.2}, Steps = {}",
            episode + 1,
            episode_reward,
            steps
        );
    }

    let avg_test = test_rewards.iter().sum::<f64>() / f64::from(test_episodes);
    println!(" Average test reward: {avg_test:.2}");

    Ok(())
}

/// Compare trained policy with random policy
fn compare_policies() -> Result<()> {
    let mut env = PendulumEnvironment::new();
    let episodes = 10;

    // Random policy performance
    println!(" Random Policy Performance:");
    let mut random_rewards = Vec::new();

    for _ in 0..episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;

        while !done {
            // Random action in bounds
            let action = Array1::from_vec(vec![4.0f64.mul_add(thread_rng().gen::<f64>(), -2.0)]);

            let (next_state, reward, is_done) = env.step(action)?;
            state = next_state;
            episode_reward += reward;
            done = is_done;
        }

        random_rewards.push(episode_reward);
    }

    let avg_random = random_rewards.iter().sum::<f64>() / f64::from(episodes);
    println!(" Average random policy reward: {avg_random:.2}");

    // Simple control policy (proportional control)
    println!("\n Simple Control Policy Performance:");
    let mut control_rewards = Vec::new();

    for _ in 0..episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;

        while !done {
            // Proportional control: torque = -k * theta
            let theta = state[1].atan2(state[0]); // Reconstruct angle
            let action = Array1::from_vec(vec![(-2.0 * theta).clamp(-2.0, 2.0)]);

            let (next_state, reward, is_done) = env.step(action)?;
            state = next_state;
            episode_reward += reward;
            done = is_done;
        }

        control_rewards.push(episode_reward);
    }

    let avg_control = control_rewards.iter().sum::<f64>() / f64::from(episodes);
    println!(" Average control policy reward: {avg_control:.2}");

    println!("\n Performance Summary:");
    println!(" - Random policy: {avg_random:.2}");
    println!(" - Simple control: {avg_control:.2}");
    println!(" - Improvement: {:.2}", avg_control - avg_random);

    Ok(())
}

Trait Implementations§
impl ContinuousEnvironment for PendulumEnvironment
fn action_bounds(&self) -> Vec<(f64, f64)>
Gets the action space bounds (min, max) for each dimension
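For instance, the reported bounds can be used to draw a uniformly random in-bounds action, which the random-policy baseline in the example above does with hard-coded limits. This generic version is a sketch; it assumes rand and ndarray are in scope and that env is an already constructed environment:

// Assumed imports: rand::{thread_rng, Rng} and ndarray::Array1.
// `env` is any ContinuousEnvironment, e.g. a PendulumEnvironment.
let action: Array1<f64> = env
    .action_bounds()
    .iter()
    .map(|&(lo, hi)| thread_rng().gen_range(lo..hi))
    .collect();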
fn step(&mut self, action: Array1<f64>) -> Result<(Array1<f64>, f64, bool)>
Takes a continuous action and returns the next state, reward, and done flag
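A rollout sketch built on the returned (next_state, reward, done) tuple, patterned on test_trained_agent in the example above. The 200-step cap and the zero-torque placeholder action are illustrative, and the snippet is assumed to run inside a function returning the crate's Result:

let mut state = env.reset();
let mut total_reward = 0.0;
let mut done = false;
let mut steps = 0;
while !done && steps < 200 {
    let action = Array1::from_vec(vec![0.0]); // placeholder: substitute a policy's action
    let (next_state, reward, is_done) = env.step(action)?;
    state = next_state;
    total_reward += reward;
    done = is_done;
    steps += 1;
}
println!("final state: {:?}, return: {total_reward:.2}", state);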
fn action_dim(&self) -> usize
Gets the action dimension.
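For example, a correctly shaped zero action can be allocated from this value (a small sketch; Array1 comes from ndarray):

let zero_action = Array1::<f64>::zeros(env.action_dim()); // one torque component for the pendulum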
Auto Trait Implementations§
impl Freeze for PendulumEnvironment
impl RefUnwindSafe for PendulumEnvironment
impl Send for PendulumEnvironment
impl Sync for PendulumEnvironment
impl Unpin for PendulumEnvironment
impl UnwindSafe for PendulumEnvironment
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true; otherwise converts self into a Right variant.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true; otherwise converts self into a Right variant.
impl<T> Pointable for T
impl<SS, SP> SupersetOf<SS> for SP
where
    SS: SubsetOf<SP>,
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its superset.
fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).
fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.
fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.