pub struct PendulumEnvironment { /* private fields */ }
Pendulum environment for continuous control
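The environment is driven through the ContinuousEnvironment trait listed under Trait Implementations below, together with the reset() and state() calls shown in the repository example. A minimal reset/step loop is sketched here; it assumes ndarray's Array1, the crate's Result alias used in the repository example, and an illustrative helper name run_zero_torque_episode that is not part of the crate.
use ndarray::Array1;

// Minimal episode loop with a zero-torque policy. The observation layout
// [θ_cos, θ_sin, θ_dot] matches the fields printed in the repository example.
// `run_zero_torque_episode` is an illustrative helper, not a crate API.
fn run_zero_torque_episode(env: &mut PendulumEnvironment) -> Result<()> {
    let mut state = env.reset();
    let mut total_reward = 0.0;
    let mut done = false;

    while !done {
        // One component per action dimension reported by the environment.
        let action = Array1::from_vec(vec![0.0; env.action_dim()]);
        let (next_state, reward, is_done) = env.step(action)?;
        state = next_state;
        total_reward += reward;
        done = is_done;
    }

    println!("Final observation: {state:?}, episode return: {total_reward:.2}");
    Ok(())
}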
Implementations
impl PendulumEnvironment
pub fn new() -> Self
Create new pendulum environment
Examples found in repository
examples/continuous_rl.rs (line 37)
fn test_pendulum_dynamics() -> Result<()> {
    let mut env = PendulumEnvironment::new();

    println!(" Initial state: {:?}", env.state());
    println!(" Action bounds: {:?}", env.action_bounds());

    // Run a few steps with different actions
    let actions = vec![
        Array1::from_vec(vec![0.0]),  // No torque
        Array1::from_vec(vec![2.0]),  // Max positive torque
        Array1::from_vec(vec![-2.0]), // Max negative torque
    ];

    for (i, action) in actions.iter().enumerate() {
        let state = env.reset();
        let (next_state, reward, done) = env.step(action.clone())?;

        println!("\n Step {} with action {:.1}:", i + 1, action[0]);
        println!(
            " State: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
            state[0], state[1], state[2]
        );
        println!(
            " Next: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
            next_state[0], next_state[1], next_state[2]
        );
        println!(" Reward: {reward:.3}, Done: {done}");
    }

    Ok(())
}

/// Train QDDPG on pendulum control
fn train_qddpg_pendulum() -> Result<()> {
    let state_dim = 3;
    let action_dim = 1;
    let action_bounds = vec![(-2.0, 2.0)];
    let num_qubits = 4;
    let buffer_capacity = 10000;

    // Create QDDPG agent
    let mut agent = QuantumDDPG::new(
        state_dim,
        action_dim,
        action_bounds,
        num_qubits,
        buffer_capacity,
    )?;

    // Create environment
    let mut env = PendulumEnvironment::new();

    // Create optimizers
    let mut actor_optimizer = Adam::new(0.001);
    let mut critic_optimizer = Adam::new(0.001);

    // Train for a few episodes (reduced for demo)
    let episodes = 50;
    println!(" Training QDDPG for {episodes} episodes...");

    let rewards = agent.train(
        &mut env,
        episodes,
        &mut actor_optimizer,
        &mut critic_optimizer,
    )?;

    // Print training statistics
    let avg_initial = rewards[..10].iter().sum::<f64>() / 10.0;
    let avg_final = rewards[rewards.len() - 10..].iter().sum::<f64>() / 10.0;

    println!("\n Training Statistics:");
    println!(" - Average initial reward: {avg_initial:.2}");
    println!(" - Average final reward: {avg_final:.2}");
    println!(" - Improvement: {:.2}", avg_final - avg_initial);

    // Test trained agent
    println!("\n Testing trained agent...");
    test_trained_agent(&agent, &mut env)?;

    Ok(())
}

/// Test a trained agent
fn test_trained_agent(agent: &QuantumDDPG, env: &mut dyn ContinuousEnvironment) -> Result<()> {
    let test_episodes = 5;
    let mut test_rewards = Vec::new();

    for episode in 0..test_episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;
        let mut steps = 0;

        while !done && steps < 200 {
            let action = agent.get_action(&state, false)?; // No exploration
            let (next_state, reward, is_done) = env.step(action.clone())?;

            state = next_state;
            episode_reward += reward;
            done = is_done;
            steps += 1;
        }

        test_rewards.push(episode_reward);
        println!(
            " Test episode {}: Reward = {:.2}, Steps = {}",
            episode + 1,
            episode_reward,
            steps
        );
    }

    let avg_test = test_rewards.iter().sum::<f64>() / f64::from(test_episodes);
    println!(" Average test reward: {avg_test:.2}");

    Ok(())
}

/// Compare trained policy with random policy
fn compare_policies() -> Result<()> {
    let mut env = PendulumEnvironment::new();
    let episodes = 10;

    // Random policy performance
    println!(" Random Policy Performance:");
    let mut random_rewards = Vec::new();

    for _ in 0..episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;

        while !done {
            // Random action in bounds
            let action = Array1::from_vec(vec![4.0f64.mul_add(thread_rng().gen::<f64>(), -2.0)]);

            let (next_state, reward, is_done) = env.step(action)?;
            state = next_state;
            episode_reward += reward;
            done = is_done;
        }

        random_rewards.push(episode_reward);
    }

    let avg_random = random_rewards.iter().sum::<f64>() / f64::from(episodes);
    println!(" Average random policy reward: {avg_random:.2}");

    // Simple control policy (proportional control)
    println!("\n Simple Control Policy Performance:");
    let mut control_rewards = Vec::new();

    for _ in 0..episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;

        while !done {
            // Proportional control: torque = -k * theta
            let theta = state[1].atan2(state[0]); // Reconstruct angle
            let action = Array1::from_vec(vec![(-2.0 * theta).clamp(-2.0, 2.0)]);

            let (next_state, reward, is_done) = env.step(action)?;
            state = next_state;
            episode_reward += reward;
            done = is_done;
        }

        control_rewards.push(episode_reward);
    }

    let avg_control = control_rewards.iter().sum::<f64>() / f64::from(episodes);
    println!(" Average control policy reward: {avg_control:.2}");

    println!("\n Performance Summary:");
    println!(" - Random policy: {avg_random:.2}");
    println!(" - Simple control: {avg_control:.2}");
    println!(" - Improvement: {:.2}", avg_control - avg_random);

    Ok(())
}
}

Trait Implementations
impl ContinuousEnvironment for PendulumEnvironment
fn action_bounds(&self) -> Vec<(f64, f64)>
Gets the action space bounds (min, max) for each dimension
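In the repository example above the pendulum exposes a single torque dimension bounded by (-2.0, 2.0). A small sketch of forcing an arbitrary action into the reported bounds; the clamp_action helper is illustrative, not part of the crate:
use ndarray::Array1;

// Clamp each action component into the bounds reported by action_bounds().
// Usage: let bounded = clamp_action(raw_action, &env.action_bounds());
fn clamp_action(mut action: Array1<f64>, bounds: &[(f64, f64)]) -> Array1<f64> {
    for (a, (lo, hi)) in action.iter_mut().zip(bounds) {
        *a = a.clamp(*lo, *hi);
    }
    action
}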
fn step(&mut self, action: Array1<f64>) -> Result<(Array1<f64>, f64, bool)>
Takes a continuous action and returns the next state, the reward, and whether the episode is done
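A single-transition sketch: apply one torque command and recover the pendulum angle from the returned observation with atan2, mirroring compare_policies() in the example above. The inspect_transition helper is illustrative; Result is the crate's alias used in the repository example.
use ndarray::Array1;

// Apply one torque command, then reconstruct θ = atan2(θ_sin, θ_cos) from the
// returned observation, as compare_policies() does above.
fn inspect_transition(env: &mut PendulumEnvironment) -> Result<()> {
    env.reset();
    let action = Array1::from_vec(vec![1.0]); // torque inside the (-2.0, 2.0) bounds
    let (obs, reward, done) = env.step(action)?;
    let theta = obs[1].atan2(obs[0]);
    println!("theta = {theta:.3} rad, reward = {reward:.3}, done = {done}");
    Ok(())
}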
fn action_dim(&self) -> usize
Gets the action dimension
Auto Trait Implementations
impl Freeze for PendulumEnvironment
impl RefUnwindSafe for PendulumEnvironment
impl Send for PendulumEnvironment
impl Sync for PendulumEnvironment
impl Unpin for PendulumEnvironment
impl UnwindSafe for PendulumEnvironment
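Because the auto traits above include Send, an environment instance can be moved into a worker thread, for example to collect rollouts off the main thread. A sketch assuming the crate's step error type implements Debug (needed for expect); the rollout_in_background helper is illustrative:
use ndarray::Array1;
use std::thread;

// Move an owned environment into a spawned thread; relies only on
// `impl Send for PendulumEnvironment` from the auto trait listing above.
fn rollout_in_background(mut env: PendulumEnvironment) -> thread::JoinHandle<f64> {
    thread::spawn(move || {
        env.reset();
        let mut total = 0.0;
        let mut done = false;
        while !done {
            let (_, reward, is_done) = env
                .step(Array1::from_vec(vec![0.0]))
                .expect("pendulum step failed");
            total += reward;
            done = is_done;
        }
        total
    })
}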
Blanket Implementations

impl<T> BorrowMut<T> for T
where
    T: ?Sized,

fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise.

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise.

impl<T> Pointable for T

impl<SS, SP> SupersetOf<SS> for SP
where
    SS: SubsetOf<SP>,

fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its superset.

fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).

fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.

fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.