pub struct PendulumEnvironment { /* private fields */ }
Pendulum environment for continuous control
Implementations
impl PendulumEnvironment
pub fn new() -> Self
Creates a new pendulum environment.
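A minimal construction sketch (the use path below is a placeholder; substitute this crate's actual module path for the environment and the ContinuousEnvironment trait):

// Placeholder import path; adjust to this crate's actual module layout.
use your_crate::{ContinuousEnvironment, PendulumEnvironment};

fn main() {
    let env = PendulumEnvironment::new();

    // The pendulum observation is [cos θ, sin θ, θ_dot] (3 values) and the
    // action is a single torque, as shown in the repository example below.
    println!("action dim:    {}", env.action_dim());
    println!("action bounds: {:?}", env.action_bounds());
}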
Examples found in repository
examples/continuous_rl.rs (line 45)
fn test_pendulum_dynamics() -> Result<()> {
    let mut env = PendulumEnvironment::new();

    println!(" Initial state: {:?}", env.state());
    println!(" Action bounds: {:?}", env.action_bounds());

    // Run a few steps with different actions
    let actions = vec![
        Array1::from_vec(vec![0.0]),  // No torque
        Array1::from_vec(vec![2.0]),  // Max positive torque
        Array1::from_vec(vec![-2.0]), // Max negative torque
    ];

    for (i, action) in actions.iter().enumerate() {
        let state = env.reset();
        let (next_state, reward, done) = env.step(action.clone())?;

        println!("\n Step {} with action {:.1}:", i + 1, action[0]);
        println!(
            " State: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
            state[0], state[1], state[2]
        );
        println!(
            " Next: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
            next_state[0], next_state[1], next_state[2]
        );
        println!(" Reward: {reward:.3}, Done: {done}");
    }

    Ok(())
}

/// Train QDDPG on pendulum control
fn train_qddpg_pendulum() -> Result<()> {
    let state_dim = 3;
    let action_dim = 1;
    let action_bounds = vec![(-2.0, 2.0)];
    let num_qubits = 4;
    let buffer_capacity = 10000;

    // Create QDDPG agent
    let mut agent = QuantumDDPG::new(
        state_dim,
        action_dim,
        action_bounds,
        num_qubits,
        buffer_capacity,
    )?;

    // Create environment
    let mut env = PendulumEnvironment::new();

    // Create optimizers
    let mut actor_optimizer = Adam::new(0.001);
    let mut critic_optimizer = Adam::new(0.001);

    // Train for a few episodes (reduced for demo)
    let episodes = 50;
    println!(" Training QDDPG for {episodes} episodes...");

    let rewards = agent.train(
        &mut env,
        episodes,
        &mut actor_optimizer,
        &mut critic_optimizer,
    )?;

    // Print training statistics
    let avg_initial = rewards[..10].iter().sum::<f64>() / 10.0;
    let avg_final = rewards[rewards.len() - 10..].iter().sum::<f64>() / 10.0;

    println!("\n Training Statistics:");
    println!(" - Average initial reward: {avg_initial:.2}");
    println!(" - Average final reward: {avg_final:.2}");
    println!(" - Improvement: {:.2}", avg_final - avg_initial);

    // Test trained agent
    println!("\n Testing trained agent...");
    test_trained_agent(&agent, &mut env)?;

    Ok(())
}

/// Test a trained agent
fn test_trained_agent(agent: &QuantumDDPG, env: &mut dyn ContinuousEnvironment) -> Result<()> {
    let test_episodes = 5;
    let mut test_rewards = Vec::new();

    for episode in 0..test_episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;
        let mut steps = 0;

        while !done && steps < 200 {
            let action = agent.get_action(&state, false)?; // No exploration
            let (next_state, reward, is_done) = env.step(action.clone())?;

            state = next_state;
            episode_reward += reward;
            done = is_done;
            steps += 1;
        }

        test_rewards.push(episode_reward);
        println!(
            " Test episode {}: Reward = {:.2}, Steps = {}",
            episode + 1,
            episode_reward,
            steps
        );
    }

    let avg_test = test_rewards.iter().sum::<f64>() / f64::from(test_episodes);
    println!(" Average test reward: {avg_test:.2}");

    Ok(())
}

/// Compare trained policy with random policy
fn compare_policies() -> Result<()> {
    let mut env = PendulumEnvironment::new();
    let episodes = 10;

    // Random policy performance
    println!(" Random Policy Performance:");
    let mut random_rewards = Vec::new();

    for _ in 0..episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;

        while !done {
            // Random action in bounds
            let action = Array1::from_vec(vec![4.0f64.mul_add(thread_rng().gen::<f64>(), -2.0)]);

            let (next_state, reward, is_done) = env.step(action)?;
            state = next_state;
            episode_reward += reward;
            done = is_done;
        }

        random_rewards.push(episode_reward);
    }

    let avg_random = random_rewards.iter().sum::<f64>() / f64::from(episodes);
    println!(" Average random policy reward: {avg_random:.2}");

    // Simple control policy (proportional control)
    println!("\n Simple Control Policy Performance:");
    let mut control_rewards = Vec::new();

    for _ in 0..episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;

        while !done {
            // Proportional control: torque = -k * theta
            let theta = state[1].atan2(state[0]); // Reconstruct angle
            let action = Array1::from_vec(vec![(-2.0 * theta).clamp(-2.0, 2.0)]);

            let (next_state, reward, is_done) = env.step(action)?;
            state = next_state;
            episode_reward += reward;
            done = is_done;
        }

        control_rewards.push(episode_reward);
    }

    let avg_control = control_rewards.iter().sum::<f64>() / f64::from(episodes);
    println!(" Average control policy reward: {avg_control:.2}");

    println!("\n Performance Summary:");
    println!(" - Random policy: {avg_random:.2}");
    println!(" - Simple control: {avg_control:.2}");
    println!(" - Improvement: {:.2}", avg_control - avg_random);

    Ok(())
}
Trait Implementations
impl ContinuousEnvironment for PendulumEnvironment
fn action_bounds(&self) -> Vec<(f64, f64)>
Gets the action space bounds (min, max) for each dimension
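In the repository example above the agent is constructed with a single torque bound of (-2.0, 2.0). A hedged sketch of clamping a proposed action into whatever bounds the environment reports (clamp_raw_action is a hypothetical helper, not part of this crate's API):

use ndarray::Array1;

// Hypothetical helper: clamp each action component into its reported bounds.
fn clamp_raw_action(mut action: Array1<f64>, bounds: &[(f64, f64)]) -> Array1<f64> {
    for (value, &(lo, hi)) in action.iter_mut().zip(bounds.iter()) {
        *value = value.clamp(lo, hi);
    }
    action
}

// Usage: let safe = clamp_raw_action(raw, &env.action_bounds());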
fn step(&mut self, action: Array1<f64>) -> Result<(Array1<f64>, f64, bool)>
Takes a continuous action and returns the next state, the reward, and whether the episode has terminated.
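A single-episode rollout sketch using reset and step, following the (next_state, reward, done) tuple order seen in the repository example; assumed to run inside a function returning Result<()>:

use ndarray::Array1;

let mut env = PendulumEnvironment::new();
let mut state = env.reset();
let mut total_reward = 0.0;

// Apply zero torque for at most 200 steps, as in the example above.
for _ in 0..200 {
    let (next_state, reward, done) = env.step(Array1::from_vec(vec![0.0]))?;
    state = next_state;
    total_reward += reward;
    if done {
        break;
    }
}
println!("final state: {state:?}, return: {total_reward:.2}");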
fn action_dim(&self) -> usize
Gets the action dimension.
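A small sketch that uses action_dim to size a zero action; the dimension should match the number of (min, max) pairs returned by action_bounds:

let env = PendulumEnvironment::new();
// Allocate a zero action of the right size for this environment.
let zero_action = ndarray::Array1::<f64>::zeros(env.action_dim());
assert_eq!(zero_action.len(), env.action_bounds().len());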
Auto Trait Implementations
impl Freeze for PendulumEnvironment
impl RefUnwindSafe for PendulumEnvironment
impl Send for PendulumEnvironment
impl Sync for PendulumEnvironment
impl Unpin for PendulumEnvironment
impl UnwindSafe for PendulumEnvironment
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise.
impl<T> Pointable for T
impl<T> Pointable for T
impl<SS, SP> SupersetOf<SS> for SP
where
    SS: SubsetOf<SP>,
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its superset.
fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).
fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.
fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.