pub struct PendulumEnvironment { /* private fields */ }
Pendulum environment for continuous control
Implementations

impl PendulumEnvironment

pub fn new() -> Self
Creates a new pendulum environment.
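A minimal construction sketch (the import path for PendulumEnvironment and ContinuousEnvironment is not shown on this page and is assumed to match examples/continuous_rl.rs):

// Sketch only: crate import paths are assumed, not confirmed by this page.
let mut env = PendulumEnvironment::new();
let state = env.reset();                                // state is [cos θ, sin θ, θ_dot]
println!("initial state: {:?}", state);
println!("action bounds: {:?}", env.action_bounds());  // (min, max) torque per dimension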
Examples found in repository
examples/continuous_rl.rs (line 36)
35fn test_pendulum_dynamics() -> Result<()> {
36 let mut env = PendulumEnvironment::new();
37
38 println!(" Initial state: {:?}", env.state());
39 println!(" Action bounds: {:?}", env.action_bounds());
40
41 // Run a few steps with different actions
42 let actions = vec![
43 Array1::from_vec(vec![0.0]), // No torque
44 Array1::from_vec(vec![2.0]), // Max positive torque
45 Array1::from_vec(vec![-2.0]), // Max negative torque
46 ];
47
48 for (i, action) in actions.iter().enumerate() {
49 let state = env.reset();
50 let (next_state, reward, done) = env.step(action.clone())?;
51
52 println!("\n Step {} with action {:.1}:", i + 1, action[0]);
53 println!(
54 " State: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
55 state[0], state[1], state[2]
56 );
57 println!(
58 " Next: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
59 next_state[0], next_state[1], next_state[2]
60 );
61 println!(" Reward: {:.3}, Done: {}", reward, done);
62 }
63
64 Ok(())
65}
66
67/// Train QDDPG on pendulum control
68fn train_qddpg_pendulum() -> Result<()> {
69 let state_dim = 3;
70 let action_dim = 1;
71 let action_bounds = vec![(-2.0, 2.0)];
72 let num_qubits = 4;
73 let buffer_capacity = 10000;
74
75 // Create QDDPG agent
76 let mut agent = QuantumDDPG::new(
77 state_dim,
78 action_dim,
79 action_bounds,
80 num_qubits,
81 buffer_capacity,
82 )?;
83
84 // Create environment
85 let mut env = PendulumEnvironment::new();
86
87 // Create optimizers
88 let mut actor_optimizer = Adam::new(0.001);
89 let mut critic_optimizer = Adam::new(0.001);
90
91 // Train for a few episodes (reduced for demo)
92 let episodes = 50;
93 println!(" Training QDDPG for {} episodes...", episodes);
94
95 let rewards = agent.train(
96 &mut env,
97 episodes,
98 &mut actor_optimizer,
99 &mut critic_optimizer,
100 )?;
101
102 // Print training statistics
103 let avg_initial = rewards[..10].iter().sum::<f64>() / 10.0;
104 let avg_final = rewards[rewards.len() - 10..].iter().sum::<f64>() / 10.0;
105
106 println!("\n Training Statistics:");
107 println!(" - Average initial reward: {:.2}", avg_initial);
108 println!(" - Average final reward: {:.2}", avg_final);
109 println!(" - Improvement: {:.2}", avg_final - avg_initial);
110
111 // Test trained agent
112 println!("\n Testing trained agent...");
113 test_trained_agent(&agent, &mut env)?;
114
115 Ok(())
116}
117
118/// Test a trained agent
119fn test_trained_agent(agent: &QuantumDDPG, env: &mut dyn ContinuousEnvironment) -> Result<()> {
120 let test_episodes = 5;
121 let mut test_rewards = Vec::new();
122
123 for episode in 0..test_episodes {
124 let mut state = env.reset();
125 let mut episode_reward = 0.0;
126 let mut done = false;
127 let mut steps = 0;
128
129 while !done && steps < 200 {
130 let action = agent.get_action(&state, false)?; // No exploration
131 let (next_state, reward, is_done) = env.step(action.clone())?;
132
133 state = next_state;
134 episode_reward += reward;
135 done = is_done;
136 steps += 1;
137 }
138
139 test_rewards.push(episode_reward);
140 println!(
141 " Test episode {}: Reward = {:.2}, Steps = {}",
142 episode + 1,
143 episode_reward,
144 steps
145 );
146 }
147
148 let avg_test = test_rewards.iter().sum::<f64>() / test_episodes as f64;
149 println!(" Average test reward: {:.2}", avg_test);
150
151 Ok(())
152}
153
154/// Compare trained policy with random policy
155fn compare_policies() -> Result<()> {
156 let mut env = PendulumEnvironment::new();
157 let episodes = 10;
158
159 // Random policy performance
160 println!(" Random Policy Performance:");
161 let mut random_rewards = Vec::new();
162
163 for _ in 0..episodes {
164 let mut state = env.reset();
165 let mut episode_reward = 0.0;
166 let mut done = false;
167
168 while !done {
169 // Random action in bounds
170 let action = Array1::from_vec(vec![4.0 * rand::random::<f64>() - 2.0]);
171
172 let (next_state, reward, is_done) = env.step(action)?;
173 state = next_state;
174 episode_reward += reward;
175 done = is_done;
176 }
177
178 random_rewards.push(episode_reward);
179 }
180
181 let avg_random = random_rewards.iter().sum::<f64>() / episodes as f64;
182 println!(" Average random policy reward: {:.2}", avg_random);
183
184 // Simple control policy (proportional control)
185 println!("\n Simple Control Policy Performance:");
186 let mut control_rewards = Vec::new();
187
188 for _ in 0..episodes {
189 let mut state = env.reset();
190 let mut episode_reward = 0.0;
191 let mut done = false;
192
193 while !done {
194 // Proportional control: torque = -k * theta
195 let theta = state[1].atan2(state[0]); // Reconstruct angle
196 let action = Array1::from_vec(vec![(-2.0 * theta).clamp(-2.0, 2.0)]);
197
198 let (next_state, reward, is_done) = env.step(action)?;
199 state = next_state;
200 episode_reward += reward;
201 done = is_done;
202 }
203
204 control_rewards.push(episode_reward);
205 }
206
207 let avg_control = control_rewards.iter().sum::<f64>() / episodes as f64;
208 println!(" Average control policy reward: {:.2}", avg_control);
209
210 println!("\n Performance Summary:");
211 println!(" - Random policy: {:.2}", avg_random);
212 println!(" - Simple control: {:.2}", avg_control);
213 println!(" - Improvement: {:.2}", avg_control - avg_random);
214
215 Ok(())
216}

Trait Implementations
impl ContinuousEnvironment for PendulumEnvironment

fn action_bounds(&self) -> Vec<(f64, f64)>
Gets the action space bounds (min, max) for each dimension
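For example, the bounds can be used to clamp a candidate action before stepping. A sketch, assuming a single action dimension as in the pendulum case and a function returning Result, as in the repository example; proposed_torque is a hypothetical value produced by some policy:

let (lo, hi) = env.action_bounds()[0];            // pendulum has a single action dimension
let clamped = proposed_torque.clamp(lo, hi);      // proposed_torque: hypothetical f64 from a policy
let action = Array1::from_vec(vec![clamped]);
let (_next_state, reward, _done) = env.step(action)?;
println!("clamped torque {:.2} earned reward {:.3}", clamped, reward);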
fn step(&mut self, action: Array1<f64>) -> Result<(Array1<f64>, f64, bool)>
Takes a continuous action and returns the next state, the reward, and whether the episode is done
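A minimal interaction-loop sketch using the returned (next_state, reward, done) tuple; the 200-step cap mirrors the repository example, and the snippet is assumed to run inside a function returning Result<()>:

let mut state = env.reset();
let mut total_reward = 0.0;
let mut steps = 0;
while steps < 200 {
    let action = Array1::zeros(env.action_dim()); // zero torque at every step
    let (next_state, reward, done) = env.step(action)?;
    state = next_state;
    total_reward += reward;
    steps += 1;
    if done {
        break;
    }
}
println!("final state: {:?}, return: {:.2}, steps: {}", state, total_reward, steps);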
fn action_dim(&self) -> usize
Get action dimension
Auto Trait Implementations
impl Freeze for PendulumEnvironment
impl RefUnwindSafe for PendulumEnvironment
impl Send for PendulumEnvironment
impl Sync for PendulumEnvironment
impl Unpin for PendulumEnvironment
impl UnwindSafe for PendulumEnvironment
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more

impl<T> Pointable for T
impl<SS, SP> SupersetOf<SS> for SP
where
    SS: SubsetOf<SP>,

fn to_subset(&self) -> Option<SS>

The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more

fn is_in_subset(&self) -> bool

Checks if self is actually part of its subset T (and can be converted to it).

fn to_subset_unchecked(&self) -> SS

Use with care! Same as self.to_subset but without any property checks. Always succeeds.

fn from_subset(element: &SS) -> SP

The inclusion map: converts self to the equivalent element of its superset.