#![allow(clippy::pedantic, clippy::unnecessary_wraps)]
use quantrs2_ml::autodiff::optimizers::Adam;
use quantrs2_ml::prelude::*;
use scirs2_core::ndarray::Array1;
use scirs2_core::random::prelude::*;

fn main() -> Result<()> {
    println!("=== Quantum Continuous RL Demo ===\n");

    println!("1. Testing Pendulum Environment...");
    test_pendulum_dynamics()?;

    println!("\n2. Training Quantum DDPG on Pendulum Control...");
    train_qddpg_pendulum()?;

    println!("\n3. Comparing with Random Policy...");
    compare_policies()?;

    println!("\n4. Custom Continuous Environment Example...");
    custom_environment_demo()?;

    println!("\n=== Continuous RL Demo Complete ===");

    Ok(())
}

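/// Steps the pendulum environment with a few fixed torques and prints each transition.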
fn test_pendulum_dynamics() -> Result<()> {
    let mut env = PendulumEnvironment::new();

    println!(" Initial state: {:?}", env.state());
    println!(" Action bounds: {:?}", env.action_bounds());

    // Torques to try: zero, and the two extremes (+2.0 and -2.0).
    let actions = vec![
        Array1::from_vec(vec![0.0]),
        Array1::from_vec(vec![2.0]),
        Array1::from_vec(vec![-2.0]),
    ];

    for (i, action) in actions.iter().enumerate() {
        let state = env.reset();
        let (next_state, reward, done) = env.step(action.clone())?;

        println!("\n Step {} with action {:.1}:", i + 1, action[0]);
        println!(
            " State: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
            state[0], state[1], state[2]
        );
        println!(
            " Next: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
            next_state[0], next_state[1], next_state[2]
        );
        println!(" Reward: {reward:.3}, Done: {done}");
    }

    Ok(())
}

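/// Trains a quantum DDPG agent on the pendulum task and reports how the episode reward evolves.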
fn train_qddpg_pendulum() -> Result<()> {
    // Problem dimensions and agent hyperparameters.
    let state_dim = 3;
    let action_dim = 1;
    let action_bounds = vec![(-2.0, 2.0)];
    let num_qubits = 4;
    let buffer_capacity = 10000;

    let mut agent = QuantumDDPG::new(
        state_dim,
        action_dim,
        action_bounds,
        num_qubits,
        buffer_capacity,
    )?;

    let mut env = PendulumEnvironment::new();

    let mut actor_optimizer = Adam::new(0.001);
    let mut critic_optimizer = Adam::new(0.001);

    let episodes = 50;
    println!(" Training QDDPG for {episodes} episodes...");

    let rewards = agent.train(
        &mut env,
        episodes,
        &mut actor_optimizer,
        &mut critic_optimizer,
    )?;

    // Average the first and last ten episode rewards to gauge learning progress.
    let avg_initial = rewards[..10].iter().sum::<f64>() / 10.0;
    let avg_final = rewards[rewards.len() - 10..].iter().sum::<f64>() / 10.0;

    println!("\n Training Statistics:");
    println!(" - Average initial reward: {avg_initial:.2}");
    println!(" - Average final reward: {avg_final:.2}");
    println!(" - Improvement: {:.2}", avg_final - avg_initial);

    println!("\n Testing trained agent...");
    test_trained_agent(&agent, &mut env)?;

    Ok(())
}

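/// Runs a few evaluation episodes with the trained agent and prints per-episode rewards.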
fn test_trained_agent(agent: &QuantumDDPG, env: &mut dyn ContinuousEnvironment) -> Result<()> {
    let test_episodes = 5;
    let mut test_rewards = Vec::new();

    for episode in 0..test_episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;
        let mut steps = 0;

        // Cap each evaluation episode at 200 steps.
        while !done && steps < 200 {
            let action = agent.get_action(&state, false)?;
            let (next_state, reward, is_done) = env.step(action.clone())?;

            state = next_state;
            episode_reward += reward;
            done = is_done;
            steps += 1;
        }

        test_rewards.push(episode_reward);
        println!(
            " Test episode {}: Reward = {:.2}, Steps = {}",
            episode + 1,
            episode_reward,
            steps
        );
    }

    let avg_test = test_rewards.iter().sum::<f64>() / f64::from(test_episodes);
    println!(" Average test reward: {avg_test:.2}");

    Ok(())
}

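/// Compares a uniformly random policy with a simple proportional controller on the pendulum.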
fn compare_policies() -> Result<()> {
    let mut env = PendulumEnvironment::new();
    let episodes = 10;

    println!(" Random Policy Performance:");
    let mut random_rewards = Vec::new();

    for _ in 0..episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;

        while !done {
            // Sample a torque uniformly from [-2, 2].
            let action = Array1::from_vec(vec![4.0f64.mul_add(thread_rng().gen::<f64>(), -2.0)]);

            let (next_state, reward, is_done) = env.step(action)?;
            state = next_state;
            episode_reward += reward;
            done = is_done;
        }

        random_rewards.push(episode_reward);
    }

    let avg_random = random_rewards.iter().sum::<f64>() / f64::from(episodes);
    println!(" Average random policy reward: {avg_random:.2}");

    println!("\n Simple Control Policy Performance:");
    let mut control_rewards = Vec::new();

    for _ in 0..episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;

        while !done {
            // Recover the angle from (cos θ, sin θ) and apply a torque that pushes it back toward zero.
            let theta = state[1].atan2(state[0]);
            let action = Array1::from_vec(vec![(-2.0 * theta).clamp(-2.0, 2.0)]);

            let (next_state, reward, is_done) = env.step(action)?;
            state = next_state;
            episode_reward += reward;
            done = is_done;
        }

        control_rewards.push(episode_reward);
    }

    let avg_control = control_rewards.iter().sum::<f64>() / f64::from(episodes);
    println!(" Average control policy reward: {avg_control:.2}");

    println!("\n Performance Summary:");
    println!(" - Random policy: {avg_random:.2}");
    println!(" - Simple control: {avg_control:.2}");
    println!(" - Improvement: {:.2}", avg_control - avg_random);

    Ok(())
}

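/// Shows how to implement `ContinuousEnvironment` for a custom task: 2D navigation toward a goal.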
fn custom_environment_demo() -> Result<()> {
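    // A toy 2D navigation task: start from a random position and reach the fixed goal at (5, 5).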
    struct Navigation2D {
        position: Array1<f64>,
        goal: Array1<f64>,
        max_steps: usize,
        current_step: usize,
    }

    impl Navigation2D {
        fn new() -> Self {
            Self {
                position: Array1::zeros(2),
                goal: Array1::from_vec(vec![5.0, 5.0]),
                max_steps: 50,
                current_step: 0,
            }
        }
    }

    impl ContinuousEnvironment for Navigation2D {
        fn state(&self) -> Array1<f64> {
            // Observation: current position plus the offset to the goal.
            let mut state = Array1::zeros(4);
            state[0] = self.position[0];
            state[1] = self.position[1];
            state[2] = self.goal[0] - self.position[0];
            state[3] = self.goal[1] - self.position[1];
            state
        }

        fn action_bounds(&self) -> Vec<(f64, f64)> {
            vec![(-1.0, 1.0), (-1.0, 1.0)]
        }

        fn step(&mut self, action: Array1<f64>) -> Result<(Array1<f64>, f64, bool)> {
            self.position = &self.position + &action;

            let distance = (self.position[0] - self.goal[0]).hypot(self.position[1] - self.goal[1]);

            // Reward is the negative Euclidean distance to the goal.
            let reward = -distance;

            self.current_step += 1;
            let done = distance < 0.5 || self.current_step >= self.max_steps;

            Ok((self.state(), reward, done))
        }

        fn reset(&mut self) -> Array1<f64> {
            // Start from a uniformly random position in [-5, 5] x [-5, 5].
            self.position = Array1::from_vec(vec![
                10.0f64.mul_add(thread_rng().gen::<f64>(), -5.0),
                10.0f64.mul_add(thread_rng().gen::<f64>(), -5.0),
            ]);
            self.current_step = 0;
            self.state()
        }

        fn state_dim(&self) -> usize {
            4
        }
        fn action_dim(&self) -> usize {
            2
        }
    }

    println!(" Created 2D Navigation Environment");

    let mut nav_env = Navigation2D::new();
    let state = nav_env.reset();

    println!(" Initial position: [{:.2}, {:.2}]", state[0], state[1]);
    println!(" Goal position: [5.00, 5.00]");
    println!(" Action space: 2D velocity vectors in [-1, 1]");

    println!("\n Taking some steps:");
    for i in 0..3 {
        // Small random velocity in [-0.5, 0.5] on each axis.
        let action = Array1::from_vec(vec![
            0.5 * 2.0f64.mul_add(thread_rng().gen::<f64>(), -1.0),
            0.5 * 2.0f64.mul_add(thread_rng().gen::<f64>(), -1.0),
        ]);

        let (next_state, reward, done) = nav_env.step(action.clone())?;

        println!(
            " Step {}: action=[{:.2}, {:.2}], pos=[{:.2}, {:.2}], reward={:.2}, done={}",
            i + 1,
            action[0],
            action[1],
            next_state[0],
            next_state[1],
            reward,
            done
        );
    }

    println!("\n This demonstrates how to create custom continuous environments");
    println!(" for quantum RL algorithms!");

    Ok(())
}