continuous_rl/continuous_rl.rs

#![allow(
    clippy::pedantic,
    clippy::unnecessary_wraps,
    clippy::needless_range_loop,
    clippy::useless_vec,
    clippy::needless_collect,
    clippy::too_many_arguments
)]
//! Quantum Continuous Reinforcement Learning Example
//!
//! This example demonstrates quantum reinforcement learning for continuous
//! action spaces using the Quantum Deep Deterministic Policy Gradient (QDDPG) algorithm.

use quantrs2_ml::autodiff::optimizers::Adam;
use quantrs2_ml::prelude::*;
use scirs2_core::ndarray::Array1;
use scirs2_core::random::prelude::*;

fn main() -> Result<()> {
    println!("=== Quantum Continuous RL Demo ===\n");

    // Step 1: Test pendulum environment
    println!("1. Testing Pendulum Environment...");
    test_pendulum_dynamics()?;

    // Step 2: Train QDDPG on pendulum
    println!("\n2. Training Quantum DDPG on Pendulum Control...");
    train_qddpg_pendulum()?;

    // Step 3: Compare with random policy
    println!("\n3. Comparing with Random Policy...");
    compare_policies()?;

    // Step 4: Demonstrate custom continuous environment
    println!("\n4. Custom Continuous Environment Example...");
    custom_environment_demo()?;

    println!("\n=== Continuous RL Demo Complete ===");

    Ok(())
}

/// Test pendulum environment dynamics
fn test_pendulum_dynamics() -> Result<()> {
    let mut env = PendulumEnvironment::new();

    println!("   Initial state: {:?}", env.state());
    println!("   Action bounds: {:?}", env.action_bounds());

    // Run a few steps with different actions
    let actions = vec![
        Array1::from_vec(vec![0.0]),  // No torque
        Array1::from_vec(vec![2.0]),  // Max positive torque
        Array1::from_vec(vec![-2.0]), // Max negative torque
    ];

    for (i, action) in actions.iter().enumerate() {
        let state = env.reset();
        let (next_state, reward, done) = env.step(action.clone())?;

        println!("\n   Step {} with action {:.1}:", i + 1, action[0]);
        println!(
            "     State: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
            state[0], state[1], state[2]
        );
        println!(
            "     Next: [θ_cos={:.3}, θ_sin={:.3}, θ_dot={:.3}]",
            next_state[0], next_state[1], next_state[2]
        );
        println!("     Reward: {reward:.3}, Done: {done}");
    }

    Ok(())
}

/// Train QDDPG on pendulum control
fn train_qddpg_pendulum() -> Result<()> {
    let state_dim = 3;
    let action_dim = 1;
    let action_bounds = vec![(-2.0, 2.0)];
    let num_qubits = 4;
    let buffer_capacity = 10000;

    // Create QDDPG agent
    let mut agent = QuantumDDPG::new(
        state_dim,
        action_dim,
        action_bounds,
        num_qubits,
        buffer_capacity,
    )?;

    // Create environment
    let mut env = PendulumEnvironment::new();

    // Create optimizers
    let mut actor_optimizer = Adam::new(0.001);
    let mut critic_optimizer = Adam::new(0.001);

    // Train for a few episodes (reduced for demo)
    let episodes = 50;
    println!("   Training QDDPG for {episodes} episodes...");

    let rewards = agent.train(
        &mut env,
        episodes,
        &mut actor_optimizer,
        &mut critic_optimizer,
    )?;

    // Print training statistics
    let avg_initial = rewards[..10].iter().sum::<f64>() / 10.0;
    let avg_final = rewards[rewards.len() - 10..].iter().sum::<f64>() / 10.0;

    println!("\n   Training Statistics:");
    println!("   - Average initial reward: {avg_initial:.2}");
    println!("   - Average final reward: {avg_final:.2}");
    println!("   - Improvement: {:.2}", avg_final - avg_initial);

    // Test trained agent
    println!("\n   Testing trained agent...");
    test_trained_agent(&agent, &mut env)?;

    Ok(())
}

/// Test a trained agent
fn test_trained_agent(agent: &QuantumDDPG, env: &mut dyn ContinuousEnvironment) -> Result<()> {
    let test_episodes = 5;
    let mut test_rewards = Vec::new();

    for episode in 0..test_episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;
        let mut steps = 0;

        while !done && steps < 200 {
            let action = agent.get_action(&state, false)?; // No exploration
            let (next_state, reward, is_done) = env.step(action.clone())?;

            state = next_state;
            episode_reward += reward;
            done = is_done;
            steps += 1;
        }

        test_rewards.push(episode_reward);
        println!(
            "   Test episode {}: Reward = {:.2}, Steps = {}",
            episode + 1,
            episode_reward,
            steps
        );
    }

    let avg_test = test_rewards.iter().sum::<f64>() / f64::from(test_episodes);
    println!("   Average test reward: {avg_test:.2}");

    Ok(())
}

/// Compare a random policy with a simple proportional-control policy
fn compare_policies() -> Result<()> {
    let mut env = PendulumEnvironment::new();
    let episodes = 10;

    // Random policy performance
    println!("   Random Policy Performance:");
    let mut random_rewards = Vec::new();

    for _ in 0..episodes {
        let _ = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;

        while !done {
            // Uniform random torque in [-2.0, 2.0): 4 * u - 2 with u ~ U(0, 1)
            let action = Array1::from_vec(vec![4.0f64.mul_add(thread_rng().gen::<f64>(), -2.0)]);

            let (_next_state, reward, is_done) = env.step(action)?;
            episode_reward += reward;
            done = is_done;
        }

        random_rewards.push(episode_reward);
    }

    let avg_random = random_rewards.iter().sum::<f64>() / f64::from(episodes);
    println!("   Average random policy reward: {avg_random:.2}");

    // Simple control policy (proportional control)
    println!("\n   Simple Control Policy Performance:");
    let mut control_rewards = Vec::new();

    for _ in 0..episodes {
        let mut state = env.reset();
        let mut episode_reward = 0.0;
        let mut done = false;

        while !done {
            // Proportional control: torque = -k * theta (k = 2), clamped to the action bounds
            let theta = state[1].atan2(state[0]); // Reconstruct angle from [cos θ, sin θ]
            let action = Array1::from_vec(vec![(-2.0 * theta).clamp(-2.0, 2.0)]);

            let (next_state, reward, is_done) = env.step(action)?;
            state = next_state;
            episode_reward += reward;
            done = is_done;
        }

        control_rewards.push(episode_reward);
    }

    let avg_control = control_rewards.iter().sum::<f64>() / f64::from(episodes);
    println!("   Average control policy reward: {avg_control:.2}");

    println!("\n   Performance Summary:");
    println!("   - Random policy: {avg_random:.2}");
    println!("   - Simple control: {avg_control:.2}");
    println!("   - Improvement: {:.2}", avg_control - avg_random);

    Ok(())
}

/// Custom continuous environment example
fn custom_environment_demo() -> Result<()> {
    // Define a simple 2D navigation environment
    struct Navigation2D {
        position: Array1<f64>,
        goal: Array1<f64>,
        max_steps: usize,
        current_step: usize,
    }

    impl Navigation2D {
        fn new() -> Self {
            Self {
                position: Array1::zeros(2),
                goal: Array1::from_vec(vec![5.0, 5.0]),
                max_steps: 50,
                current_step: 0,
            }
        }
    }

    impl ContinuousEnvironment for Navigation2D {
        fn state(&self) -> Array1<f64> {
            // State includes position and relative goal position
            let mut state = Array1::zeros(4);
            state[0] = self.position[0];
            state[1] = self.position[1];
            state[2] = self.goal[0] - self.position[0];
            state[3] = self.goal[1] - self.position[1];
            state
        }

        fn action_bounds(&self) -> Vec<(f64, f64)> {
            vec![(-1.0, 1.0), (-1.0, 1.0)] // Velocity in x and y
        }

        fn step(&mut self, action: Array1<f64>) -> Result<(Array1<f64>, f64, bool)> {
            // Update position
            self.position = &self.position + &action;

            // Compute distance to goal
            let distance =
                (self.position[0] - self.goal[0]).hypot(self.position[1] - self.goal[1]);

            // Reward is negative distance (closer is better)
            let reward = -distance;

            self.current_step += 1;
            let done = distance < 0.5 || self.current_step >= self.max_steps;

            Ok((self.state(), reward, done))
        }

        fn reset(&mut self) -> Array1<f64> {
            // Random start position in [-5.0, 5.0) on each axis
            self.position = Array1::from_vec(vec![
                10.0f64.mul_add(thread_rng().gen::<f64>(), -5.0),
                10.0f64.mul_add(thread_rng().gen::<f64>(), -5.0),
            ]);
            self.current_step = 0;
            self.state()
        }

        fn state_dim(&self) -> usize {
            4
        }
        fn action_dim(&self) -> usize {
            2
        }
    }

    println!("   Created 2D Navigation Environment");

    let mut nav_env = Navigation2D::new();
    let state = nav_env.reset();

    println!("   Initial position: [{:.2}, {:.2}]", state[0], state[1]);
    println!("   Goal position: [5.00, 5.00]");
    println!("   Action space: 2D velocity vectors in [-1, 1]");

    // Demonstrate a few steps
    println!("\n   Taking some steps:");
    for i in 0..3 {
        // Random velocity in [-0.5, 0.5) on each axis
        let action = Array1::from_vec(vec![
            0.5 * 2.0f64.mul_add(thread_rng().gen::<f64>(), -1.0),
            0.5 * 2.0f64.mul_add(thread_rng().gen::<f64>(), -1.0),
        ]);

        let (next_state, reward, done) = nav_env.step(action.clone())?;

        println!(
            "   Step {}: action=[{:.2}, {:.2}], pos=[{:.2}, {:.2}], reward={:.2}, done={}",
            i + 1,
            action[0],
            action[1],
            next_state[0],
            next_state[1],
            reward,
            done
        );
    }
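
    // A minimal sketch of training QDDPG on this custom environment, left commented
    // out so the demo's runtime stays short. It assumes (not verified here) that
    // QuantumDDPG::train accepts any type implementing ContinuousEnvironment, as the
    // `&mut dyn ContinuousEnvironment` parameter of test_trained_agent suggests; the
    // constructor and optimizer calls mirror the pendulum setup above.
    //
    //     let mut nav_agent = QuantumDDPG::new(
    //         nav_env.state_dim(),     // 4: position + relative goal
    //         nav_env.action_dim(),    // 2: velocity in x and y
    //         nav_env.action_bounds(), // [(-1.0, 1.0), (-1.0, 1.0)]
    //         4,                       // num_qubits
    //         1000,                    // replay buffer capacity
    //     )?;
    //     let mut actor_opt = Adam::new(0.001);
    //     let mut critic_opt = Adam::new(0.001);
    //     let nav_rewards = nav_agent.train(&mut nav_env, 10, &mut actor_opt, &mut critic_opt)?;
    //     println!("   Last navigation reward: {:.2}", nav_rewards[nav_rewards.len() - 1]);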

    println!("\n   This demonstrates how to create custom continuous environments");
    println!("   for quantum RL algorithms!");

    Ok(())
}