quantrs2_core/rl_circuit_optimization.rs

//! Reinforcement Learning-Based Quantum Circuit Optimization
//!
//! This module implements circuit optimization using reinforcement learning (RL).
//! The RL agent learns optimal gate sequences, placement strategies, and circuit
//! transformations by interacting with quantum circuits and receiving rewards based
//! on circuit quality metrics (depth, gate count, fidelity, etc.).
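//!
//! # Example
//!
//! A sketch of a single optimization episode (the module path, the `gates` /
//! `num_qubits` / `max_steps` values, and the `available_actions` produced by a
//! separate circuit-analysis pass are all assumed here):
//!
//! ```ignore
//! use quantrs2_core::rl_circuit_optimization::{
//!     CircuitState, OptimizationEpisode, QLearningOptimizer,
//! };
//!
//! let mut optimizer = QLearningOptimizer::default();
//! let mut state = CircuitState::from_circuit(&gates, num_qubits);
//! let initial = state.clone();
//! let mut total_reward = 0.0;
//!
//! for _ in 0..max_steps {
//!     // Pick an action (epsilon-greedy), apply it to the circuit, then learn from the result.
//!     let action = optimizer.choose_action(&state, &available_actions);
//!     // ... apply `action` to the circuit and rebuild `gates` ...
//!     let next_state = CircuitState::from_circuit(&gates, num_qubits);
//!     let reward = optimizer.calculate_reward(&state, &next_state);
//!     optimizer.update_q_value(&state, action, reward, &next_state, &available_actions);
//!     total_reward += reward;
//!     state = next_state;
//! }
//!
//! optimizer.finish_episode(OptimizationEpisode {
//!     initial_depth: initial.depth,
//!     final_depth: state.depth,
//!     initial_gate_count: initial.gate_count,
//!     final_gate_count: state.gate_count,
//!     reward: total_reward,
//!     steps_taken: max_steps,
//! });
//! ```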

use crate::error::QuantRS2Result;
use crate::gate::GateOp;
use crate::qubit::QubitId;
use scirs2_core::random::{thread_rng, Rng};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

/// Actions the RL agent can take to optimize circuits
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OptimizationAction {
    /// Merge two consecutive single-qubit gates
    MergeSingleQubitGates { gate_index: usize },
    /// Cancel inverse gate pairs
    CancelInversePairs { gate_index: usize },
    /// Apply commutation rules to reorder gates
    CommuteGates {
        gate1_index: usize,
        gate2_index: usize,
    },
    /// Replace gate sequence with equivalent but more efficient sequence
    ReplaceSequence {
        start_index: usize,
        end_index: usize,
    },
    /// Optimize two-qubit gate using decomposition
    OptimizeTwoQubitGate { gate_index: usize },
    /// No operation (skip this step)
    NoOp,
}

/// State representation for the RL agent
#[derive(Debug, Clone)]
pub struct CircuitState {
    /// Current circuit depth
    pub depth: usize,
    /// Total gate count
    pub gate_count: usize,
    /// Two-qubit gate count (more expensive)
    pub two_qubit_count: usize,
    /// Estimated fidelity (0.0 to 1.0)
    pub fidelity: f64,
    /// Number of qubits
    pub qubit_count: usize,
    /// Circuit connectivity graph density
    pub connectivity_density: f64,
    /// Entanglement complexity measure
    pub entanglement_measure: f64,
}

impl CircuitState {
    /// Convert state to feature vector for Q-learning
    pub fn to_features(&self) -> Vec<f64> {
        vec![
            self.depth as f64 / 100.0, // Normalize
            self.gate_count as f64 / 1000.0,
            self.two_qubit_count as f64 / 500.0,
            self.fidelity,
            self.qubit_count as f64 / 50.0,
            self.connectivity_density,
            self.entanglement_measure,
        ]
    }

    /// Extract state from a circuit
    pub fn from_circuit(gates: &[Box<dyn GateOp>], num_qubits: usize) -> Self {
        let mut depth_map: HashMap<QubitId, usize> = HashMap::new();
        let mut two_qubit_count = 0;
        let mut connectivity_edges = 0;

        for gate in gates {
            let qubits = gate.qubits();

            if qubits.len() == 2 {
                two_qubit_count += 1;
                connectivity_edges += 1;
            }

            // Update depth: each gate extends the deepest of its qubits by one layer
            let max_depth = qubits
                .iter()
                .map(|q| *depth_map.get(q).unwrap_or(&0))
                .max()
                .unwrap_or(0);

            for qubit in qubits {
                depth_map.insert(qubit, max_depth + 1);
            }
        }

        let depth = depth_map.values().max().copied().unwrap_or(0);
        let gate_count = gates.len();

        // Estimate fidelity assuming ~99.99% per single-qubit gate and ~99% per two-qubit gate
        let fidelity = 0.9999_f64.powi(gate_count as i32 - two_qubit_count as i32)
            * 0.99_f64.powi(two_qubit_count as i32);

        // Calculate connectivity density (two-qubit gate count as a rough proxy for used edges)
        let max_edges = num_qubits.saturating_sub(1) * num_qubits / 2;
        let connectivity_density = if max_edges > 0 {
            connectivity_edges as f64 / max_edges as f64
        } else {
            0.0
        };

        // Simplified entanglement measure based on two-qubit gates per qubit
        let entanglement_measure = if num_qubits > 0 {
            (two_qubit_count as f64 / num_qubits as f64).min(1.0)
        } else {
            0.0
        };

        Self {
            depth,
            gate_count,
            two_qubit_count,
            fidelity,
            qubit_count: num_qubits,
            connectivity_density,
            entanglement_measure,
        }
    }
}

/// Q-learning agent for circuit optimization
pub struct QLearningOptimizer {
    /// Q-table: maps (state, action) -> expected reward
    q_table: Arc<RwLock<HashMap<(Vec<u8>, OptimizationAction), f64>>>,
    /// Learning rate (alpha)
    learning_rate: f64,
    /// Discount factor (gamma)
    discount_factor: f64,
    /// Exploration rate (epsilon)
    epsilon: f64,
    /// Epsilon decay rate
    epsilon_decay: f64,
    /// Minimum epsilon
    min_epsilon: f64,
    /// Episode counter
    episodes: Arc<RwLock<usize>>,
    /// Performance history
    performance_history: Arc<RwLock<Vec<OptimizationEpisode>>>,
}

/// Record of a single optimization episode
#[derive(Debug, Clone)]
pub struct OptimizationEpisode {
    pub initial_depth: usize,
    pub final_depth: usize,
    pub initial_gate_count: usize,
    pub final_gate_count: usize,
    pub reward: f64,
    pub steps_taken: usize,
}

impl QLearningOptimizer {
    /// Create a new Q-learning optimizer
    ///
    /// # Arguments
    /// * `learning_rate` - How quickly the agent learns (0.0 to 1.0)
    /// * `discount_factor` - How much future rewards matter (0.0 to 1.0)
    /// * `initial_epsilon` - Initial exploration rate (0.0 to 1.0)
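    ///
    /// # Example
    ///
    /// A minimal construction, assuming the module is exposed at this path
    /// (the hyperparameters here match [`QLearningOptimizer::default`]):
    ///
    /// ```ignore
    /// use quantrs2_core::rl_circuit_optimization::QLearningOptimizer;
    ///
    /// let optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);
    /// ```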
    pub fn new(learning_rate: f64, discount_factor: f64, initial_epsilon: f64) -> Self {
        Self {
            q_table: Arc::new(RwLock::new(HashMap::new())),
            learning_rate,
            discount_factor,
            epsilon: initial_epsilon,
            epsilon_decay: 0.995,
            min_epsilon: 0.01,
            episodes: Arc::new(RwLock::new(0)),
            performance_history: Arc::new(RwLock::new(Vec::new())),
        }
    }

    /// Choose an action using an epsilon-greedy policy
    ///
    /// With probability `epsilon` a random action is chosen (exploration);
    /// otherwise the action with the highest learned Q-value is chosen (exploitation).
    ///
    /// # Arguments
    /// * `state` - Current circuit state
    /// * `available_actions` - List of actions that can be taken
    pub fn choose_action(
        &self,
        state: &CircuitState,
        available_actions: &[OptimizationAction],
    ) -> OptimizationAction {
        if available_actions.is_empty() {
            return OptimizationAction::NoOp;
        }

        let mut rng = thread_rng();

        // Epsilon-greedy: explore vs exploit
        if rng.gen::<f64>() < self.epsilon {
            // Explore: random action
            available_actions[rng.gen_range(0..available_actions.len())]
        } else {
            // Exploit: best known action
            self.get_best_action(state, available_actions)
        }
    }

    /// Get the best action according to current Q-values
    fn get_best_action(
        &self,
        state: &CircuitState,
        available_actions: &[OptimizationAction],
    ) -> OptimizationAction {
        let state_key = self.state_to_key(state);
        let q_table = self.q_table.read().unwrap_or_else(|e| e.into_inner());

        let mut best_action = available_actions[0];
        let mut best_q_value = f64::NEG_INFINITY;

        for &action in available_actions {
            let q_value = *q_table.get(&(state_key.clone(), action)).unwrap_or(&0.0);
            if q_value > best_q_value {
                best_q_value = q_value;
                best_action = action;
            }
        }

        best_action
    }

    /// Update Q-value based on observed reward
    ///
    /// # Arguments
    /// * `state` - Previous state
    /// * `action` - Action taken
    /// * `reward` - Reward received
    /// * `next_state` - New state after action
    /// * `next_actions` - Actions available in next state
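    ///
    /// # Example
    ///
    /// A worked update with the default hyperparameters (alpha = 0.1, gamma = 0.95),
    /// a reward of 5.0, no previously learned Q-value, and no follow-up actions:
    ///
    /// ```text
    /// Q(s,a) <- 0.0 + 0.1 * (5.0 + 0.95 * 0.0 - 0.0) = 0.5
    /// ```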
    pub fn update_q_value(
        &mut self,
        state: &CircuitState,
        action: OptimizationAction,
        reward: f64,
        next_state: &CircuitState,
        next_actions: &[OptimizationAction],
    ) {
        let state_key = self.state_to_key(state);
        let next_state_key = self.state_to_key(next_state);

        // Find max Q-value for next state
        let q_table = self.q_table.read().unwrap_or_else(|e| e.into_inner());
        let max_next_q = if next_actions.is_empty() {
            0.0
        } else {
            next_actions
                .iter()
                .map(|&a| *q_table.get(&(next_state_key.clone(), a)).unwrap_or(&0.0))
                .fold(f64::NEG_INFINITY, f64::max)
        };
        drop(q_table);

        // Q-learning update rule:
        // Q(s,a) = Q(s,a) + α * [r + γ * max Q(s',a') - Q(s,a)]
        let mut q_table = self.q_table.write().unwrap_or_else(|e| e.into_inner());
        let current_q = *q_table.get(&(state_key.clone(), action)).unwrap_or(&0.0);
        let new_q = self.learning_rate.mul_add(
            self.discount_factor.mul_add(max_next_q, reward) - current_q,
            current_q,
        );
        q_table.insert((state_key, action), new_q);
    }

    /// Calculate reward for a state transition
    ///
    /// Reward is based on improvements in circuit metrics:
    /// - Reduced depth (+reward)
    /// - Reduced gate count (+reward)
    /// - Increased fidelity (+reward)
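    ///
    /// # Example
    ///
    /// With the weights used below (depth x2.0, gate count x1.0, two-qubit count x3.0,
    /// fidelity x100.0), a transition that reduces depth by 2, removes 5 gates
    /// (3 of them two-qubit), and raises fidelity from 0.95 to 0.96 yields:
    ///
    /// ```text
    /// 2.0 * 2.0 + 5.0 * 1.0 + 3.0 * 3.0 + 0.01 * 100.0 = 19.0
    /// ```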
    pub fn calculate_reward(&self, old_state: &CircuitState, new_state: &CircuitState) -> f64 {
        let mut reward = 0.0;

        // Reward for depth reduction (most important)
        let depth_improvement = old_state.depth as f64 - new_state.depth as f64;
        reward += depth_improvement * 2.0;

        // Reward for gate count reduction
        let gate_improvement = old_state.gate_count as f64 - new_state.gate_count as f64;
        reward += gate_improvement * 1.0;

        // Reward for two-qubit gate reduction (expensive gates)
        let two_qubit_improvement =
            old_state.two_qubit_count as f64 - new_state.two_qubit_count as f64;
        reward += two_qubit_improvement * 3.0;

        // Penalty for fidelity loss (and reward for fidelity gains)
        let fidelity_change = new_state.fidelity - old_state.fidelity;
        reward += fidelity_change * 100.0; // Heavily weight fidelity

        // Small penalty when nothing changed (e.g., NoOp) to encourage productive actions
        if reward == 0.0 {
            reward = -0.1;
        }

        reward
    }

    /// Complete an optimization episode
    pub fn finish_episode(&mut self, episode: OptimizationEpisode) {
        // Decay epsilon
        self.epsilon = (self.epsilon * self.epsilon_decay).max(self.min_epsilon);

        // Record episode
        {
            let mut episodes = self.episodes.write().unwrap_or_else(|e| e.into_inner());
            *episodes += 1;

            let mut history = self
                .performance_history
                .write()
                .unwrap_or_else(|e| e.into_inner());
            history.push(episode);

            // Keep last 1000 episodes
            if history.len() > 1000 {
                let len = history.len();
                history.drain(0..len - 1000);
            }
        }
    }

    /// Get optimization statistics
    pub fn get_statistics(&self) -> OptimizationStatistics {
        let history = self
            .performance_history
            .read()
            .unwrap_or_else(|e| e.into_inner());

        if history.is_empty() {
            return OptimizationStatistics {
                total_episodes: 0,
                average_depth_improvement: 0.0,
                average_gate_reduction: 0.0,
                average_reward: 0.0,
                current_epsilon: self.epsilon,
                q_table_size: self.q_table.read().unwrap_or_else(|e| e.into_inner()).len(),
            };
        }

        let total_episodes = history.len();

        // Compute improvements in f64 so episodes that made the circuit worse
        // cannot underflow the unsigned counters.
        let avg_depth_improvement: f64 = history
            .iter()
            .map(|e| e.initial_depth as f64 - e.final_depth as f64)
            .sum::<f64>()
            / total_episodes as f64;

        let avg_gate_reduction: f64 = history
            .iter()
            .map(|e| e.initial_gate_count as f64 - e.final_gate_count as f64)
            .sum::<f64>()
            / total_episodes as f64;

        let avg_reward: f64 = history.iter().map(|e| e.reward).sum::<f64>() / total_episodes as f64;

        OptimizationStatistics {
            total_episodes,
            average_depth_improvement: avg_depth_improvement,
            average_gate_reduction: avg_gate_reduction,
            average_reward: avg_reward,
            current_epsilon: self.epsilon,
            q_table_size: self.q_table.read().unwrap_or_else(|e| e.into_inner()).len(),
        }
    }

    /// Convert state to hashable key (discretization)
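    ///
    /// Each normalized feature is multiplied by 10 and rounded into a bin, so for
    /// example a feature value of 0.37 lands in bin 4 and a fidelity of 0.95 in bin 10.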
    fn state_to_key(&self, state: &CircuitState) -> Vec<u8> {
        // Discretize continuous features into bins
        let features = state.to_features();
        features
            .iter()
            .map(|&f| ((f * 10.0).round() as i32).clamp(0, 255) as u8)
            .collect()
    }

    /// Save Q-table to file
    pub fn save_q_table(&self, _path: &str) -> QuantRS2Result<()> {
        // In a real implementation, this would serialize the Q-table
        // For now, we'll just return Ok
        Ok(())
    }

    /// Load Q-table from file
    pub fn load_q_table(&mut self, _path: &str) -> QuantRS2Result<()> {
        // In a real implementation, this would deserialize the Q-table
        // For now, we'll just return Ok
        Ok(())
    }
}

/// Statistics about optimization performance
#[derive(Debug, Clone)]
pub struct OptimizationStatistics {
    pub total_episodes: usize,
    pub average_depth_improvement: f64,
    pub average_gate_reduction: f64,
    pub average_reward: f64,
    pub current_epsilon: f64,
    pub q_table_size: usize,
}

impl Default for QLearningOptimizer {
    fn default() -> Self {
        Self::new(0.1, 0.95, 0.3)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_circuit_state_creation() {
        let state = CircuitState {
            depth: 10,
            gate_count: 50,
            two_qubit_count: 15,
            fidelity: 0.95,
            qubit_count: 5,
            connectivity_density: 0.6,
            entanglement_measure: 0.8,
        };

        let features = state.to_features();
        assert_eq!(features.len(), 7);
        assert!(features.iter().all(|&f| f >= 0.0 && f <= 1.1)); // Allow small overflow
    }

    #[test]
    fn test_q_learning_optimizer_creation() {
        let optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);
        assert_eq!(optimizer.learning_rate, 0.1);
        assert_eq!(optimizer.discount_factor, 0.95);
        assert_eq!(optimizer.epsilon, 0.3);
    }

    #[test]
    fn test_action_selection() {
        let optimizer = QLearningOptimizer::new(0.1, 0.95, 0.0); // No exploration

        let state = CircuitState {
            depth: 10,
            gate_count: 50,
            two_qubit_count: 15,
            fidelity: 0.95,
            qubit_count: 5,
            connectivity_density: 0.6,
            entanglement_measure: 0.8,
        };

        let actions = vec![
            OptimizationAction::MergeSingleQubitGates { gate_index: 0 },
            OptimizationAction::CancelInversePairs { gate_index: 1 },
        ];

        let action = optimizer.choose_action(&state, &actions);
        assert!(actions.contains(&action));
    }

    #[test]
    fn test_reward_calculation() {
        let optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);

        let old_state = CircuitState {
            depth: 10,
            gate_count: 50,
            two_qubit_count: 15,
            fidelity: 0.95,
            qubit_count: 5,
            connectivity_density: 0.6,
            entanglement_measure: 0.8,
        };

        let new_state = CircuitState {
            depth: 8,
            gate_count: 45,
            two_qubit_count: 12,
            fidelity: 0.96,
            qubit_count: 5,
            connectivity_density: 0.6,
            entanglement_measure: 0.8,
        };

        let reward = optimizer.calculate_reward(&old_state, &new_state);
        assert!(reward > 0.0); // Should be positive for improvements
    }

    #[test]
    fn test_q_value_update() {
        let mut optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);

        let state = CircuitState {
            depth: 10,
            gate_count: 50,
            two_qubit_count: 15,
            fidelity: 0.95,
            qubit_count: 5,
            connectivity_density: 0.6,
            entanglement_measure: 0.8,
        };

        let action = OptimizationAction::MergeSingleQubitGates { gate_index: 0 };

        let next_state = CircuitState {
            depth: 9,
            gate_count: 48,
            two_qubit_count: 15,
            fidelity: 0.95,
            qubit_count: 5,
            connectivity_density: 0.6,
            entanglement_measure: 0.8,
        };

        optimizer.update_q_value(&state, action, 5.0, &next_state, &[]);

        // Q-value should have been updated
        let q_table = optimizer
            .q_table
            .read()
            .expect("Failed to acquire Q-table read lock");
        assert!(!q_table.is_empty());
    }

    #[test]
    fn test_epsilon_decay() {
        let mut optimizer = QLearningOptimizer::new(0.1, 0.95, 0.5);
        let initial_epsilon = optimizer.epsilon;

        let episode = OptimizationEpisode {
            initial_depth: 10,
            final_depth: 8,
            initial_gate_count: 50,
            final_gate_count: 45,
            reward: 10.0,
            steps_taken: 5,
        };

        optimizer.finish_episode(episode);

        assert!(optimizer.epsilon < initial_epsilon);
        assert!(optimizer.epsilon >= optimizer.min_epsilon);
    }

    #[test]
    fn test_statistics() {
        let mut optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);

        let episode1 = OptimizationEpisode {
            initial_depth: 10,
            final_depth: 8,
            initial_gate_count: 50,
            final_gate_count: 45,
            reward: 10.0,
            steps_taken: 5,
        };

        let episode2 = OptimizationEpisode {
            initial_depth: 12,
            final_depth: 9,
            initial_gate_count: 60,
            final_gate_count: 52,
            reward: 15.0,
            steps_taken: 7,
        };

        optimizer.finish_episode(episode1);
        optimizer.finish_episode(episode2);

        let stats = optimizer.get_statistics();
        assert_eq!(stats.total_episodes, 2);
        assert!(stats.average_depth_improvement > 0.0);
        assert!(stats.average_gate_reduction > 0.0);
    }
}