1use crate::error::{QuantRS2Error, QuantRS2Result};
9use crate::gate::GateOp;
10use crate::qubit::QubitId;
11use scirs2_core::ndarray::{Array1, Array2};
12use scirs2_core::random::{thread_rng, Rng};
13use std::collections::HashMap;
14use std::sync::{Arc, RwLock};
15
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
18pub enum OptimizationAction {
19 MergeSingleQubitGates { gate_index: usize },
21 CancelInversePairs { gate_index: usize },
23 CommuteGates {
25 gate1_index: usize,
26 gate2_index: usize,
27 },
28 ReplaceSequence {
30 start_index: usize,
31 end_index: usize,
32 },
33 OptimizeTwoQubitGate { gate_index: usize },
35 NoOp,
37}
38
39#[derive(Debug, Clone)]
41pub struct CircuitState {
42 pub depth: usize,
44 pub gate_count: usize,
46 pub two_qubit_count: usize,
48 pub fidelity: f64,
50 pub qubit_count: usize,
52 pub connectivity_density: f64,
54 pub entanglement_measure: f64,
56}
57
58impl CircuitState {
59 pub fn to_features(&self) -> Vec<f64> {
61 vec![
62 self.depth as f64 / 100.0, self.gate_count as f64 / 1000.0,
64 self.two_qubit_count as f64 / 500.0,
65 self.fidelity,
66 self.qubit_count as f64 / 50.0,
67 self.connectivity_density,
68 self.entanglement_measure,
69 ]
70 }
71
72 pub fn from_circuit(gates: &[Box<dyn GateOp>], num_qubits: usize) -> Self {
74 let mut depth_map: HashMap<QubitId, usize> = HashMap::new();
75 let mut two_qubit_count = 0;
76 let mut connectivity_edges = 0;
77
78 for gate in gates {
79 let qubits = gate.qubits();
80
81 if qubits.len() == 2 {
82 two_qubit_count += 1;
83 connectivity_edges += 1;
84 }
85
86 let max_depth = qubits
88 .iter()
89 .map(|q| *depth_map.get(q).unwrap_or(&0))
90 .max()
91 .unwrap_or(0);
92
93 for qubit in qubits {
94 depth_map.insert(qubit, max_depth + 1);
95 }
96 }
97
98 let depth = depth_map.values().max().copied().unwrap_or(0);
99 let gate_count = gates.len();
100
101 let fidelity = 0.9999_f64.powi(gate_count as i32 - two_qubit_count as i32)
103 * 0.99_f64.powi(two_qubit_count as i32);
104
105 let max_edges = num_qubits * (num_qubits - 1) / 2;
107 let connectivity_density = if max_edges > 0 {
108 connectivity_edges as f64 / max_edges as f64
109 } else {
110 0.0
111 };
112
113 let entanglement_measure = (two_qubit_count as f64 / num_qubits as f64).min(1.0);
115
116 Self {
117 depth,
118 gate_count,
119 two_qubit_count,
120 fidelity,
121 qubit_count: num_qubits,
122 connectivity_density,
123 entanglement_measure,
124 }
125 }
126}
127
128pub struct QLearningOptimizer {
130 q_table: Arc<RwLock<HashMap<(Vec<u8>, OptimizationAction), f64>>>,
132 learning_rate: f64,
134 discount_factor: f64,
136 epsilon: f64,
138 epsilon_decay: f64,
140 min_epsilon: f64,
142 episodes: Arc<RwLock<usize>>,
144 performance_history: Arc<RwLock<Vec<OptimizationEpisode>>>,
146}
147
148#[derive(Debug, Clone)]
150pub struct OptimizationEpisode {
151 pub initial_depth: usize,
152 pub final_depth: usize,
153 pub initial_gate_count: usize,
154 pub final_gate_count: usize,
155 pub reward: f64,
156 pub steps_taken: usize,
157}
158
159impl QLearningOptimizer {
160 pub fn new(learning_rate: f64, discount_factor: f64, initial_epsilon: f64) -> Self {
167 Self {
168 q_table: Arc::new(RwLock::new(HashMap::new())),
169 learning_rate,
170 discount_factor,
171 epsilon: initial_epsilon,
172 epsilon_decay: 0.995,
173 min_epsilon: 0.01,
174 episodes: Arc::new(RwLock::new(0)),
175 performance_history: Arc::new(RwLock::new(Vec::new())),
176 }
177 }
178
179 pub fn choose_action(
185 &self,
186 state: &CircuitState,
187 available_actions: &[OptimizationAction],
188 ) -> OptimizationAction {
189 if available_actions.is_empty() {
190 return OptimizationAction::NoOp;
191 }
192
193 let mut rng = thread_rng();
194
195 if rng.gen::<f64>() < self.epsilon {
197 available_actions[rng.gen_range(0..available_actions.len())]
199 } else {
200 self.get_best_action(state, available_actions)
202 }
203 }
204
205 fn get_best_action(
207 &self,
208 state: &CircuitState,
209 available_actions: &[OptimizationAction],
210 ) -> OptimizationAction {
211 let state_key = self.state_to_key(state);
212 let q_table = self.q_table.read().unwrap_or_else(|e| e.into_inner());
213
214 let mut best_action = available_actions[0];
215 let mut best_q_value = f64::NEG_INFINITY;
216
217 for &action in available_actions {
218 let q_value = *q_table.get(&(state_key.clone(), action)).unwrap_or(&0.0);
219 if q_value > best_q_value {
220 best_q_value = q_value;
221 best_action = action;
222 }
223 }
224
225 best_action
226 }
227
228 pub fn update_q_value(
237 &mut self,
238 state: &CircuitState,
239 action: OptimizationAction,
240 reward: f64,
241 next_state: &CircuitState,
242 next_actions: &[OptimizationAction],
243 ) {
244 let state_key = self.state_to_key(state);
245 let next_state_key = self.state_to_key(next_state);
246
247 let q_table = self.q_table.read().unwrap_or_else(|e| e.into_inner());
249 let max_next_q = if next_actions.is_empty() {
250 0.0
251 } else {
252 next_actions
253 .iter()
254 .map(|&a| *q_table.get(&(next_state_key.clone(), a)).unwrap_or(&0.0))
255 .fold(f64::NEG_INFINITY, f64::max)
256 };
257 drop(q_table);
258
259 let mut q_table = self.q_table.write().unwrap_or_else(|e| e.into_inner());
262 let current_q = *q_table.get(&(state_key.clone(), action)).unwrap_or(&0.0);
263 let new_q = self.learning_rate.mul_add(
264 self.discount_factor.mul_add(max_next_q, reward) - current_q,
265 current_q,
266 );
267 q_table.insert((state_key, action), new_q);
268 }
269
270 pub fn calculate_reward(&self, old_state: &CircuitState, new_state: &CircuitState) -> f64 {
277 let mut reward = 0.0;
278
279 let depth_improvement = old_state.depth as f64 - new_state.depth as f64;
281 reward += depth_improvement * 2.0;
282
283 let gate_improvement = old_state.gate_count as f64 - new_state.gate_count as f64;
285 reward += gate_improvement * 1.0;
286
287 let two_qubit_improvement =
289 old_state.two_qubit_count as f64 - new_state.two_qubit_count as f64;
290 reward += two_qubit_improvement * 3.0;
291
292 let fidelity_change = new_state.fidelity - old_state.fidelity;
294 reward += fidelity_change * 100.0; if reward == 0.0 {
298 reward = -0.1;
299 }
300
301 reward
302 }
303
304 pub fn finish_episode(&mut self, episode: OptimizationEpisode) {
306 self.epsilon = (self.epsilon * self.epsilon_decay).max(self.min_epsilon);
308
309 {
311 let mut episodes = self.episodes.write().unwrap_or_else(|e| e.into_inner());
312 *episodes += 1;
313
314 let mut history = self
315 .performance_history
316 .write()
317 .unwrap_or_else(|e| e.into_inner());
318 history.push(episode);
319
320 if history.len() > 1000 {
322 let len = history.len();
323 history.drain(0..len - 1000);
324 }
325 }
326 }
327
328 pub fn get_statistics(&self) -> OptimizationStatistics {
330 let history = self
331 .performance_history
332 .read()
333 .unwrap_or_else(|e| e.into_inner());
334
335 if history.is_empty() {
336 return OptimizationStatistics {
337 total_episodes: 0,
338 average_depth_improvement: 0.0,
339 average_gate_reduction: 0.0,
340 average_reward: 0.0,
341 current_epsilon: self.epsilon,
342 q_table_size: self.q_table.read().unwrap_or_else(|e| e.into_inner()).len(),
343 };
344 }
345
346 let total_episodes = history.len();
347 let avg_depth_improvement: f64 = history
348 .iter()
349 .map(|e| (e.initial_depth - e.final_depth) as f64)
350 .sum::<f64>()
351 / total_episodes as f64;
352
353 let avg_gate_reduction: f64 = history
354 .iter()
355 .map(|e| (e.initial_gate_count - e.final_gate_count) as f64)
356 .sum::<f64>()
357 / total_episodes as f64;
358
359 let avg_reward: f64 = history.iter().map(|e| e.reward).sum::<f64>() / total_episodes as f64;
360
361 OptimizationStatistics {
362 total_episodes,
363 average_depth_improvement: avg_depth_improvement,
364 average_gate_reduction: avg_gate_reduction,
365 average_reward: avg_reward,
366 current_epsilon: self.epsilon,
367 q_table_size: self.q_table.read().unwrap_or_else(|e| e.into_inner()).len(),
368 }
369 }
370
371 fn state_to_key(&self, state: &CircuitState) -> Vec<u8> {
373 let features = state.to_features();
375 features
376 .iter()
377 .map(|&f| ((f * 10.0).round() as i32).clamp(0, 255) as u8)
378 .collect()
379 }
380
381 pub const fn save_q_table(&self, path: &str) -> QuantRS2Result<()> {
383 Ok(())
386 }
387
388 pub const fn load_q_table(&mut self, path: &str) -> QuantRS2Result<()> {
390 Ok(())
393 }
394}
395
396#[derive(Debug, Clone)]
398pub struct OptimizationStatistics {
399 pub total_episodes: usize,
400 pub average_depth_improvement: f64,
401 pub average_gate_reduction: f64,
402 pub average_reward: f64,
403 pub current_epsilon: f64,
404 pub q_table_size: usize,
405}
406
407impl Default for QLearningOptimizer {
408 fn default() -> Self {
409 Self::new(0.1, 0.95, 0.3)
410 }
411}
412
413#[cfg(test)]
414mod tests {
415 use super::*;
416
417 #[test]
418 fn test_circuit_state_creation() {
419 let state = CircuitState {
420 depth: 10,
421 gate_count: 50,
422 two_qubit_count: 15,
423 fidelity: 0.95,
424 qubit_count: 5,
425 connectivity_density: 0.6,
426 entanglement_measure: 0.8,
427 };
428
429 let features = state.to_features();
430 assert_eq!(features.len(), 7);
431 assert!(features.iter().all(|&f| f >= 0.0 && f <= 1.1)); }
433
434 #[test]
435 fn test_q_learning_optimizer_creation() {
436 let optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);
437 assert_eq!(optimizer.learning_rate, 0.1);
438 assert_eq!(optimizer.discount_factor, 0.95);
439 assert_eq!(optimizer.epsilon, 0.3);
440 }
441
442 #[test]
443 fn test_action_selection() {
444 let optimizer = QLearningOptimizer::new(0.1, 0.95, 0.0); let state = CircuitState {
447 depth: 10,
448 gate_count: 50,
449 two_qubit_count: 15,
450 fidelity: 0.95,
451 qubit_count: 5,
452 connectivity_density: 0.6,
453 entanglement_measure: 0.8,
454 };
455
456 let actions = vec![
457 OptimizationAction::MergeSingleQubitGates { gate_index: 0 },
458 OptimizationAction::CancelInversePairs { gate_index: 1 },
459 ];
460
461 let action = optimizer.choose_action(&state, &actions);
462 assert!(actions.contains(&action));
463 }
464
465 #[test]
466 fn test_reward_calculation() {
467 let optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);
468
469 let old_state = CircuitState {
470 depth: 10,
471 gate_count: 50,
472 two_qubit_count: 15,
473 fidelity: 0.95,
474 qubit_count: 5,
475 connectivity_density: 0.6,
476 entanglement_measure: 0.8,
477 };
478
479 let new_state = CircuitState {
480 depth: 8,
481 gate_count: 45,
482 two_qubit_count: 12,
483 fidelity: 0.96,
484 qubit_count: 5,
485 connectivity_density: 0.6,
486 entanglement_measure: 0.8,
487 };
488
489 let reward = optimizer.calculate_reward(&old_state, &new_state);
490 assert!(reward > 0.0); }
492
493 #[test]
494 fn test_q_value_update() {
495 let mut optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);
496
497 let state = CircuitState {
498 depth: 10,
499 gate_count: 50,
500 two_qubit_count: 15,
501 fidelity: 0.95,
502 qubit_count: 5,
503 connectivity_density: 0.6,
504 entanglement_measure: 0.8,
505 };
506
507 let action = OptimizationAction::MergeSingleQubitGates { gate_index: 0 };
508
509 let next_state = CircuitState {
510 depth: 9,
511 gate_count: 48,
512 two_qubit_count: 15,
513 fidelity: 0.95,
514 qubit_count: 5,
515 connectivity_density: 0.6,
516 entanglement_measure: 0.8,
517 };
518
519 optimizer.update_q_value(&state, action, 5.0, &next_state, &[]);
520
521 let q_table = optimizer
523 .q_table
524 .read()
525 .expect("Failed to acquire Q-table read lock");
526 assert!(!q_table.is_empty());
527 }
528
529 #[test]
530 fn test_epsilon_decay() {
531 let mut optimizer = QLearningOptimizer::new(0.1, 0.95, 0.5);
532 let initial_epsilon = optimizer.epsilon;
533
534 let episode = OptimizationEpisode {
535 initial_depth: 10,
536 final_depth: 8,
537 initial_gate_count: 50,
538 final_gate_count: 45,
539 reward: 10.0,
540 steps_taken: 5,
541 };
542
543 optimizer.finish_episode(episode);
544
545 assert!(optimizer.epsilon < initial_epsilon);
546 assert!(optimizer.epsilon >= optimizer.min_epsilon);
547 }
548
549 #[test]
550 fn test_statistics() {
551 let mut optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);
552
553 let episode1 = OptimizationEpisode {
554 initial_depth: 10,
555 final_depth: 8,
556 initial_gate_count: 50,
557 final_gate_count: 45,
558 reward: 10.0,
559 steps_taken: 5,
560 };
561
562 let episode2 = OptimizationEpisode {
563 initial_depth: 12,
564 final_depth: 9,
565 initial_gate_count: 60,
566 final_gate_count: 52,
567 reward: 15.0,
568 steps_taken: 7,
569 };
570
571 optimizer.finish_episode(episode1);
572 optimizer.finish_episode(episode2);
573
574 let stats = optimizer.get_statistics();
575 assert_eq!(stats.total_episodes, 2);
576 assert!(stats.average_depth_improvement > 0.0);
577 assert!(stats.average_gate_reduction > 0.0);
578 }
579}