use crate::error::{QuantRS2Error, QuantRS2Result};
use crate::gate::GateOp;
use crate::qubit::QubitId;
use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::random::{thread_rng, Rng};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
/// A single optimization step the agent can select.
///
/// Every index is a position in the gate list the action is applied to. The
/// code that actually applies an action is not part of this module, so the
/// per-variant descriptions below follow the variant names only.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OptimizationAction {
    /// Merge single-qubit gates, anchored at `gate_index`.
    MergeSingleQubitGates {
        gate_index: usize,
    },
    /// Cancel a pair of mutually inverse gates, anchored at `gate_index`.
    CancelInversePairs {
        gate_index: usize,
    },
    /// Commute the gates at `gate1_index` and `gate2_index`.
    CommuteGates {
        gate1_index: usize,
        gate2_index: usize,
    },
    /// Replace the gate sequence delimited by `start_index` and `end_index`.
    ReplaceSequence {
        start_index: usize,
        end_index: usize,
    },
    /// Optimize the two-qubit gate at `gate_index`.
    OptimizeTwoQubitGate {
        gate_index: usize,
    },
    /// Do nothing; also returned by the optimizer when no action is available.
    NoOp,
}
/// Summary metrics of a circuit, used as the reinforcement-learning state.
///
/// Values can be filled in by hand or derived from a gate list with
/// `CircuitState::from_circuit`.
#[derive(Debug, Clone)]
pub struct CircuitState {
    /// Circuit depth (number of gate layers).
    pub depth: usize,
    /// Total number of gates.
    pub gate_count: usize,
    /// Number of gates acting on exactly two qubits.
    pub two_qubit_count: usize,
    /// Estimated overall circuit fidelity.
    pub fidelity: f64,
    /// Number of qubits in the circuit.
    pub qubit_count: usize,
    /// Two-qubit connectivity relative to the number of possible qubit pairs.
    pub connectivity_density: f64,
    /// Heuristic entanglement score.
    pub entanglement_measure: f64,
}
impl CircuitState {
    /// Flattens the state into a fixed-order vector of seven features.
    ///
    /// Counts are divided by fixed scale constants (depth / 100,
    /// gate count / 1000, two-qubit count / 500, qubit count / 50); fidelity,
    /// connectivity density and entanglement measure are passed through
    /// unchanged. No clamping is applied, so large circuits yield values
    /// above 1.0.
    pub fn to_features(&self) -> Vec<f64> {
        vec![
            self.depth as f64 / 100.0,
            self.gate_count as f64 / 1000.0,
            self.two_qubit_count as f64 / 500.0,
            self.fidelity,
            self.qubit_count as f64 / 50.0,
            self.connectivity_density,
            self.entanglement_measure,
        ]
    }

    /// Derives a [`CircuitState`] from a gate list.
    ///
    /// * `gates` - the circuit, in execution order.
    /// * `num_qubits` - number of qubits in the circuit; `0` is accepted and
    ///   yields a connectivity density and entanglement measure of `0.0`.
    ///
    /// Metrics:
    /// * depth: each gate is placed one layer after the deepest layer
    ///   currently occupied by any of its qubits.
    /// * fidelity: `0.99` per two-qubit gate and `0.9999` per every other gate.
    /// * connectivity density: number of *distinct* unordered qubit pairs
    ///   touched by a two-qubit gate, divided by `n * (n - 1) / 2`.
    /// * entanglement measure: two-qubit gates per qubit, capped at `1.0`.
    pub fn from_circuit(gates: &[Box<dyn GateOp>], num_qubits: usize) -> Self {
        let mut depth_map: HashMap<QubitId, usize> = HashMap::new();
        let mut two_qubit_count: usize = 0;
        // Distinct qubit pairs coupled by at least one two-qubit gate. Counting
        // gates instead of pairs would let repeated gates on the same pair push
        // the density above 1.0.
        let mut coupled_pairs: std::collections::HashSet<(QubitId, QubitId)> =
            std::collections::HashSet::new();

        for gate in gates {
            let qubits = gate.qubits();
            if qubits.len() == 2 {
                two_qubit_count += 1;
                // NOTE(review): copies the ids out of the list; relies on
                // `QubitId` being `Copy`.
                let (first, second) = (qubits[0], qubits[1]);
                // (a, b) and (b, a) are the same undirected edge.
                if !coupled_pairs.contains(&(second, first)) {
                    coupled_pairs.insert((first, second));
                }
            }
            let max_depth = qubits
                .iter()
                .map(|q| depth_map.get(q).copied().unwrap_or(0))
                .max()
                .unwrap_or(0);
            for qubit in qubits {
                depth_map.insert(qubit, max_depth + 1);
            }
        }

        let depth = depth_map.values().max().copied().unwrap_or(0);
        let gate_count = gates.len();

        // `powi` takes an `i32`; saturate instead of silently truncating.
        let to_exponent = |count: usize| i32::try_from(count).unwrap_or(i32::MAX);
        // Every two-qubit gate is also counted in `gate_count`, so this cannot
        // underflow.
        let other_gate_count = gate_count - two_qubit_count;
        let fidelity = 0.9999_f64.powi(to_exponent(other_gate_count))
            * 0.99_f64.powi(to_exponent(two_qubit_count));

        // Saturating arithmetic keeps `num_qubits == 0` from underflowing.
        let max_edges = num_qubits.saturating_mul(num_qubits.saturating_sub(1)) / 2;
        let connectivity_density = if max_edges > 0 {
            coupled_pairs.len() as f64 / max_edges as f64
        } else {
            0.0
        };

        // Guard the division: with zero qubits the ratio would be NaN or
        // infinite, which `min(1.0)` turns into a misleading 1.0.
        let entanglement_measure = if num_qubits > 0 {
            (two_qubit_count as f64 / num_qubits as f64).min(1.0)
        } else {
            0.0
        };

        Self {
            depth,
            gate_count,
            two_qubit_count,
            fidelity,
            qubit_count: num_qubits,
            connectivity_density,
            entanglement_measure,
        }
    }
}
/// Tabular Q-learning agent that learns which [`OptimizationAction`] to apply
/// in a given (discretized) circuit state.
pub struct QLearningOptimizer {
    /// Learned values, keyed by (discretized state features, action).
    q_table: Arc<RwLock<HashMap<(Vec<u8>, OptimizationAction), f64>>>,
    /// Step size used when moving a Q-value toward its target.
    learning_rate: f64,
    /// Weight given to the best Q-value of the next state.
    discount_factor: f64,
    /// Current probability of picking a random action.
    epsilon: f64,
    /// Factor `epsilon` is multiplied by after every finished episode.
    epsilon_decay: f64,
    /// Lower bound for `epsilon`.
    min_epsilon: f64,
    /// Counter incremented once per finished episode.
    episodes: Arc<RwLock<usize>>,
    /// Most recent finished episodes.
    performance_history: Arc<RwLock<Vec<OptimizationEpisode>>>,
}
/// Record of one finished optimization episode.
#[derive(Debug, Clone)]
pub struct OptimizationEpisode {
    /// Circuit depth before the episode.
    pub initial_depth: usize,
    /// Circuit depth after the episode.
    pub final_depth: usize,
    /// Gate count before the episode.
    pub initial_gate_count: usize,
    /// Gate count after the episode.
    pub final_gate_count: usize,
    /// Reward recorded for the episode.
    pub reward: f64,
    /// Number of steps taken during the episode.
    pub steps_taken: usize,
}
impl QLearningOptimizer {
    /// Maximum number of episodes kept in the performance history.
    const MAX_HISTORY: usize = 1000;

    /// Creates an optimizer with an empty Q-table.
    ///
    /// * `learning_rate` - step size of each Q-value update.
    /// * `discount_factor` - weight of the best next-state Q-value.
    /// * `initial_epsilon` - starting probability of exploring at random.
    ///
    /// Epsilon decays by a factor of `0.995` per finished episode and never
    /// drops below `0.01`.
    pub fn new(learning_rate: f64, discount_factor: f64, initial_epsilon: f64) -> Self {
        Self {
            q_table: Arc::new(RwLock::new(HashMap::new())),
            learning_rate,
            discount_factor,
            epsilon: initial_epsilon,
            epsilon_decay: 0.995,
            min_epsilon: 0.01,
            episodes: Arc::new(RwLock::new(0)),
            performance_history: Arc::new(RwLock::new(Vec::new())),
        }
    }

    /// Picks an action epsilon-greedily.
    ///
    /// With probability `epsilon` a uniformly random entry of
    /// `available_actions` is returned; otherwise the entry with the highest
    /// stored Q-value for `state`. Returns [`OptimizationAction::NoOp`] when
    /// `available_actions` is empty.
    pub fn choose_action(
        &self,
        state: &CircuitState,
        available_actions: &[OptimizationAction],
    ) -> OptimizationAction {
        if available_actions.is_empty() {
            return OptimizationAction::NoOp;
        }
        let mut rng = thread_rng();
        if rng.random::<f64>() < self.epsilon {
            available_actions[rng.random_range(0..available_actions.len())]
        } else {
            self.get_best_action(state, available_actions)
        }
    }

    /// Returns the action with the highest stored Q-value for `state`.
    ///
    /// Pairs missing from the table count as `0.0`. Ties go to the earliest
    /// action in `available_actions`. An empty slice yields
    /// [`OptimizationAction::NoOp`].
    fn get_best_action(
        &self,
        state: &CircuitState,
        available_actions: &[OptimizationAction],
    ) -> OptimizationAction {
        let Some(&first_action) = available_actions.first() else {
            return OptimizationAction::NoOp;
        };
        // One reusable lookup key: only the action half changes per probe, so
        // the state bytes are not cloned for every candidate.
        let mut probe = (self.state_to_key(state), first_action);
        // A poisoned lock still guards a usable table; recover the guard.
        let q_table = self.q_table.read().unwrap_or_else(|e| e.into_inner());

        let mut best_action = first_action;
        let mut best_q_value = f64::NEG_INFINITY;
        for &action in available_actions {
            probe.1 = action;
            let q_value = q_table.get(&probe).copied().unwrap_or(0.0);
            if q_value > best_q_value {
                best_q_value = q_value;
                best_action = action;
            }
        }
        best_action
    }

    /// Applies one Q-learning update for the transition
    /// `state --action--> next_state`.
    ///
    /// `Q(s, a) += learning_rate * (reward + discount_factor * max Q(s', a') - Q(s, a))`,
    /// where the maximum runs over `next_actions` and is `0.0` when
    /// `next_actions` is empty. Missing table entries count as `0.0`.
    pub fn update_q_value(
        &mut self,
        state: &CircuitState,
        action: OptimizationAction,
        reward: f64,
        next_state: &CircuitState,
        next_actions: &[OptimizationAction],
    ) {
        // Hold a single write lock for the whole read-modify-write so the
        // value read and the value written belong to the same table snapshot.
        let mut q_table = self.q_table.write().unwrap_or_else(|e| e.into_inner());

        let max_next_q = if next_actions.is_empty() {
            0.0
        } else {
            let mut probe = (self.state_to_key(next_state), OptimizationAction::NoOp);
            next_actions
                .iter()
                .map(|&next_action| {
                    probe.1 = next_action;
                    q_table.get(&probe).copied().unwrap_or(0.0)
                })
                .fold(f64::NEG_INFINITY, f64::max)
        };

        let slot = q_table
            .entry((self.state_to_key(state), action))
            .or_insert(0.0);
        let current_q = *slot;
        let target = self.discount_factor.mul_add(max_next_q, reward);
        *slot = self.learning_rate.mul_add(target - current_q, current_q);
    }

    /// Scores the transition from `old_state` to `new_state`.
    ///
    /// Reductions are rewarded with weights 2 (depth), 1 (gate count) and
    /// 3 (two-qubit gate count); the fidelity change is weighted by 100.
    /// A total of exactly zero is replaced by `-0.1`.
    pub fn calculate_reward(&self, old_state: &CircuitState, new_state: &CircuitState) -> f64 {
        let depth_improvement = old_state.depth as f64 - new_state.depth as f64;
        let gate_improvement = old_state.gate_count as f64 - new_state.gate_count as f64;
        let two_qubit_improvement =
            old_state.two_qubit_count as f64 - new_state.two_qubit_count as f64;
        let fidelity_change = new_state.fidelity - old_state.fidelity;

        let mut reward = 0.0;
        reward += depth_improvement * 2.0;
        reward += gate_improvement * 1.0;
        reward += two_qubit_improvement * 3.0;
        reward += fidelity_change * 100.0;

        // Small penalty so a step that changes nothing is not free.
        if reward == 0.0 {
            reward = -0.1;
        }
        reward
    }

    /// Records a finished episode and decays the exploration rate.
    ///
    /// Epsilon is multiplied by the decay factor and floored at the minimum.
    /// The episode counter is incremented, and the history keeps only the
    /// most recent `MAX_HISTORY` (1000) episodes.
    pub fn finish_episode(&mut self, episode: OptimizationEpisode) {
        self.epsilon = (self.epsilon * self.epsilon_decay).max(self.min_epsilon);

        let mut episodes = self.episodes.write().unwrap_or_else(|e| e.into_inner());
        *episodes += 1;

        let mut history = self
            .performance_history
            .write()
            .unwrap_or_else(|e| e.into_inner());
        history.push(episode);
        if history.len() > Self::MAX_HISTORY {
            let excess = history.len() - Self::MAX_HISTORY;
            history.drain(..excess);
        }
    }

    /// Summarizes the learning progress so far.
    ///
    /// `total_episodes` is the number of episodes ever finished. The averages
    /// cover the retained history (at most the last 1000 episodes) and are
    /// `0.0` while the history is empty. An episode that ended with greater
    /// depth or gate count than it started contributes a negative improvement.
    pub fn get_statistics(&self) -> OptimizationStatistics {
        // Each lock is taken and released on its own, so none are nested here.
        let total_episodes = *self.episodes.read().unwrap_or_else(|e| e.into_inner());
        let q_table_size = self.q_table.read().unwrap_or_else(|e| e.into_inner()).len();
        let history = self
            .performance_history
            .read()
            .unwrap_or_else(|e| e.into_inner());

        let window = history.len();
        let mean = |total: f64| {
            if window == 0 {
                0.0
            } else {
                total / window as f64
            }
        };

        // Convert to f64 before subtracting: the `usize` difference would
        // underflow whenever an episode ends worse than it started.
        let depth_total = history
            .iter()
            .map(|e| e.initial_depth as f64 - e.final_depth as f64)
            .sum::<f64>();
        let gate_total = history
            .iter()
            .map(|e| e.initial_gate_count as f64 - e.final_gate_count as f64)
            .sum::<f64>();
        let reward_total = history.iter().map(|e| e.reward).sum::<f64>();

        OptimizationStatistics {
            total_episodes,
            average_depth_improvement: mean(depth_total),
            average_gate_reduction: mean(gate_total),
            average_reward: mean(reward_total),
            current_epsilon: self.epsilon,
            q_table_size,
        }
    }

    /// Discretizes a state into the byte key used by the Q-table.
    ///
    /// Each feature is multiplied by 10, rounded, and clamped to `0..=255`.
    fn state_to_key(&self, state: &CircuitState) -> Vec<u8> {
        state
            .to_features()
            .iter()
            .map(|&feature| ((feature * 10.0).round() as i32).clamp(0, 255) as u8)
            .collect()
    }

    /// Placeholder for persisting the Q-table.
    ///
    /// Currently does nothing: no file is written and `Ok(())` is always
    /// returned, regardless of `path`.
    pub const fn save_q_table(&self, path: &str) -> QuantRS2Result<()> {
        // Persistence is not implemented yet; `path` is intentionally unused.
        let _ = path;
        Ok(())
    }

    /// Placeholder for restoring the Q-table.
    ///
    /// Currently does nothing: no file is read, the table is left untouched
    /// and `Ok(())` is always returned, regardless of `path`.
    pub const fn load_q_table(&mut self, path: &str) -> QuantRS2Result<()> {
        // Persistence is not implemented yet; `path` is intentionally unused.
        let _ = path;
        Ok(())
    }
}
/// Snapshot of the optimizer's learning progress, as returned by
/// `QLearningOptimizer::get_statistics`.
#[derive(Debug, Clone)]
pub struct OptimizationStatistics {
    /// Episode count reported by the optimizer.
    pub total_episodes: usize,
    /// Mean of (initial depth - final depth) over the recorded episodes.
    pub average_depth_improvement: f64,
    /// Mean of (initial gate count - final gate count) over the recorded episodes.
    pub average_gate_reduction: f64,
    /// Mean episode reward over the recorded episodes.
    pub average_reward: f64,
    /// Exploration rate at the time the snapshot was taken.
    pub current_epsilon: f64,
    /// Number of (state, action) entries in the Q-table.
    pub q_table_size: usize,
}
impl Default for QLearningOptimizer {
    /// Builds an optimizer with learning rate `0.1`, discount factor `0.95`
    /// and initial exploration rate `0.3`.
    fn default() -> Self {
        let (learning_rate, discount_factor, initial_epsilon) = (0.1, 0.95, 0.3);
        Self::new(learning_rate, discount_factor, initial_epsilon)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Five-qubit state with fixed connectivity (0.6) and entanglement (0.8);
    /// only the fields that vary between tests are parameters.
    fn state_with(
        depth: usize,
        gate_count: usize,
        two_qubit_count: usize,
        fidelity: f64,
    ) -> CircuitState {
        CircuitState {
            depth,
            gate_count,
            two_qubit_count,
            fidelity,
            qubit_count: 5,
            connectivity_density: 0.6,
            entanglement_measure: 0.8,
        }
    }

    /// The state most tests start from.
    fn baseline_state() -> CircuitState {
        state_with(10, 50, 15, 0.95)
    }

    /// Shorthand for building an episode record.
    fn episode(
        depths: (usize, usize),
        gate_counts: (usize, usize),
        reward: f64,
        steps_taken: usize,
    ) -> OptimizationEpisode {
        OptimizationEpisode {
            initial_depth: depths.0,
            final_depth: depths.1,
            initial_gate_count: gate_counts.0,
            final_gate_count: gate_counts.1,
            reward,
            steps_taken,
        }
    }

    #[test]
    fn test_circuit_state_creation() {
        let features = baseline_state().to_features();
        assert_eq!(features.len(), 7);
        assert!(features.iter().all(|f| (0.0..=1.1).contains(f)));
    }

    #[test]
    fn test_q_learning_optimizer_creation() {
        let optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);
        assert_eq!(optimizer.learning_rate, 0.1);
        assert_eq!(optimizer.discount_factor, 0.95);
        assert_eq!(optimizer.epsilon, 0.3);
    }

    #[test]
    fn test_action_selection() {
        // Epsilon of zero forces the greedy branch.
        let optimizer = QLearningOptimizer::new(0.1, 0.95, 0.0);
        let state = baseline_state();
        let actions = vec![
            OptimizationAction::MergeSingleQubitGates { gate_index: 0 },
            OptimizationAction::CancelInversePairs { gate_index: 1 },
        ];
        let chosen = optimizer.choose_action(&state, &actions);
        assert!(actions.contains(&chosen));
    }

    #[test]
    fn test_reward_calculation() {
        let optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);
        let before = baseline_state();
        let after = state_with(8, 45, 12, 0.96);
        let reward = optimizer.calculate_reward(&before, &after);
        assert!(reward > 0.0);
    }

    #[test]
    fn test_q_value_update() {
        let mut optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);
        let state = baseline_state();
        let next_state = state_with(9, 48, 15, 0.95);
        let action = OptimizationAction::MergeSingleQubitGates { gate_index: 0 };
        optimizer.update_q_value(&state, action, 5.0, &next_state, &[]);
        let q_table = optimizer
            .q_table
            .read()
            .expect("Failed to acquire Q-table read lock");
        assert!(!q_table.is_empty());
    }

    #[test]
    fn test_epsilon_decay() {
        let mut optimizer = QLearningOptimizer::new(0.1, 0.95, 0.5);
        let initial_epsilon = optimizer.epsilon;
        optimizer.finish_episode(episode((10, 8), (50, 45), 10.0, 5));
        assert!(optimizer.epsilon < initial_epsilon);
        assert!(optimizer.epsilon >= optimizer.min_epsilon);
    }

    #[test]
    fn test_statistics() {
        let mut optimizer = QLearningOptimizer::new(0.1, 0.95, 0.3);
        optimizer.finish_episode(episode((10, 8), (50, 45), 10.0, 5));
        optimizer.finish_episode(episode((12, 9), (60, 52), 15.0, 7));
        let stats = optimizer.get_statistics();
        assert_eq!(stats.total_episodes, 2);
        assert!(stats.average_depth_improvement > 0.0);
        assert!(stats.average_gate_reduction > 0.0);
    }
}