quant_iron/components/
operator.rs

1use crate::compiler::compilable::Compilable;
2#[cfg(feature = "gpu")]
3use crate::components::gpu_context::GpuKernelArgs;
4#[cfg(feature = "gpu")]
5use crate::components::gpu_context::{GPU_CONTEXT, KernelType};
6use crate::{components::state::State, errors::Error};
7use dyn_clone::DynClone;
8use num_complex::Complex;
9#[cfg(feature = "gpu")]
10use ocl::prm::Float2;
11use rayon::prelude::*;
12#[cfg(feature = "gpu")]
13use std::f64::consts::PI;
14use std::{collections::HashSet, fmt::Debug};
15
16/// Threshold for using parallel CPU implementation
17const PARALLEL_THRESHOLD_NUM_QUBITS: usize = 10;
18
19/// Threshold for using OpenCL (GPU acceleration)
20const OPENCL_THRESHOLD_NUM_QUBITS: usize = 15;
21
22#[cfg(feature = "gpu")]
23fn execute_on_gpu(
24    state: &State,
25    target_qubit: usize,
26    control_qubits: &[usize],
27    kernel_type: KernelType,
28    global_work_size: usize,
29    kernel_args: GpuKernelArgs,
30) -> Result<Vec<Complex<f64>>, Error> {
31    let mut context_guard = GPU_CONTEXT.lock().map_err(|_| Error::GpuContextLockError)?;
32    let context = match *context_guard {
33        Ok(ref mut ctx) => ctx,
34        Err(ref e) => return Err(e.clone()), // Propagate initialisation error
35    };
36
37    let num_qubits = state.num_qubits();
38    let num_state_elements = state.state_vector.len();
39
40    // Ensure buffers are ready and get mutable references
41    let state_buffer_cloned = context.ensure_state_buffer(num_state_elements)?.clone();
42
43    let control_qubits_i32: Vec<i32> = control_qubits.iter().map(|&q| q as i32).collect();
44    let control_buffer_len = control_qubits_i32.len();
45    let control_buffer_cloned = context.ensure_control_buffer(control_buffer_len)?.clone();
46
47    let state_vector_f32: Vec<Float2> = state
48        .state_vector
49        .iter()
50        .map(|c| Float2::new(c.re as f32, c.im as f32))
51        .collect();
52
53    // Copy data to GPU buffers
54    state_buffer_cloned
55        .write(&state_vector_f32)
56        .enq()
57        .map_err(|e| Error::OpenCLError(format!("Failed to write to state buffer: {}", e)))?;
58
59    if !control_qubits_i32.is_empty() {
60        control_buffer_cloned
61            .write(&control_qubits_i32)
62            .enq()
63            .map_err(|e| Error::OpenCLError(format!("Failed to write to control buffer: {}", e)))?;
64    } else {
65        // Write dummy data if no control qubits
66        let dummy_control_data = vec![0; 1]; // Dummy data for control buffer
67        control_buffer_cloned
68            .write(&dummy_control_data)
69            .enq()
70            .map_err(|e| {
71                Error::OpenCLError(format!("Failed to write to dummy control buffer: {}", e))
72            })?;
73    }
74
75    let mut kernel_builder = context.pro_que.kernel_builder(kernel_type.name());
76    kernel_builder
77        .global_work_size(global_work_size)
78        .arg(&state_buffer_cloned) // Pass by reference
79        .arg(num_qubits as i32)
80        .arg(target_qubit as i32)
81        .arg(control_buffer_cloned)
82        .arg(control_qubits_i32.len() as i32);
83
84    match kernel_args {
85        GpuKernelArgs::None => {
86            // No additional arguments needed for Hadamard, PauliX, PauliY, PauliZ
87        }
88        GpuKernelArgs::SOrSdag { sign } => {
89            kernel_builder.arg(sign);
90        }
91        GpuKernelArgs::PhaseShift {
92            cos_angle,
93            sin_angle,
94        } => {
95            kernel_builder.arg(cos_angle).arg(sin_angle);
96        }
97        GpuKernelArgs::SwapTarget { q1 } => {
98            kernel_builder.arg(q1);
99        }
100        GpuKernelArgs::RotationGate {
101            cos_half_angle,
102            sin_half_angle,
103        } => {
104            kernel_builder.arg(cos_half_angle).arg(sin_half_angle);
105        }
106        GpuKernelArgs::Matchgate {
107            q1,
108            cos_theta_half,
109            sin_theta_half,
110            exp_i_phi1,
111            exp_i_phi2,
112        } => {
113            kernel_builder
114                .arg(q1)
115                .arg(cos_theta_half)
116                .arg(sin_theta_half)
117                .arg(exp_i_phi1)
118                .arg(exp_i_phi2);
119        }
120    }
121
122    let kernel = kernel_builder.build().map_err(|e| {
123        Error::OpenCLError(format!(
124            "Failed to build kernel '{}': {}",
125            kernel_type.name(),
126            e
127        ))
128    })?;
129
130    unsafe {
131        kernel
132            .enq()
133            .map_err(|e| Error::OpenCLError(format!("Failed to enqueue kernel: {}", e)))?;
134    }
135
136    let mut state_vector_ocl_result = vec![Float2::new(0.0, 0.0); num_state_elements];
137    // Read data back from state_buffer
138    state_buffer_cloned
139        .read(&mut state_vector_ocl_result)
140        .enq()
141        .map_err(|e| Error::OpenCLError(format!("Failed to read state buffer: {}", e)))?;
142
143    Ok(state_vector_ocl_result
144        .iter()
145        .map(|f2| Complex::new(f2[0] as f64, f2[1] as f64))
146        .collect())
147}
148
149/// A trait defining the interface for all operators.
150pub trait Operator: Send + Sync + Debug + DynClone {
151    /// Applies the operator to the given state's target qubits, using the control qubits if required.
152    ///
153    /// # Arguments:
154    ///
155    /// * `state` - The state to apply the operator to.
156    ///
157    /// * `target_qubits` - The target qubits to apply the operator to. If no target qubits are specified, the operator will be applied to all qubits in the state.
158    ///
159    /// * `control_qubits` - The control qubits to apply the operator to.
160    ///
161    /// # Returns:
162    ///
163    /// * The new state after applying the operator.
164    fn apply(
165        &self,
166        state: &State,
167        target_qubits: &[usize],
168        control_qubits: &[usize],
169    ) -> Result<State, Error>;
170
171    /// Returns the number of qubits that the operator acts on.
172    ///
173    /// # Returns:
174    ///
175    /// * The number of qubits that the operator acts on.
176    fn base_qubits(&self) -> usize;
177
178    /// Optionally returns an intermediate representation of the operator for compilation to OpenQASM.
179    ///
180    /// If you are not planning to compile the operator to an IR, you can ignore this method.
181    /// If you want to compile the operator to QASM, you should implement this method.
182    ///
183    /// # Returns:
184    ///  * An optional vector of `InstructionIR` representing the operator in an intermediate representation.
185    fn to_compilable(&self) -> Option<&dyn Compilable> {
186        // Default implementation returns None, indicating no compilable representation
187        None
188    }
189}
190
191dyn_clone::clone_trait_object!(Operator);
192
193/// Helper function to check if all control qubits are in the |1> state for a given basis state index.
194fn check_controls(index: usize, control_qubits: &[usize]) -> bool {
195    control_qubits
196        .iter()
197        .all(|&qubit| (index >> qubit) & 1 == 1)
198}
199
200/// Helper function to validate target and control qubits
201///
202/// # Arguments:
203///
204/// * `state` - The quantum state that contains information about the number of qubits.
205/// * `target_qubits` - The target qubits to validate.
206/// * `control_qubits` - The control qubits to validate.
207/// * `expected_targets` - The expected number of target qubits.
208///
209/// # Returns:
210///
211/// * `Ok(())` if all validations pass, ie. the target qubits are valid indices, the control qubits are valid indices, and there are no overlaps between control and target qubits.
212/// * `Err(Error)` if any validation fails.
213fn validate_qubits(
214    state: &State,
215    target_qubits: &[usize],
216    control_qubits: &[usize],
217    expected_targets: usize,
218) -> Result<(), Error> {
219    // Check if we have the expected number of target qubits
220    if target_qubits.len() != expected_targets {
221        return Err(Error::InvalidNumberOfQubits(target_qubits.len()));
222    }
223
224    let num_qubits = state.num_qubits();
225
226    // Check if all target qubits are valid indices
227    for &target_qubit in target_qubits {
228        if target_qubit >= num_qubits {
229            return Err(Error::InvalidQubitIndex(target_qubit, num_qubits));
230        }
231    }
232
233    // Check if all control qubits are valid indices and don't overlap with target qubits
234    for &control_qubit in control_qubits {
235        if control_qubit >= num_qubits {
236            return Err(Error::InvalidQubitIndex(control_qubit, num_qubits));
237        }
238
239        for &target_qubit in target_qubits {
240            if control_qubit == target_qubit {
241                return Err(Error::OverlappingControlAndTargetQubits(
242                    control_qubit,
243                    target_qubit,
244                ));
245            }
246        }
247    }
248
249    // Special check for multiple target qubits to ensure no duplicates
250    if expected_targets > 1 {
251        if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
252            // Use HashSet for efficient duplicate detection in larger systems
253            let mut seen_targets: HashSet<usize> = HashSet::with_capacity(target_qubits.len());
254            for &target_qubit in target_qubits {
255                if !seen_targets.insert(target_qubit) {
256                    return Err(Error::InvalidQubitIndex(target_qubit, num_qubits));
257                }
258            }
259        } else {
260            // Use nested loops for smaller systems
261            for i in 0..target_qubits.len() {
262                for j in i + 1..target_qubits.len() {
263                    if target_qubits[i] == target_qubits[j] {
264                        return Err(Error::InvalidQubitIndex(target_qubits[i], num_qubits));
265                    }
266                }
267            }
268        }
269    }
270
271    Ok(())
272}
273
274/// Defines a Hadamard operator.
275///
276/// A single-qubit operator that transforms the state of a qubit into a superposition of its basis states.
277#[derive(Debug, Clone, Copy)]
278pub struct Hadamard;
279
280impl Operator for Hadamard {
281    /// Applies the Hadamard operator to the given state's target qubit.
282    ///
283    /// # Arguments:
284    ///
285    /// * `state` - The state to apply the operator to.
286    ///
287    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
288    ///
289    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
290    ///
291    /// # Returns:
292    ///
293    /// * The new state after applying the Hadamard operator.
294    ///
295    /// # Errors:
296    ///
297    /// * `Error::InvalidNumberOfQubits` - If the number of target qubits is not 1.
298    ///
299    /// * `Error::InvalidQubitIndex` - If the target qubit or control qubit index is invalid for the number of qubits in the state.
300    ///
301    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
302    fn apply(
303        &self,
304        state: &State,
305        target_qubits: &[usize],
306        control_qubits: &[usize],
307    ) -> Result<State, Error> {
308        // Validation
309        validate_qubits(state, target_qubits, control_qubits, 1)?;
310
311        let target_qubit: usize = target_qubits[0];
312        let num_qubits: usize = state.num_qubits();
313
314        // Apply potentially controlled Hadamard operator
315        let sqrt_2_inv: f64 = 1.0 / (2.0f64).sqrt();
316        let dim: usize = 1 << num_qubits;
317        #[allow(unused_assignments)]
318        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
319        let gpu_enabled: bool = cfg!(feature = "gpu");
320
321        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
322            #[cfg(feature = "gpu")]
323            {
324                let global_work_size = if num_qubits > 0 {
325                    1 << (num_qubits - 1)
326                } else {
327                    1
328                };
329                new_state_vec = execute_on_gpu(
330                    state,
331                    target_qubit,
332                    control_qubits,
333                    KernelType::Hadamard,
334                    global_work_size,
335                    GpuKernelArgs::None,
336                )?;
337            }
338        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
339            // Rayon CPU Parallel implementation
340            new_state_vec = state.state_vector.clone(); // Initialise for CPU path
341            if control_qubits.is_empty() {
342                // Parallel uncontrolled Hadamard
343                let updates: Vec<(usize, Complex<f64>)> = (0..(1 << (num_qubits - 1)))
344                    .into_par_iter()
345                    .flat_map(|k| {
346                        let i0 = (k >> target_qubit << (target_qubit + 1))
347                            | (k & ((1 << target_qubit) - 1));
348                        let i1 = i0 | (1 << target_qubit);
349                        let amp0 = state.state_vector[i0];
350                        let amp1 = state.state_vector[i1];
351                        vec![
352                            (i0, sqrt_2_inv * (amp0 + amp1)),
353                            (i1, sqrt_2_inv * (amp0 - amp1)),
354                        ]
355                    })
356                    .collect();
357                for (idx, val) in updates {
358                    new_state_vec[idx] = val;
359                }
360            } else {
361                // Rayon CPU Parallel controlled Hadamard
362                let updates: Vec<(usize, Complex<f64>)> = (0..dim)
363                    .into_par_iter()
364                    .filter_map(|i| {
365                        if (i >> target_qubit) & 1 == 0 {
366                            // Process pairs (i, j) where i has 0 at target_qubit
367                            let j = i | (1 << target_qubit); // j has 1 at target_qubit
368                            if check_controls(i, control_qubits) {
369                                // Check controls based on i
370                                let amp_i = state.state_vector[i];
371                                let amp_j = state.state_vector[j];
372                                Some(vec![
373                                    (i, sqrt_2_inv * (amp_i + amp_j)),
374                                    (j, sqrt_2_inv * (amp_i - amp_j)),
375                                ])
376                            } else {
377                                None // Controls not met for this pair
378                            }
379                        } else {
380                            None // Already processed as part of a pair starting with 0 at target_qubit
381                        }
382                    })
383                    .flatten()
384                    .collect();
385                for (idx, val) in updates {
386                    new_state_vec[idx] = val;
387                }
388            }
389        } else {
390            // Sequential CPU implementation
391            new_state_vec = state.state_vector.clone(); // initialise for CPU path
392            if control_qubits.is_empty() {
393                // Sequential uncontrolled Hadamard
394                for k in 0..(1 << (num_qubits - 1)) {
395                    let i0 =
396                        (k >> target_qubit << (target_qubit + 1)) | (k & ((1 << target_qubit) - 1));
397                    let i1 = i0 | (1 << target_qubit);
398                    let amp0 = state.state_vector[i0];
399                    let amp1 = state.state_vector[i1];
400                    new_state_vec[i0] = sqrt_2_inv * (amp0 + amp1);
401                    new_state_vec[i1] = sqrt_2_inv * (amp0 - amp1);
402                }
403            } else {
404                // Sequential controlled Hadamard
405                for i in 0..dim {
406                    if (i >> target_qubit) & 1 == 0 {
407                        let j = i | (1 << target_qubit);
408                        if check_controls(i, control_qubits) {
409                            let amp_i = state.state_vector[i];
410                            let amp_j = state.state_vector[j];
411                            new_state_vec[i] = sqrt_2_inv * (amp_i + amp_j);
412                            new_state_vec[j] = sqrt_2_inv * (amp_i - amp_j);
413                        }
414                    }
415                }
416            }
417        }
418
419        Ok(State {
420            state_vector: new_state_vec,
421            num_qubits,
422        })
423    }
424
425    fn base_qubits(&self) -> usize {
426        1 // Hadamard acts on 1 qubit only
427    }
428
429    fn to_compilable(&self) -> Option<&dyn Compilable> {
430        Some(self)
431    }
432}
433
434/// Defines the Pauli operators: X, Y, Z.
435#[derive(Debug, Clone, Copy, PartialEq)]
436pub enum Pauli {
437    /// Pauli-X operator (NOT gate)
438    X,
439    /// Pauli-Y operator
440    Y,
441    /// Pauli-Z operator
442    Z,
443}
444
445impl Operator for Pauli {
446    /// Applies the Pauli operator to the given state's target qubit.
447    ///
448    /// # Arguments:
449    ///
450    /// * `state` - The state to apply the operator to.
451    ///
452    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
453    ///
454    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
455    ///
456    /// # Returns:
457    ///
458    /// * The new state after applying the Pauli operator.
459    ///
460    /// # Errors:
461    ///
462    /// * `Error::InvalidNumberOfQubits` - If the number of target qubits is not 1.
463    ///
464    /// * `Error::InvalidQubitIndex` - If the target qubit index is invalid for the number of qubits in the state.
465    ///
466    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
467    fn apply(
468        &self,
469        state: &State,
470        target_qubits: &[usize],
471        control_qubits: &[usize],
472    ) -> Result<State, Error> {
473        // Validation
474        validate_qubits(state, target_qubits, control_qubits, 1)?;
475
476        let target_qubit: usize = target_qubits[0];
477        let num_qubits: usize = state.num_qubits();
478
479        // Apply potentially controlled Pauli operator
480        let dim: usize = 1 << num_qubits;
481        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
482        let i_complex: Complex<f64> = Complex::new(0.0, 1.0);
483        let gpu_enabled: bool = cfg!(feature = "gpu");
484
485        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
486            #[cfg(feature = "gpu")]
487            {
488                let kernel_type = match self {
489                    Pauli::X => KernelType::PauliX,
490                    Pauli::Y => KernelType::PauliY,
491                    Pauli::Z => KernelType::PauliZ,
492                };
493                let global_work_size = if num_qubits == 0 {
494                    1
495                } else {
496                    match self {
497                        Pauli::Z => 1 << num_qubits, // N work items for Pauli Z
498                        _ => 1 << (num_qubits - 1),  // N/2 work items for Pauli X, Y
499                    }
500                };
501                new_state_vec = execute_on_gpu(
502                    state,
503                    target_qubit,
504                    control_qubits,
505                    kernel_type,
506                    global_work_size,
507                    GpuKernelArgs::None,
508                )?;
509            }
510        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
511            // Parallel implementation
512            match self {
513                Pauli::X => {
514                    let updates: Vec<(usize, Complex<f64>)> = (0..dim)
515                        .into_par_iter()
516                        .filter_map(|i| {
517                            if check_controls(i, control_qubits) && ((i >> target_qubit) & 1 == 0) {
518                                let j = i | (1 << target_qubit);
519                                let amp_i = state.state_vector[i];
520                                let amp_j = state.state_vector[j];
521                                Some(vec![(i, amp_j), (j, amp_i)])
522                            } else {
523                                None
524                            }
525                        })
526                        .flatten()
527                        .collect();
528                    for (idx, val) in updates {
529                        new_state_vec[idx] = val;
530                    }
531                }
532                Pauli::Y => {
533                    let updates: Vec<(usize, Complex<f64>)> = (0..dim)
534                        .into_par_iter()
535                        .filter_map(|i| {
536                            if check_controls(i, control_qubits) && ((i >> target_qubit) & 1 == 0) {
537                                let j = i | (1 << target_qubit);
538                                let amp_i = state.state_vector[i];
539                                let amp_j = state.state_vector[j];
540                                Some(vec![(i, -i_complex * amp_j), (j, i_complex * amp_i)])
541                            } else {
542                                None
543                            }
544                        })
545                        .flatten()
546                        .collect();
547                    for (idx, val) in updates {
548                        new_state_vec[idx] = val;
549                    }
550                }
551                Pauli::Z => {
552                    new_state_vec
553                        .par_iter_mut()
554                        .enumerate()
555                        .for_each(|(i, current_amp_ref)| {
556                            if check_controls(i, control_qubits) && ((i >> target_qubit) & 1 == 1) {
557                                *current_amp_ref = -state.state_vector[i];
558                            }
559                        });
560                }
561            }
562        } else {
563            // Sequential implementation
564            for i in 0..dim {
565                if check_controls(i, control_qubits) {
566                    match self {
567                        Pauli::X => {
568                            if (i >> target_qubit) & 1 == 0 {
569                                let j = i | (1 << target_qubit);
570                                let amp_i = state.state_vector[i];
571                                let amp_j = state.state_vector[j];
572                                new_state_vec[i] = amp_j;
573                                new_state_vec[j] = amp_i;
574                            }
575                        }
576                        Pauli::Y => {
577                            if (i >> target_qubit) & 1 == 0 {
578                                let j = i | (1 << target_qubit);
579                                let amp_i = state.state_vector[i];
580                                let amp_j = state.state_vector[j];
581                                new_state_vec[i] = -i_complex * amp_j;
582                                new_state_vec[j] = i_complex * amp_i;
583                            }
584                        }
585                        Pauli::Z => {
586                            if (i >> target_qubit) & 1 == 1 {
587                                new_state_vec[i] = -state.state_vector[i];
588                            }
589                        }
590                    }
591                }
592            }
593        }
594
595        Ok(State {
596            state_vector: new_state_vec,
597            num_qubits: state.num_qubits(),
598        })
599    }
600
601    fn base_qubits(&self) -> usize {
602        1 // Pauli operators act on 1 qubit only
603    }
604
605    fn to_compilable(&self) -> Option<&dyn Compilable> {
606        Some(self) // Manual implementation for enum
607    }
608}
609
610impl std::fmt::Display for Pauli {
611    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
612        match self {
613            Pauli::X => write!(f, "X"),
614            Pauli::Y => write!(f, "Y"),
615            Pauli::Z => write!(f, "Z"),
616        }
617    }
618}
619
620/// Defines a CNOT operator.
621///
622/// A two-qubit operator that flips the target qubit if the control qubit is in the |1> state.
623#[derive(Debug, Clone, Copy)]
624pub struct CNOT;
625
626impl Operator for CNOT {
627    /// Applies the CNOT operator to the given state's target qubit, using the control qubit.
628    ///
629    /// # Arguments:
630    ///
631    /// * `state` - The state to apply the operator to.
632    ///
633    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
634    ///
635    /// * `control_qubits` - The control qubits for the operator. This should be a single qubit.
636    ///
637    /// # Returns:
638    ///
639    /// * The new state after applying the CNOT operator.
640    ///
641    /// # Errors:
642    ///
643    /// * `Error::InvalidNumberOfQubits` - If the target or control qubits is not 1.
644    ///
645    /// * `Error::InvalidQubitIndex` - If the target or control qubit index is invalid for the number of qubits in the state.
646    ///
647    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
648    fn apply(
649        &self,
650        state: &State,
651        target_qubits: &[usize],
652        control_qubits: &[usize],
653    ) -> Result<State, Error> {
654        // Validation
655        validate_qubits(state, target_qubits, control_qubits, 1)?;
656
657        // Additional validation for CNOT: exactly one control qubit
658        if control_qubits.len() != 1 {
659            return Err(Error::InvalidNumberOfQubits(control_qubits.len()));
660        }
661
662        let control_qubit: usize = control_qubits[0];
663
664        // Apply CNOT operator (same as Pauli-X with 1 control qubit)
665        Pauli::X.apply(state, target_qubits, &[control_qubit])
666    }
667
668    fn base_qubits(&self) -> usize {
669        2 // CNOT acts on 2 qubits (1 control, 1 target)
670    }
671
672    fn to_compilable(&self) -> Option<&dyn Compilable> {
673        Some(self)
674    }
675}
676
677/// Defines a SWAP operator.
678///
679/// A two-qubit operator that swaps the states of the two qubits.
680#[derive(Debug, Clone, Copy)]
681pub struct SWAP;
682
683impl Operator for SWAP {
684    /// Applies the SWAP operator to the given state's target qubits.
685    ///
686    /// # Arguments:
687    ///
688    /// * `state` - The state to apply the operator to.
689    ///
690    /// * `target_qubits` - The target qubits to apply the operator to. This should be two qubits.
691    ///
692    /// * `control_qubits` - The control qubits. If empty, the swap is unconditional. Otherwise, the swap occurs only if all control qubits are |1> for the relevant basis states.
693    /// # Returns:
694    ///
695    /// * The new state after applying the SWAP operator.
696    ///
697    /// # Errors:
698    ///
699    /// * `Error::InvalidNumberOfQubits` - If the target qubits are not 2 different qubits.
700    ///
701    /// * `Error::InvalidQubitIndex` - If the target qubit indices are invalid for the number of qubits in the state.
702    ///
703    /// * `Error::InvalidQubitIndex` - If the target qubit indices are not different.
704    ///
705    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
706    fn apply(
707        &self,
708        state: &State,
709        target_qubits: &[usize],
710        control_qubits: &[usize],
711    ) -> Result<State, Error> {
712        // Validation
713        validate_qubits(state, target_qubits, control_qubits, 2)?;
714
715        let target_qubit_1: usize = target_qubits[0];
716        let target_qubit_2: usize = target_qubits[1];
717        let num_qubits: usize = state.num_qubits();
718
719        // Apply potentially controlled SWAP operator
720        let dim: usize = 1 << num_qubits;
721        #[allow(unused_assignments)] // new_state_vec might be reassigned by GPU path
722        let mut new_state_vec = state.state_vector.clone(); // Start with a copy
723        let gpu_enabled: bool = cfg!(feature = "gpu");
724
725        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
726            #[cfg(feature = "gpu")]
727            {
728                // For SWAP, global_work_size is 2^(N-2) because each work item handles
729                // a pair of states differing at target_qubit_1 and target_qubit_2.
730                // The kernel iterates over the 2^(N-2) combinations of other qubits.
731                let global_work_size = if num_qubits >= 2 {
732                    1 << (num_qubits - 2)
733                } else {
734                    1
735                }; // Handle N=0,1 edge cases for work size
736                new_state_vec = execute_on_gpu(
737                    state,
738                    target_qubit_1, // target_a in kernel
739                    control_qubits,
740                    KernelType::Swap,
741                    global_work_size,
742                    GpuKernelArgs::SwapTarget {
743                        q1: target_qubit_2 as i32,
744                    }, // target_b in kernel
745                )?;
746            }
747        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
748            // Parallel implementation
749            let updates: Vec<(usize, Complex<f64>)> = (0..dim)
750                .into_par_iter()
751                .filter_map(|i| {
752                    let target_bit_1 = (i >> target_qubit_1) & 1;
753                    let target_bit_2 = (i >> target_qubit_2) & 1;
754
755                    if target_bit_1 != target_bit_2 {
756                        let j = i ^ (1 << target_qubit_1) ^ (1 << target_qubit_2);
757                        if i < j && check_controls(i, control_qubits) {
758                            let amp_i = state.state_vector[i];
759                            let amp_j = state.state_vector[j];
760                            Some(vec![(i, amp_j), (j, amp_i)])
761                        } else {
762                            None
763                        }
764                    } else {
765                        None
766                    }
767                })
768                .flatten()
769                .collect();
770            for (idx, val) in updates {
771                new_state_vec[idx] = val;
772            }
773        } else {
774            // Sequential implementation
775            for i in 0..dim {
776                let target_bit_1 = (i >> target_qubit_1) & 1;
777                let target_bit_2 = (i >> target_qubit_2) & 1;
778
779                if target_bit_1 != target_bit_2 {
780                    let j = i ^ (1 << target_qubit_1) ^ (1 << target_qubit_2);
781                    if i < j && check_controls(i, control_qubits) {
782                        let amp_i = state.state_vector[i];
783                        let amp_j = state.state_vector[j];
784                        new_state_vec[i] = amp_j;
785                        new_state_vec[j] = amp_i;
786                    }
787                }
788            }
789        }
790
791        Ok(State {
792            state_vector: new_state_vec,
793            num_qubits: state.num_qubits(),
794        })
795    }
796
797    fn base_qubits(&self) -> usize {
798        2 // SWAP acts on 2 qubits
799    }
800
801    fn to_compilable(&self) -> Option<&dyn Compilable> {
802        Some(self)
803    }
804}
805
806/// Defines a matchgate operator.
807///
808/// A two-qubit operator that applies a matchgate transformation to the adjacent target qubits.
809/// This gate can be decomposed into a two-qubit rotation and phase shifts.
810/// It is designed to simulate nearest-neighbor interactions in fermionic systems.
811///
812/// It resembles the matrix operator:
813/// ```
814/// [1 0 0 0; 0 cos(theta/2) -e^(i*phi1)sin(theta/2) 0; 0 sin(theta/2) e^(i*phi2)cos(theta/2) 0; 0 0 0 e^(i*phi2)]
815/// ```
816/// 
817/// # Warning
818/// 
819/// This gate is not yet compilable to OpenQASM, since it requires advanced decomposition techniques.
820#[derive(Debug, Clone, Copy)]
821pub struct Matchgate {
822    pub(crate) theta: f64,
823    pub(crate) phi1: f64,
824    pub(crate) phi2: f64,
825}
826
827impl Matchgate {
828    /// Creates a new Matchgate operator with the specified parameters.
829    ///
830    /// # Arguments:
831    ///
832    /// * `theta` - The angle of rotation in radians.
833    /// * `phi1` - The first phase shift in radians.
834    /// * `phi2` - The second phase shift in radians.
835    pub fn new(theta: f64, phi1: f64, phi2: f64) -> Self {
836        Matchgate { theta, phi1, phi2 }
837    }
838}
839
840impl Operator for Matchgate {
841    /// Applies the matchgate operator to the given state's target qubit and its adjacent qubit.
842    ///
843    /// # Arguments:
844    ///
845    /// * `state` - The state to apply the operator to.
846    ///
847    /// * `target_qubit` - The target qubit to apply the operator to. The adjacent is automatically determined as the next qubit.
848    ///
849    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
850    ///
851    /// # Returns:
852    ///
853    /// * The new state after applying the matchgate operator.
854    ///
855    /// # Errors:
856    ///
857    /// * `Error::InvalidNumberOfQubits` - If the number of target qubits is not 1.
858    ///
859    /// * `Error::InvalidQubitIndex` - If the target qubit indices are invalid for the number of qubits in the state, or if the target qubit is the last qubit.
860    ///
861    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
862    fn apply(
863        &self,
864        state: &State,
865        target_qubit: &[usize],
866        control_qubits: &[usize],
867    ) -> Result<State, Error> {
868        validate_qubits(state, target_qubit, control_qubits, 1)?;
869
870        let q1 = target_qubit[0];
871        // If q1 is the last qubit, return error
872        if q1 == state.num_qubits() - 1 {
873            return Err(Error::InvalidQubitIndex(q1, state.num_qubits()));
874        }
875
876        let q2 = q1 + 1; // q2 is always the next adjacent qubit.
877
878        let num_qubits = state.num_qubits();
879        let mut new_state_vec = state.state_vector.clone();
880        let gpu_enabled: bool = cfg!(feature = "gpu");
881
882        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
883            #[cfg(feature = "gpu")]
884            {
885                let global_work_size = 1 << (num_qubits - 2);
886                let cos_theta_half = (self.theta / 2.0).cos() as f32;
887                let sin_theta_half = (self.theta / 2.0).sin() as f32;
888                let exp_i_phi1 = Complex::new(0.0, self.phi1).exp();
889                let exp_i_phi2 = Complex::new(0.0, self.phi2).exp();
890
891                new_state_vec = execute_on_gpu(
892                    state,
893                    q1,
894                    control_qubits,
895                    KernelType::Matchgate,
896                    global_work_size,
897                    GpuKernelArgs::Matchgate {
898                        q1: q2 as i32,
899                        cos_theta_half,
900                        sin_theta_half,
901                        exp_i_phi1: Float2::new(exp_i_phi1.re as f32, exp_i_phi1.im as f32),
902                        exp_i_phi2: Float2::new(exp_i_phi2.re as f32, exp_i_phi2.im as f32),
903                    },
904                )?;
905            }
906        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
907            // Parallel implementation
908            let cos_theta_half = (self.theta / 2.0).cos();
909            let sin_theta_half = (self.theta / 2.0).sin();
910            let exp_i_phi1 = Complex::new(0.0, self.phi1).exp();
911            let exp_i_phi2 = Complex::new(0.0, self.phi2).exp();
912
913            let updates: Vec<(usize, Complex<f64>)> = (0..(1 << (num_qubits - 2)))
914                .into_par_iter()
915                .flat_map(|i| {
916                    let k = ((i >> q1) << (q1 + 1)) | (i & ((1 << q1) - 1));
917                    let l = ((k >> (q2 - 1)) << q2) | (k & ((1 << (q2 - 1)) - 1));
918
919                    let i01 = l | (1 << q1);
920                    let i10 = l | (1 << q2);
921                    let i11 = l | (1 << q1) | (1 << q2);
922
923                    let mut updates = Vec::new();
924
925                    if check_controls(i01, control_qubits) {
926                        let amp01 = state.state_vector[i01];
927                        let amp10 = state.state_vector[i10];
928
929                        let new_amp01 =
930                            cos_theta_half * amp01 - exp_i_phi1 * sin_theta_half * amp10;
931                        let new_amp10 =
932                            sin_theta_half * amp01 + exp_i_phi1 * cos_theta_half * amp10;
933
934                        updates.push((i01, new_amp01));
935                        updates.push((i10, new_amp10));
936                    }
937
938                    if check_controls(i11, control_qubits) {
939                        let new_amp11 = state.state_vector[i11] * exp_i_phi2;
940                        updates.push((i11, new_amp11));
941                    }
942                    updates
943                })
944                .collect();
945
946            for (idx, val) in updates {
947                new_state_vec[idx] = val;
948            }
949        } else {
950            // Sequential implementation
951            let cos_theta_half = (self.theta / 2.0).cos();
952            let sin_theta_half = (self.theta / 2.0).sin();
953            let exp_i_phi1 = Complex::new(0.0, self.phi1).exp();
954            let exp_i_phi2 = Complex::new(0.0, self.phi2).exp();
955
956            for i in 0..(1 << (num_qubits - 2)) {
957                let k = ((i >> q1) << (q1 + 1)) | (i & ((1 << q1) - 1));
958                let l = ((k >> (q2 - 1)) << q2) | (k & ((1 << (q2 - 1)) - 1));
959
960                let i01 = l | (1 << q1);
961                let i10 = l | (1 << q2);
962                let i11 = l | (1 << q1) | (1 << q2);
963
964                if check_controls(i01, control_qubits) {
965                    let amp01 = state.state_vector[i01];
966                    let amp10 = state.state_vector[i10];
967
968                    new_state_vec[i01] =
969                        cos_theta_half * amp01 - exp_i_phi1 * sin_theta_half * amp10;
970                    new_state_vec[i10] =
971                        sin_theta_half * amp01 + exp_i_phi1 * cos_theta_half * amp10;
972                }
973                if check_controls(i11, control_qubits) {
974                    new_state_vec[i11] = state.state_vector[i11] * exp_i_phi2;
975                }
976            }
977        }
978
979        Ok(State {
980            state_vector: new_state_vec,
981            num_qubits,
982        })
983    }
984
985    fn base_qubits(&self) -> usize {
986        2
987    }
988}
989
990/// Defines a Toffoli operator.
991///
992/// A three-qubit operator that flips the target qubit if both control qubits are in the |1> state. Also known as CCNOT (Controlled-Controlled-NOT).
993#[derive(Debug, Clone, Copy)]
994pub struct Toffoli;
995
996impl Operator for Toffoli {
997    /// Applies the Toffoli operator to the given state's target qubit, using the control qubits.
998    ///
999    /// # Arguments:
1000    ///
1001    /// * `state` - The state to apply the operator to.
1002    ///
1003    /// * `target_qubits` - The target qubit to apply the operator to. This should be a single qubit.
1004    ///
1005    /// * `control_qubits` - The control qubits for the operator. This should be two qubits.
1006    ///
1007    /// # Returns:
1008    ///
1009    /// * The new state after applying the Toffoli operator.
1010    ///
1011    /// # Errors:
1012    ///
1013    /// * `Error::InvalidNumberOfQubits` - If the target or control qubits are not 1 and 2 respectively, or if the control qubits are not different.
1014    ///
1015    /// * `Error::InvalidQubitIndex` - If the target or control qubit indices are invalid for the number of qubits in the state.
1016    ///
1017    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
1018    fn apply(
1019        &self,
1020        state: &State,
1021        target_qubits: &[usize],
1022        control_qubits: &[usize],
1023    ) -> Result<State, Error> {
1024        // Validation
1025        validate_qubits(state, target_qubits, control_qubits, 1)?;
1026
1027        // Additional validation for Toffoli: exactly two control qubits
1028        if control_qubits.len() != 2 {
1029            return Err(Error::InvalidNumberOfQubits(control_qubits.len()));
1030        }
1031
1032        // Additional validation for Toffoli: control qubits must be different
1033        if control_qubits[0] == control_qubits[1] {
1034            return Err(Error::InvalidNumberOfQubits(control_qubits.len()));
1035        }
1036
1037        Pauli::X.apply(state, target_qubits, control_qubits)
1038    }
1039
1040    fn base_qubits(&self) -> usize {
1041        3 // Toffoli acts on 3 qubits (2 control, 1 target)
1042    }
1043
1044    fn to_compilable(&self) -> Option<&dyn Compilable> {
1045        Some(self)
1046    }
1047}
1048
1049/// Defines an identity operator
1050///
1051/// A single-qubit operator that does not change the state of the qubit.
1052#[derive(Debug, Clone, Copy)]
1053pub struct Identity;
1054
1055impl Operator for Identity {
1056    /// Applies the identity operator to the given state's target qubit.
1057    ///
1058    /// # Arguments:
1059    ///
1060    /// * `state` - The state to apply the operator to.
1061    ///
1062    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1063    ///
1064    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1065    ///
1066    /// # Returns:
1067    ///
1068    /// * The new state after applying the identity operator.
1069    fn apply(
1070        &self,
1071        state: &State,
1072        target_qubits: &[usize],
1073        control_qubits: &[usize],
1074    ) -> Result<State, Error> {
1075        // Validation
1076        validate_qubits(state, target_qubits, control_qubits, 1)?;
1077
1078        // Apply identity operator (no change)
1079        Ok(state.clone())
1080    }
1081
1082    fn base_qubits(&self) -> usize {
1083        1 // Identity acts on 1 qubit only
1084    }
1085
1086    fn to_compilable(&self) -> Option<&dyn Compilable> {
1087        Some(self)
1088    }
1089}
1090
1091/// Defines a Phase S operator.
1092///
1093/// A single-qubit operator that applies a phase shift to the |1> state. Also known as the S gate or Phase gate.
1094#[derive(Debug, Clone, Copy)]
1095pub struct PhaseS;
1096
1097impl Operator for PhaseS {
1098    /// Applies the Phase S operator to the given state's target qubit.
1099    ///
1100    /// # Arguments:
1101    ///
1102    /// * `state` - The state to apply the operator to.
1103    ///
1104    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1105    ///
1106    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1107    ///
1108    /// # Returns:
1109    ///
1110    /// * The new state after applying the Phase S operator.
1111    fn apply(
1112        &self,
1113        state: &State,
1114        target_qubits: &[usize],
1115        control_qubits: &[usize],
1116    ) -> Result<State, Error> {
1117        // Validation
1118        validate_qubits(state, target_qubits, control_qubits, 1)?;
1119
1120        let target_qubit: usize = target_qubits[0];
1121        let num_qubits: usize = state.num_qubits();
1122
1123        // Apply potentially controlled Phase S operator
1124        #[allow(unused_assignments)]
1125        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1126        let gpu_enabled: bool = cfg!(feature = "gpu");
1127
1128        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1129            #[cfg(feature = "gpu")]
1130            {
1131                let global_work_size = 1 << num_qubits;
1132                new_state_vec = execute_on_gpu(
1133                    state,
1134                    target_qubit,
1135                    control_qubits,
1136                    KernelType::PhaseSOrSdag,
1137                    global_work_size,
1138                    GpuKernelArgs::SOrSdag { sign: 1.0f32 },
1139                )?;
1140            }
1141        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1142            let phase_factor = Complex::new(0.0, 1.0); // Phase shift of pi/2 (i)
1143            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1144            new_state_vec
1145                .par_iter_mut()
1146                .enumerate()
1147                .for_each(|(i, current_amp_ref)| {
1148                    if ((i >> target_qubit) & 1 == 1) && check_controls(i, control_qubits) {
1149                        *current_amp_ref = state.state_vector[i] * phase_factor;
1150                    }
1151                });
1152        } else {
1153            let phase_factor = Complex::new(0.0, 1.0); // Phase shift of pi/2 (i)
1154            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1155            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
1156                let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1157                if target_bit_is_one && check_controls(i, control_qubits) {
1158                    *current_amp_ref = state.state_vector[i] * phase_factor;
1159                }
1160            }
1161        }
1162
1163        Ok(State {
1164            state_vector: new_state_vec,
1165            num_qubits,
1166        })
1167    }
1168
1169    fn base_qubits(&self) -> usize {
1170        1 // Phase S acts on 1 qubit only
1171    }
1172
1173    fn to_compilable(&self) -> Option<&dyn Compilable> {
1174        Some(self)
1175    }
1176}
1177
1178/// Defines a Phase T operator.
1179///
1180/// A single-qubit operator that applies a phase shift to the |1> state. Also known as the T gate or π/8 gate.
1181#[derive(Debug, Clone, Copy)]
1182pub struct PhaseT;
1183
1184impl Operator for PhaseT {
1185    /// Applies the Phase T operator to the given state's target qubit.
1186    ///
1187    /// # Arguments:
1188    ///
1189    /// * `state` - The state to apply the operator to.
1190    ///
1191    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1192    ///
1193    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1194    ///
1195    /// # Returns:
1196    ///
1197    /// * The new state after applying the Phase T operator.
1198    fn apply(
1199        &self,
1200        state: &State,
1201        target_qubits: &[usize],
1202        control_qubits: &[usize],
1203    ) -> Result<State, Error> {
1204        // Validation
1205        validate_qubits(state, target_qubits, control_qubits, 1)?;
1206
1207        let target_qubit = target_qubits[0];
1208        let num_qubits = state.num_qubits();
1209
1210        // Apply potentially controlled Phase T operator
1211        #[allow(unused_assignments)]
1212        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1213        let gpu_enabled: bool = cfg!(feature = "gpu");
1214
1215        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1216            #[cfg(feature = "gpu")]
1217            {
1218                let global_work_size = 1 << num_qubits;
1219                let angle = PI / 4.0;
1220                new_state_vec = execute_on_gpu(
1221                    state,
1222                    target_qubit,
1223                    control_qubits,
1224                    KernelType::PhaseShift,
1225                    global_work_size,
1226                    GpuKernelArgs::PhaseShift {
1227                        cos_angle: angle.cos() as f32,
1228                        sin_angle: angle.sin() as f32,
1229                    },
1230                )?;
1231            }
1232        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1233            let invsqrt2: f64 = 1.0 / (2.0f64).sqrt();
1234            let phase_factor = Complex::new(invsqrt2, invsqrt2); // Phase shift of pi/4 (exp(i*pi/4))
1235            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1236            new_state_vec
1237                .par_iter_mut()
1238                .enumerate()
1239                .for_each(|(i, current_amp_ref)| {
1240                    if ((i >> target_qubit) & 1 == 1) && check_controls(i, control_qubits) {
1241                        *current_amp_ref = state.state_vector[i] * phase_factor;
1242                    }
1243                });
1244        } else {
1245            let invsqrt2: f64 = 1.0 / (2.0f64).sqrt();
1246            let phase_factor = Complex::new(invsqrt2, invsqrt2); // Phase shift of pi/4 (exp(i*pi/4))
1247            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1248            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
1249                let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1250                if target_bit_is_one && check_controls(i, control_qubits) {
1251                    *current_amp_ref = state.state_vector[i] * phase_factor;
1252                }
1253            }
1254        }
1255
1256        Ok(State {
1257            state_vector: new_state_vec,
1258            num_qubits,
1259        })
1260    }
1261
1262    fn base_qubits(&self) -> usize {
1263        1 // Phase T acts on 1 qubit only
1264    }
1265
1266    fn to_compilable(&self) -> Option<&dyn Compilable> {
1267        Some(self)
1268    }
1269}
1270
1271/// Defines a Phase Sdag operator.
1272///
1273/// A single-qubit operator that applies a phase shift to the |1> state. Also known as the S† gate or Phase† gate. Inverse of S gate.
1274#[derive(Debug, Clone, Copy)]
1275pub struct PhaseSdag;
1276
1277impl Operator for PhaseSdag {
1278    /// Applies the Phase Sdag operator to the given state's target qubit.
1279    ///
1280    /// # Arguments:
1281    ///
1282    /// * `state` - The state to apply the operator to.
1283    ///
1284    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1285    ///
1286    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1287    ///
1288    /// # Returns:
1289    ///
1290    /// * The new state after applying the Phase Sdag operator.
1291    fn apply(
1292        &self,
1293        state: &State,
1294        target_qubits: &[usize],
1295        control_qubits: &[usize],
1296    ) -> Result<State, Error> {
1297        // Validation
1298        validate_qubits(state, target_qubits, control_qubits, 1)?;
1299
1300        let target_qubit = target_qubits[0];
1301        let num_qubits = state.num_qubits();
1302
1303        // Apply potentially controlled Phase Sdag operator
1304        #[allow(unused_assignments)]
1305        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1306        let gpu_enabled: bool = cfg!(feature = "gpu");
1307
1308        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1309            #[cfg(feature = "gpu")]
1310            {
1311                let global_work_size = 1 << num_qubits;
1312                new_state_vec = execute_on_gpu(
1313                    state,
1314                    target_qubit,
1315                    control_qubits,
1316                    KernelType::PhaseSOrSdag,
1317                    global_work_size,
1318                    GpuKernelArgs::SOrSdag { sign: -1.0f32 },
1319                )?;
1320            }
1321        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1322            let phase_factor = Complex::new(0.0, -1.0); // Phase shift of -pi/2 (-i)
1323            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1324            new_state_vec
1325                .par_iter_mut()
1326                .enumerate()
1327                .for_each(|(i, current_amp_ref)| {
1328                    if ((i >> target_qubit) & 1 == 1) && check_controls(i, control_qubits) {
1329                        *current_amp_ref = state.state_vector[i] * phase_factor;
1330                    }
1331                });
1332        } else {
1333            let phase_factor = Complex::new(0.0, -1.0); // Phase shift of -pi/2 (-i)
1334            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1335            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
1336                let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1337                if target_bit_is_one && check_controls(i, control_qubits) {
1338                    *current_amp_ref = state.state_vector[i] * phase_factor;
1339                }
1340            }
1341        }
1342
1343        Ok(State {
1344            state_vector: new_state_vec,
1345            num_qubits,
1346        })
1347    }
1348
1349    fn base_qubits(&self) -> usize {
1350        1 // Phase Sdag acts on 1 qubit only
1351    }
1352
1353    fn to_compilable(&self) -> Option<&dyn Compilable> {
1354        Some(self)
1355    }
1356}
1357
1358/// Defines a Phase Tdag operator.
1359///
1360/// A single-qubit operator that applies a phase shift to the |1> state. Also known as the T† gate or π/8† gate. Inverse of T gate.
1361#[derive(Debug, Clone, Copy)]
1362pub struct PhaseTdag;
1363
1364impl Operator for PhaseTdag {
1365    /// Applies the Phase Tdag operator to the given state's target qubit.
1366    ///
1367    /// # Arguments:
1368    ///
1369    /// * `state` - The state to apply the operator to.
1370    ///
1371    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1372    ///
1373    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1374    ///
1375    /// # Returns:
1376    ///
1377    /// * The new state after applying the Phase Tdag operator.
1378    fn apply(
1379        &self,
1380        state: &State,
1381        target_qubits: &[usize],
1382        control_qubits: &[usize],
1383    ) -> Result<State, Error> {
1384        // Validation
1385        validate_qubits(state, target_qubits, control_qubits, 1)?;
1386
1387        let target_qubit = target_qubits[0];
1388        let num_qubits = state.num_qubits();
1389
1390        // Apply potentially controlled Phase Tdag operator
1391        #[allow(unused_assignments)]
1392        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1393        let gpu_enabled: bool = cfg!(feature = "gpu");
1394
1395        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1396            #[cfg(feature = "gpu")]
1397            {
1398                let global_work_size = 1 << num_qubits;
1399                let angle = -PI / 4.0;
1400                new_state_vec = execute_on_gpu(
1401                    state,
1402                    target_qubit,
1403                    control_qubits,
1404                    KernelType::PhaseShift,
1405                    global_work_size,
1406                    GpuKernelArgs::PhaseShift {
1407                        cos_angle: angle.cos() as f32,
1408                        sin_angle: angle.sin() as f32,
1409                    },
1410                )?;
1411            }
1412        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1413            let invsqrt2: f64 = 1.0 / (2.0f64).sqrt();
1414            let phase_factor = Complex::new(invsqrt2, -invsqrt2);
1415            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1416            new_state_vec
1417                .par_iter_mut()
1418                .enumerate()
1419                .for_each(|(i, current_amp_ref)| {
1420                    if ((i >> target_qubit) & 1 == 1) && check_controls(i, control_qubits) {
1421                        *current_amp_ref = state.state_vector[i] * phase_factor;
1422                    }
1423                });
1424        } else {
1425            let invsqrt2: f64 = 1.0 / (2.0f64).sqrt();
1426            let phase_factor = Complex::new(invsqrt2, -invsqrt2);
1427            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1428            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
1429                let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1430                if target_bit_is_one && check_controls(i, control_qubits) {
1431                    *current_amp_ref = state.state_vector[i] * phase_factor;
1432                }
1433            }
1434        }
1435
1436        Ok(State {
1437            state_vector: new_state_vec,
1438            num_qubits,
1439        })
1440    }
1441
1442    fn base_qubits(&self) -> usize {
1443        1 // Phase Tdag acts on 1 qubit only
1444    }
1445
1446    fn to_compilable(&self) -> Option<&dyn Compilable> {
1447        Some(self)
1448    }
1449}
1450
1451/// Defines the phase shift operator
1452///
1453/// A single-qubit operator that applies a phase shift of the provided angle to the |1> state. Also known as the phase shift gate.
1454#[derive(Debug, Clone, Copy)]
1455pub struct PhaseShift {
1456    pub(crate) angle: f64,
1457}
1458
1459impl PhaseShift {
1460    /// Creates a new PhaseShift operator with the given angle.
1461    ///
1462    /// # Arguments:
1463    ///
1464    /// * `angle` - The angle of the phase shift in radians.
1465    pub fn new(angle: f64) -> Self {
1466        PhaseShift { angle }
1467    }
1468}
1469
1470impl Operator for PhaseShift {
1471    /// Applies the phase shift operator to the given state's target qubit.
1472    ///
1473    /// # Arguments:
1474    ///
1475    /// * `state` - The state to apply the operator to.
1476    ///
1477    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1478    ///
1479    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1480    ///
1481    /// # Returns:
1482    ///
1483    /// * The new state after applying the phase shift operator.
1484    ///
1485    /// # Errors:
1486    ///
1487    /// * `Error::InvalidNumberOfQubits` - If the number of target qubits is not 1.
1488    ///
1489    /// * `Error::InvalidQubitIndex` - If the target qubit index or control qubit index is invalid for the number of qubits in the state.
1490    ///
1491    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
1492    fn apply(
1493        &self,
1494        state: &State,
1495        target_qubits: &[usize],
1496        control_qubits: &[usize],
1497    ) -> Result<State, Error> {
1498        // Validation
1499        validate_qubits(state, target_qubits, control_qubits, 1)?;
1500
1501        let target_qubit = target_qubits[0];
1502        let num_qubits = state.num_qubits();
1503
1504        // Apply potentially controlled Phase Shift operator
1505        #[allow(unused_assignments)]
1506        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1507        let gpu_enabled: bool = cfg!(feature = "gpu");
1508
1509        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1510            #[cfg(feature = "gpu")]
1511            {
1512                let global_work_size = 1 << num_qubits;
1513                new_state_vec = execute_on_gpu(
1514                    state,
1515                    target_qubit,
1516                    control_qubits,
1517                    KernelType::PhaseShift,
1518                    global_work_size,
1519                    GpuKernelArgs::PhaseShift {
1520                        cos_angle: self.angle.cos() as f32,
1521                        sin_angle: self.angle.sin() as f32,
1522                    },
1523                )?;
1524            }
1525        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1526            let phase_factor = Complex::new(self.angle.cos(), self.angle.sin());
1527            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1528            new_state_vec
1529                .par_iter_mut()
1530                .enumerate()
1531                .for_each(|(i, current_amp_ref)| {
1532                    if ((i >> target_qubit) & 1 == 1) && check_controls(i, control_qubits) {
1533                        *current_amp_ref = state.state_vector[i] * phase_factor;
1534                    }
1535                });
1536        } else {
1537            let phase_factor = Complex::new(self.angle.cos(), self.angle.sin());
1538            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1539            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
1540                let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1541                if target_bit_is_one && check_controls(i, control_qubits) {
1542                    *current_amp_ref = state.state_vector[i] * phase_factor;
1543                }
1544            }
1545        }
1546
1547        Ok(State {
1548            state_vector: new_state_vec,
1549            num_qubits,
1550        })
1551    }
1552
1553    fn base_qubits(&self) -> usize {
1554        1 // Phase shift acts on 1 qubit only
1555    }
1556
1557    fn to_compilable(&self) -> Option<&dyn Compilable> {
1558        Some(self)
1559    }
1560}
1561
1562/// Defines the rotate-X operator
1563///
1564/// A single-qubit operator that applies a rotation around the X axis of the Bloch sphere by the given angle. Also known as the RX gate.
1565#[derive(Debug, Clone, Copy)]
1566pub struct RotateX {
1567    pub(crate) angle: f64,
1568}
1569
1570impl RotateX {
1571    /// Creates a new RotateX operator with the given angle.
1572    ///
1573    /// # Arguments:
1574    ///
1575    /// * `angle` - The angle of rotation in radians.
1576    pub fn new(angle: f64) -> Self {
1577        RotateX { angle }
1578    }
1579}
1580
1581impl Operator for RotateX {
1582    /// Applies the RotateX operator to the given state's target qubit.
1583    ///
1584    /// # Arguments:
1585    ///
1586    /// * `state` - The state to apply the operator to.
1587    ///
1588    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1589    ///
1590    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1591    ///
1592    /// # Returns:
1593    ///
1594    /// * The new state after applying the RotateX operator.
1595    fn apply(
1596        &self,
1597        state: &State,
1598        target_qubits: &[usize],
1599        control_qubits: &[usize],
1600    ) -> Result<State, Error> {
1601        // Validation
1602        validate_qubits(state, target_qubits, control_qubits, 1)?;
1603
1604        let target_qubit = target_qubits[0];
1605        let num_qubits = state.num_qubits();
1606
1607        // Apply potentially controlled RotateX operator
1608        #[allow(unused_assignments)]
1609        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1610        let gpu_enabled: bool = cfg!(feature = "gpu");
1611
1612        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1613            #[cfg(feature = "gpu")]
1614            {
1615                let half_angle = self.angle / 2.0;
1616                let global_work_size = if num_qubits > 0 {
1617                    1 << (num_qubits - 1)
1618                } else {
1619                    1
1620                };
1621                new_state_vec = execute_on_gpu(
1622                    state,
1623                    target_qubit,
1624                    control_qubits,
1625                    KernelType::RotateX,
1626                    global_work_size,
1627                    GpuKernelArgs::RotationGate {
1628                        cos_half_angle: half_angle.cos() as f32,
1629                        sin_half_angle: half_angle.sin() as f32,
1630                    },
1631                )?;
1632            }
1633        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1634            // Parallel implementation
1635            let half_angle: f64 = self.angle / 2.0;
1636            let cos_half: f64 = half_angle.cos();
1637            let sin_half: f64 = half_angle.sin();
1638            let i_complex: Complex<f64> = Complex::new(0.0, 1.0);
1639            let dim: usize = 1 << num_qubits;
1640            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1641
1642            let updates: Vec<(usize, Complex<f64>)> = (0..dim)
1643                .into_par_iter()
1644                .filter_map(|i| {
1645                    if ((i >> target_qubit) & 1 == 0) && check_controls(i, control_qubits) {
1646                        let j = i | (1 << target_qubit);
1647                        let amp_i = state.state_vector[i];
1648                        let amp_j = state.state_vector[j];
1649                        Some(vec![
1650                            (i, cos_half * amp_i - i_complex * sin_half * amp_j),
1651                            (j, -i_complex * sin_half * amp_i + cos_half * amp_j),
1652                        ])
1653                    } else {
1654                        None
1655                    }
1656                })
1657                .flatten()
1658                .collect();
1659            for (idx, val) in updates {
1660                new_state_vec[idx] = val;
1661            }
1662        } else {
1663            // Sequential implementation
1664            let half_angle: f64 = self.angle / 2.0;
1665            let cos_half: f64 = half_angle.cos();
1666            let sin_half: f64 = half_angle.sin();
1667            let i_complex: Complex<f64> = Complex::new(0.0, 1.0);
1668            let dim: usize = 1 << num_qubits;
1669            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1670
1671            for i in 0..dim {
1672                if (i >> target_qubit) & 1 == 0 {
1673                    let j = i | (1 << target_qubit);
1674                    if check_controls(i, control_qubits) {
1675                        let amp_i = state.state_vector[i];
1676                        let amp_j = state.state_vector[j];
1677                        new_state_vec[i] = cos_half * amp_i - i_complex * sin_half * amp_j;
1678                        new_state_vec[j] = -i_complex * sin_half * amp_i + cos_half * amp_j;
1679                    }
1680                }
1681            }
1682        }
1683
1684        Ok(State {
1685            state_vector: new_state_vec,
1686            num_qubits: state.num_qubits(),
1687        })
1688    }
1689
1690    fn base_qubits(&self) -> usize {
1691        1 // RotateX acts on 1 qubit only
1692    }
1693
1694    fn to_compilable(&self) -> Option<&dyn Compilable> {
1695        Some(self)
1696    }
1697}
1698
1699/// Defines the rotate-Y operator
1700///
1701/// A single-qubit operator that applies a rotation around the Y axis of the Bloch sphere by the given angle. Also known as the RY gate.
1702#[derive(Debug, Clone, Copy)]
1703pub struct RotateY {
1704    pub(crate) angle: f64,
1705}
1706
1707impl RotateY {
1708    /// Creates a new RotateY operator with the given angle.
1709    ///
1710    /// # Arguments:
1711    ///
1712    /// * `angle` - The angle of rotation in radians.
1713    pub fn new(angle: f64) -> Self {
1714        RotateY { angle }
1715    }
1716}
1717
1718impl Operator for RotateY {
1719    /// Applies the RotateY operator to the given state's target qubit.
1720    ///
1721    /// # Arguments:
1722    ///
1723    /// * `state` - The state to apply the operator to.
1724    ///
1725    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1726    ///
1727    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1728    ///
1729    /// # Returns:
1730    ///
1731    /// * The new state after applying the RotateY operator.
1732    fn apply(
1733        &self,
1734        state: &State,
1735        target_qubits: &[usize],
1736        control_qubits: &[usize],
1737    ) -> Result<State, Error> {
1738        // Validation
1739        validate_qubits(state, target_qubits, control_qubits, 1)?;
1740
1741        let target_qubit = target_qubits[0];
1742        let num_qubits = state.num_qubits();
1743
1744        // Apply potentially controlled RotateY operator
1745        #[allow(unused_assignments)]
1746        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1747        let gpu_enabled: bool = cfg!(feature = "gpu");
1748
1749        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1750            #[cfg(feature = "gpu")]
1751            {
1752                let half_angle = self.angle / 2.0;
1753                let global_work_size = if num_qubits > 0 {
1754                    1 << (num_qubits - 1)
1755                } else {
1756                    1
1757                };
1758                new_state_vec = execute_on_gpu(
1759                    state,
1760                    target_qubit,
1761                    control_qubits,
1762                    KernelType::RotateY,
1763                    global_work_size,
1764                    GpuKernelArgs::RotationGate {
1765                        cos_half_angle: half_angle.cos() as f32,
1766                        sin_half_angle: half_angle.sin() as f32,
1767                    },
1768                )?;
1769            }
1770        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1771            // Parallel implementation
1772            let half_angle: f64 = self.angle / 2.0;
1773            let cos_half: f64 = half_angle.cos();
1774            let sin_half: f64 = half_angle.sin();
1775            let dim: usize = 1 << num_qubits;
1776            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1777
1778            let updates: Vec<(usize, Complex<f64>)> = (0..dim)
1779                .into_par_iter()
1780                .filter_map(|i| {
1781                    if ((i >> target_qubit) & 1 == 0) && check_controls(i, control_qubits) {
1782                        let j = i | (1 << target_qubit);
1783                        let amp_i = state.state_vector[i];
1784                        let amp_j = state.state_vector[j];
1785                        Some(vec![
1786                            (i, cos_half * amp_i - sin_half * amp_j),
1787                            (j, sin_half * amp_i + cos_half * amp_j),
1788                        ])
1789                    } else {
1790                        None
1791                    }
1792                })
1793                .flatten()
1794                .collect();
1795            for (idx, val) in updates {
1796                new_state_vec[idx] = val;
1797            }
1798        } else {
1799            // Sequential implementation
1800            let half_angle: f64 = self.angle / 2.0;
1801            let cos_half: f64 = half_angle.cos();
1802            let sin_half: f64 = half_angle.sin();
1803            let dim: usize = 1 << num_qubits;
1804            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1805
1806            for i in 0..dim {
1807                if (i >> target_qubit) & 1 == 0 {
1808                    let j = i | (1 << target_qubit);
1809                    if check_controls(i, control_qubits) {
1810                        let amp_i = state.state_vector[i];
1811                        let amp_j = state.state_vector[j];
1812                        new_state_vec[i] = cos_half * amp_i - sin_half * amp_j;
1813                        new_state_vec[j] = sin_half * amp_i + cos_half * amp_j;
1814                    }
1815                }
1816            }
1817        }
1818
1819        Ok(State {
1820            state_vector: new_state_vec,
1821            num_qubits: state.num_qubits(),
1822        })
1823    }
1824
1825    fn base_qubits(&self) -> usize {
1826        1 // RotateY acts on 1 qubit only
1827    }
1828
1829    fn to_compilable(&self) -> Option<&dyn Compilable> {
1830        Some(self)
1831    }
1832}
1833
1834/// Defines the rotate-Z operator
1835///
1836/// A single-qubit operator that applies a rotation around the Z axis of the Bloch sphere by the given angle. Also known as the RZ gate.
1837#[derive(Debug, Clone, Copy)]
1838pub struct RotateZ {
1839    pub(crate) angle: f64,
1840}
1841
1842impl RotateZ {
1843    /// Creates a new RotateZ operator with the given angle.
1844    ///
1845    /// # Arguments:
1846    ///
1847    /// * `angle` - The angle of rotation in radians.
1848    pub fn new(angle: f64) -> Self {
1849        RotateZ { angle }
1850    }
1851}
1852
1853impl Operator for RotateZ {
1854    /// Applies the RotateZ operator to the given state's target qubit.
1855    ///
1856    /// # Arguments:
1857    ///
1858    /// * `state` - The state to apply the operator to.
1859    ///
1860    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1861    ///
1862    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1863    ///
1864    /// # Returns:
1865    ///
1866    /// * The new state after applying the RotateZ operator.
1867    fn apply(
1868        &self,
1869        state: &State,
1870        target_qubits: &[usize],
1871        control_qubits: &[usize],
1872    ) -> Result<State, Error> {
1873        // Validation
1874        validate_qubits(state, target_qubits, control_qubits, 1)?;
1875
1876        let target_qubit = target_qubits[0];
1877        let num_qubits = state.num_qubits();
1878
1879        // Apply potentially controlled RotateZ operator
1880        #[allow(unused_assignments)]
1881        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1882        let gpu_enabled: bool = cfg!(feature = "gpu");
1883
1884        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1885            #[cfg(feature = "gpu")]
1886            {
1887                let half_angle = self.angle / 2.0;
1888                let global_work_size = 1 << num_qubits; // N work items for RZ
1889                new_state_vec = execute_on_gpu(
1890                    state,
1891                    target_qubit,
1892                    control_qubits,
1893                    KernelType::RotateZ,
1894                    global_work_size,
1895                    GpuKernelArgs::RotationGate {
1896                        cos_half_angle: half_angle.cos() as f32,
1897                        sin_half_angle: half_angle.sin() as f32,
1898                    },
1899                )?;
1900            }
1901        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1902            // Parallel implementation
1903            let half_angle = self.angle / 2.0;
1904            let phase_0 = Complex::new(half_angle.cos(), -half_angle.sin());
1905            let phase_1 = Complex::new(half_angle.cos(), half_angle.sin());
1906            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1907
1908            new_state_vec
1909                .par_iter_mut()
1910                .enumerate()
1911                .for_each(|(i, current_amp_ref)| {
1912                    if check_controls(i, control_qubits) {
1913                        let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1914                        if target_bit_is_one {
1915                            *current_amp_ref = state.state_vector[i] * phase_1;
1916                        } else {
1917                            *current_amp_ref = state.state_vector[i] * phase_0;
1918                        }
1919                    }
1920                });
1921        } else {
1922            // Sequential implementation
1923            let half_angle = self.angle / 2.0;
1924            let phase_0 = Complex::new(half_angle.cos(), -half_angle.sin());
1925            let phase_1 = Complex::new(half_angle.cos(), half_angle.sin());
1926            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1927
1928            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
1929                if check_controls(i, control_qubits) {
1930                    let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1931                    if target_bit_is_one {
1932                        *current_amp_ref = state.state_vector[i] * phase_1;
1933                    } else {
1934                        *current_amp_ref = state.state_vector[i] * phase_0;
1935                    }
1936                }
1937            }
1938        }
1939
1940        Ok(State {
1941            state_vector: new_state_vec,
1942            num_qubits: state.num_qubits(),
1943        })
1944    }
1945
1946    fn base_qubits(&self) -> usize {
1947        1 // RotateZ acts on 1 qubit only
1948    }
1949
1950    fn to_compilable(&self) -> Option<&dyn Compilable> {
1951        Some(self)
1952    }
1953}
1954
1955/// An arbitrary 2×2 unitary operator.
1956///
1957/// This operator can be applied to a single qubit in a quantum state. It is represented by a 2×2 unitary matrix.
1958/// The matrix must be unitary.
1959/// The operator can be constructed fallibly from a 2 x 2 unitary matrix, or infallibly from a rotation angle and phase shift angle.
1960#[derive(Debug, Clone, Copy)]
1961pub struct Unitary2 {
1962    /// The 2×2 unitary matrix representing the operator.
1963    pub(crate) matrix: [[Complex<f64>; 2]; 2],
1964}
1965
1966impl Unitary2 {
1967    /// Creates a new Unitary2 operator with the given 2×2 unitary matrix.
1968    ///
1969    /// # Arguments:
1970    ///
1971    /// * `matrix` - A 2×2 unitary matrix represented as a 2D array of complex numbers.
1972    ///
1973    /// # Returns:
1974    ///
1975    /// * `Result<Self, Error>` - A result containing the new Unitary2 operator or an error if the matrix is not unitary.
1976    ///
1977    /// # Errors:
1978    ///
1979    /// * `Error::NonUnitaryMatrix` - If the provided matrix is not unitary.
1980    pub fn new(matrix: [[Complex<f64>; 2]; 2]) -> Result<Self, Error> {
1981        // Faster 2×2 unitary check: U U_dagger = I (rows are orthonormal)
1982        let tol: f64 = f64::EPSILON * 2.0; // Tolerance for floating point comparisons
1983        let a: Complex<f64> = matrix[0][0]; // U_00
1984        let b: Complex<f64> = matrix[0][1]; // U_01
1985        let c: Complex<f64> = matrix[1][0]; // U_10
1986        let d: Complex<f64> = matrix[1][1]; // U_11
1987
1988        // Check if each row has norm 1
1989        // Row 0: |a|^2 + |b|^2 == 1
1990        if ((a.norm_sqr() + b.norm_sqr()) - 1.0).abs() > tol {
1991            return Err(Error::NonUnitaryMatrix);
1992        }
1993        // Row 1: |c|^2 + |d|^2 == 1
1994        if ((c.norm_sqr() + d.norm_sqr()) - 1.0).abs() > tol {
1995            return Err(Error::NonUnitaryMatrix);
1996        }
1997
1998        // Check if rows are orthogonal
1999        // Row 0 dot Row 1_conj: a*c_conj + b*d_conj == 0
2000        if (a * c.conj() + b * d.conj()).norm_sqr() > tol * tol {
2001            // Compare norm_sqr with tol^2
2002            return Err(Error::NonUnitaryMatrix);
2003        }
2004
2005        Ok(Unitary2 { matrix })
2006    }
2007
2008    /// Creates a new Unitary2 operator from a rotation angle theta and phase shift angle phi.
2009    /// This operator can be decomposed into a rotation around the Y axis followed by a phase shift.
2010    /// The enclosed unitary matrix is guaranteed to be unitary.
2011    /// 
2012    /// Special cases include:
2013    /// 
2014    /// * U(theta, 0) = RY(theta)
2015    /// * U(0, phi) = PhaseShift(phi)
2016    /// * U(Pi/2, Pi) = Hadamard
2017    /// * U(Pi, Pi) = Pauli-X
2018    ///
2019    /// # Arguments:
2020    ///
2021    /// * `theta` - The rotation angle in radians.
2022    ///
2023    /// * `phi` - The phase shift angle in radians
2024    ///
2025    /// # Returns:
2026    ///
2027    /// * `Self` - A new Unitary2 operator representing the rotation and phase shift.
2028    pub fn from_ry_phase(theta: f64, phi: f64) -> Self {
2029        let cos_half_theta = (theta / 2.0).cos();
2030        let sin_half_theta = (theta / 2.0).sin();
2031        let expi_phi = Complex::new(0.0, phi).exp();
2032
2033        // Construct the unitary matrix
2034        let matrix = [
2035            [
2036                Complex::new(cos_half_theta, 0.0),
2037                -expi_phi * sin_half_theta,
2038            ],
2039            [Complex::new(sin_half_theta, 0.0), expi_phi * cos_half_theta],
2040        ];
2041
2042        // Create Unitary2 operator unchecked
2043        Unitary2 { matrix }
2044    }
2045}
2046
2047impl Operator for Unitary2 {
2048    /// Applies the Unitary2 operator to the given state's target qubit.
2049    ///
2050    /// # Arguments:
2051    ///
2052    /// * `state` - The state to apply the operator to.
2053    ///
2054    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
2055    ///
2056    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
2057    ///
2058    /// # Returns:
2059    ///
2060    /// * The new state after applying the Unitary2 operator.
2061    fn apply(
2062        &self,
2063        state: &State,
2064        target_qubits: &[usize],
2065        control_qubits: &[usize],
2066    ) -> Result<State, Error> {
2067        // Validation
2068        validate_qubits(state, target_qubits, control_qubits, 1)?;
2069
2070        let t: usize = target_qubits[0];
2071        let nq: usize = state.num_qubits();
2072
2073        // Apply 2×2 block on each basis‐pair
2074        let dim = 1 << nq;
2075        let mut new_state_vec = state.state_vector.clone();
2076
2077        if nq >= PARALLEL_THRESHOLD_NUM_QUBITS {
2078            // Parallel implementation
2079            let updates: Vec<(usize, Complex<f64>)> = (0..dim)
2080                .into_par_iter()
2081                .filter_map(|i| {
2082                    if ((i >> t) & 1 == 0) && check_controls(i, control_qubits) {
2083                        let j = i | (1 << t);
2084                        let ai = state.state_vector[i];
2085                        let aj = state.state_vector[j];
2086                        Some(vec![
2087                            (i, self.matrix[0][0] * ai + self.matrix[0][1] * aj),
2088                            (j, self.matrix[1][0] * ai + self.matrix[1][1] * aj),
2089                        ])
2090                    } else {
2091                        None
2092                    }
2093                })
2094                .flatten()
2095                .collect();
2096            for (idx, val) in updates {
2097                new_state_vec[idx] = val;
2098            }
2099        } else {
2100            // Sequential implementation
2101            for i in 0..dim {
2102                if (i >> t) & 1 == 0 {
2103                    let j = i | (1 << t);
2104                    if check_controls(i, control_qubits) {
2105                        let ai = state.state_vector[i];
2106                        let aj = state.state_vector[j];
2107                        new_state_vec[i] = self.matrix[0][0] * ai + self.matrix[0][1] * aj;
2108                        new_state_vec[j] = self.matrix[1][0] * ai + self.matrix[1][1] * aj;
2109                    }
2110                }
2111            }
2112        }
2113
2114        Ok(State {
2115            state_vector: new_state_vec,
2116            num_qubits: nq,
2117        })
2118    }
2119
2120    fn base_qubits(&self) -> usize {
2121        1
2122    }
2123
2124    fn to_compilable(&self) -> Option<&dyn Compilable> {
2125        Some(self)
2126    }
2127}