quant_iron/components/
operator.rs

1use crate::compiler::compilable::Compilable;
2#[cfg(feature = "gpu")]
3use crate::components::gpu_context::GpuKernelArgs;
4#[cfg(feature = "gpu")]
5use crate::components::gpu_context::{GPU_CONTEXT, KernelType};
6use crate::{components::{state::State, pauli_string::PauliString}, errors::Error};
7use dyn_clone::DynClone;
8use num_complex::Complex;
9#[cfg(feature = "gpu")]
10use ocl::prm::Float2;
11use rayon::prelude::*;
12#[cfg(feature = "gpu")]
13use std::f64::consts::PI;
14use std::fmt::Display;
15use std::{collections::HashSet, fmt::Debug};
16
17/// Threshold for using parallel CPU implementation
18const PARALLEL_THRESHOLD_NUM_QUBITS: usize = 10;
19
20/// Threshold for using OpenCL (GPU acceleration)
21const OPENCL_THRESHOLD_NUM_QUBITS: usize = 15;
22
23#[cfg(feature = "gpu")]
24fn execute_on_gpu(
25    state: &State,
26    target_qubit: usize,
27    control_qubits: &[usize],
28    kernel_type: KernelType,
29    global_work_size: usize,
30    kernel_args: GpuKernelArgs,
31) -> Result<Vec<Complex<f64>>, Error> {
32    let mut context_guard = GPU_CONTEXT.lock().map_err(|_| Error::GpuContextLockError)?;
33    let context = match *context_guard {
34        Ok(ref mut ctx) => ctx,
35        Err(ref e) => return Err(e.clone()), // Propagate initialisation error
36    };
37
38    let num_qubits = state.num_qubits();
39    let num_state_elements = state.state_vector.len();
40
41    // Ensure buffers are ready and get mutable references
42    let state_buffer_cloned = context.ensure_state_buffer(num_state_elements)?.clone();
43
44    let control_qubits_i32: Vec<i32> = control_qubits.iter().map(|&q| q as i32).collect();
45    let control_buffer_len = control_qubits_i32.len();
46    let control_buffer_cloned = context.ensure_control_buffer(control_buffer_len)?.clone();
47
48    let state_vector_f32: Vec<Float2> = state
49        .state_vector
50        .iter()
51        .map(|c| Float2::new(c.re as f32, c.im as f32))
52        .collect();
53
54    // Copy data to GPU buffers
55    state_buffer_cloned
56        .write(&state_vector_f32)
57        .enq()
58        .map_err(|e| Error::OpenCLError(format!("Failed to write to state buffer: {}", e)))?;
59
60    if !control_qubits_i32.is_empty() {
61        control_buffer_cloned
62            .write(&control_qubits_i32)
63            .enq()
64            .map_err(|e| Error::OpenCLError(format!("Failed to write to control buffer: {}", e)))?;
65    } else {
66        // Write dummy data if no control qubits
67        let dummy_control_data = vec![0; 1]; // Dummy data for control buffer
68        control_buffer_cloned
69            .write(&dummy_control_data)
70            .enq()
71            .map_err(|e| {
72                Error::OpenCLError(format!("Failed to write to dummy control buffer: {}", e))
73            })?;
74    }
75
76    let mut kernel_builder = context.pro_que.kernel_builder(kernel_type.name());
77    kernel_builder
78        .global_work_size(global_work_size)
79        .arg(&state_buffer_cloned) // Pass by reference
80        .arg(num_qubits as i32)
81        .arg(target_qubit as i32)
82        .arg(control_buffer_cloned)
83        .arg(control_qubits_i32.len() as i32);
84
85    match kernel_args {
86        GpuKernelArgs::None => {
87            // No additional arguments needed for Hadamard, PauliX, PauliY, PauliZ
88        }
89        GpuKernelArgs::SOrSdag { sign } => {
90            kernel_builder.arg(sign);
91        }
92        GpuKernelArgs::PhaseShift {
93            cos_angle,
94            sin_angle,
95        } => {
96            kernel_builder.arg(cos_angle).arg(sin_angle);
97        }
98        GpuKernelArgs::SwapTarget { q1 } => {
99            kernel_builder.arg(q1);
100        }
101        GpuKernelArgs::RotationGate {
102            cos_half_angle,
103            sin_half_angle,
104        } => {
105            kernel_builder.arg(cos_half_angle).arg(sin_half_angle);
106        }
107        GpuKernelArgs::Matchgate {
108            q1,
109            cos_theta_half,
110            sin_theta_half,
111            exp_i_phi1,
112            exp_i_phi2,
113        } => {
114            kernel_builder
115                .arg(q1)
116                .arg(cos_theta_half)
117                .arg(sin_theta_half)
118                .arg(exp_i_phi1)
119                .arg(exp_i_phi2);
120        }
121    }
122
123    let kernel = kernel_builder.build().map_err(|e| {
124        Error::OpenCLError(format!(
125            "Failed to build kernel '{}': {}",
126            kernel_type.name(),
127            e
128        ))
129    })?;
130
131    unsafe {
132        kernel
133            .enq()
134            .map_err(|e| Error::OpenCLError(format!("Failed to enqueue kernel: {}", e)))?;
135    }
136
137    let mut state_vector_ocl_result = vec![Float2::new(0.0, 0.0); num_state_elements];
138    // Read data back from state_buffer
139    state_buffer_cloned
140        .read(&mut state_vector_ocl_result)
141        .enq()
142        .map_err(|e| Error::OpenCLError(format!("Failed to read state buffer: {}", e)))?;
143
144    Ok(state_vector_ocl_result
145        .iter()
146        .map(|f2| Complex::new(f2[0] as f64, f2[1] as f64))
147        .collect())
148}
149
150/// A trait defining the interface for all operators.
151pub trait Operator: Send + Sync + Debug + DynClone + Display {
152    /// Applies the operator to the given state's target qubits, using the control qubits if required.
153    ///
154    /// # Arguments:
155    ///
156    /// * `state` - The state to apply the operator to.
157    ///
158    /// * `target_qubits` - The target qubits to apply the operator to. If no target qubits are specified, the operator will be applied to all qubits in the state.
159    ///
160    /// * `control_qubits` - The control qubits to apply the operator to.
161    ///
162    /// # Returns:
163    ///
164    /// * The new state after applying the operator.
165    fn apply(
166        &self,
167        state: &State,
168        target_qubits: &[usize],
169        control_qubits: &[usize],
170    ) -> Result<State, Error>;
171
172    /// Returns the number of qubits that the operator acts on.
173    ///
174    /// # Returns:
175    ///
176    /// * The number of qubits that the operator acts on.
177    fn base_qubits(&self) -> usize;
178
179    /// Optionally returns an intermediate representation of the operator for compilation to OpenQASM.
180    ///
181    /// If you are not planning to compile the operator to an IR, you can ignore this method.
182    /// If you want to compile the operator to QASM, you should implement this method.
183    ///
184    /// # Returns:
185    ///  * An optional vector of `InstructionIR` representing the operator in an intermediate representation.
186    fn to_compilable(&self) -> Option<&dyn Compilable> {
187        // Default implementation returns None, indicating no compilable representation
188        None
189    }
190}
191
192dyn_clone::clone_trait_object!(Operator);
193
194/// Helper function to check if all control qubits are in the |1> state for a given basis state index.
195fn check_controls(index: usize, control_qubits: &[usize]) -> bool {
196    control_qubits
197        .iter()
198        .all(|&qubit| (index >> qubit) & 1 == 1)
199}
200
201/// Helper function to validate target and control qubits
202///
203/// # Arguments:
204///
205/// * `state` - The quantum state that contains information about the number of qubits.
206/// * `target_qubits` - The target qubits to validate.
207/// * `control_qubits` - The control qubits to validate.
208/// * `expected_targets` - The expected number of target qubits.
209///
210/// # Returns:
211///
212/// * `Ok(())` if all validations pass, ie. the target qubits are valid indices, the control qubits are valid indices, and there are no overlaps between control and target qubits.
213/// * `Err(Error)` if any validation fails.
214fn validate_qubits(
215    state: &State,
216    target_qubits: &[usize],
217    control_qubits: &[usize],
218    expected_targets: usize,
219) -> Result<(), Error> {
220    // Check if we have the expected number of target qubits
221    if target_qubits.len() != expected_targets {
222        return Err(Error::InvalidNumberOfQubits(target_qubits.len()));
223    }
224
225    let num_qubits = state.num_qubits();
226
227    // Check if all target qubits are valid indices
228    for &target_qubit in target_qubits {
229        if target_qubit >= num_qubits {
230            return Err(Error::InvalidQubitIndex(target_qubit, num_qubits));
231        }
232    }
233
234    // Check if all control qubits are valid indices and don't overlap with target qubits
235    for &control_qubit in control_qubits {
236        if control_qubit >= num_qubits {
237            return Err(Error::InvalidQubitIndex(control_qubit, num_qubits));
238        }
239
240        for &target_qubit in target_qubits {
241            if control_qubit == target_qubit {
242                return Err(Error::OverlappingControlAndTargetQubits(
243                    control_qubit,
244                    target_qubit,
245                ));
246            }
247        }
248    }
249
250    // Special check for multiple target qubits to ensure no duplicates
251    if expected_targets > 1 {
252        if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
253            // Use HashSet for efficient duplicate detection in larger systems
254            let mut seen_targets: HashSet<usize> = HashSet::with_capacity(target_qubits.len());
255            for &target_qubit in target_qubits {
256                if !seen_targets.insert(target_qubit) {
257                    return Err(Error::InvalidQubitIndex(target_qubit, num_qubits));
258                }
259            }
260        } else {
261            // Use nested loops for smaller systems
262            for i in 0..target_qubits.len() {
263                for j in i + 1..target_qubits.len() {
264                    if target_qubits[i] == target_qubits[j] {
265                        return Err(Error::InvalidQubitIndex(target_qubits[i], num_qubits));
266                    }
267                }
268            }
269        }
270    }
271
272    Ok(())
273}
274
275/// Defines a Hadamard operator.
276///
277/// A single-qubit operator that transforms the state of a qubit into a superposition of its basis states.
278#[derive(Debug, Clone, Copy)]
279pub struct Hadamard;
280
281impl Operator for Hadamard {
282    /// Applies the Hadamard operator to the given state's target qubit.
283    ///
284    /// # Arguments:
285    ///
286    /// * `state` - The state to apply the operator to.
287    ///
288    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
289    ///
290    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
291    ///
292    /// # Returns:
293    ///
294    /// * The new state after applying the Hadamard operator.
295    ///
296    /// # Errors:
297    ///
298    /// * `Error::InvalidNumberOfQubits` - If the number of target qubits is not 1.
299    ///
300    /// * `Error::InvalidQubitIndex` - If the target qubit or control qubit index is invalid for the number of qubits in the state.
301    ///
302    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
303    fn apply(
304        &self,
305        state: &State,
306        target_qubits: &[usize],
307        control_qubits: &[usize],
308    ) -> Result<State, Error> {
309        // Validation
310        validate_qubits(state, target_qubits, control_qubits, 1)?;
311
312        let target_qubit: usize = target_qubits[0];
313        let num_qubits: usize = state.num_qubits();
314
315        // Apply potentially controlled Hadamard operator
316        let sqrt_2_inv: f64 = 1.0 / (2.0f64).sqrt();
317        let dim: usize = 1 << num_qubits;
318        #[allow(unused_assignments)]
319        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
320        let gpu_enabled: bool = cfg!(feature = "gpu");
321
322        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
323            #[cfg(feature = "gpu")]
324            {
325                let global_work_size = if num_qubits > 0 {
326                    1 << (num_qubits - 1)
327                } else {
328                    1
329                };
330                new_state_vec = execute_on_gpu(
331                    state,
332                    target_qubit,
333                    control_qubits,
334                    KernelType::Hadamard,
335                    global_work_size,
336                    GpuKernelArgs::None,
337                )?;
338            }
339        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
340            // Rayon CPU Parallel implementation
341            new_state_vec = state.state_vector.clone(); // Initialise for CPU path
342            if control_qubits.is_empty() {
343                // Parallel uncontrolled Hadamard
344                let updates: Vec<(usize, Complex<f64>)> = (0..(1 << (num_qubits - 1)))
345                    .into_par_iter()
346                    .flat_map(|k| {
347                        let i0 = (k >> target_qubit << (target_qubit + 1))
348                            | (k & ((1 << target_qubit) - 1));
349                        let i1 = i0 | (1 << target_qubit);
350                        let amp0 = state.state_vector[i0];
351                        let amp1 = state.state_vector[i1];
352                        vec![
353                            (i0, sqrt_2_inv * (amp0 + amp1)),
354                            (i1, sqrt_2_inv * (amp0 - amp1)),
355                        ]
356                    })
357                    .collect();
358                for (idx, val) in updates {
359                    new_state_vec[idx] = val;
360                }
361            } else {
362                // Rayon CPU Parallel controlled Hadamard
363                let updates: Vec<(usize, Complex<f64>)> = (0..dim)
364                    .into_par_iter()
365                    .filter_map(|i| {
366                        if (i >> target_qubit) & 1 == 0 {
367                            // Process pairs (i, j) where i has 0 at target_qubit
368                            let j = i | (1 << target_qubit); // j has 1 at target_qubit
369                            if check_controls(i, control_qubits) {
370                                // Check controls based on i
371                                let amp_i = state.state_vector[i];
372                                let amp_j = state.state_vector[j];
373                                Some(vec![
374                                    (i, sqrt_2_inv * (amp_i + amp_j)),
375                                    (j, sqrt_2_inv * (amp_i - amp_j)),
376                                ])
377                            } else {
378                                None // Controls not met for this pair
379                            }
380                        } else {
381                            None // Already processed as part of a pair starting with 0 at target_qubit
382                        }
383                    })
384                    .flatten()
385                    .collect();
386                for (idx, val) in updates {
387                    new_state_vec[idx] = val;
388                }
389            }
390        } else {
391            // Sequential CPU implementation
392            new_state_vec = state.state_vector.clone(); // initialise for CPU path
393            if control_qubits.is_empty() {
394                // Sequential uncontrolled Hadamard
395                for k in 0..(1 << (num_qubits - 1)) {
396                    let i0 =
397                        (k >> target_qubit << (target_qubit + 1)) | (k & ((1 << target_qubit) - 1));
398                    let i1 = i0 | (1 << target_qubit);
399                    let amp0 = state.state_vector[i0];
400                    let amp1 = state.state_vector[i1];
401                    new_state_vec[i0] = sqrt_2_inv * (amp0 + amp1);
402                    new_state_vec[i1] = sqrt_2_inv * (amp0 - amp1);
403                }
404            } else {
405                // Sequential controlled Hadamard
406                for i in 0..dim {
407                    if (i >> target_qubit) & 1 == 0 {
408                        let j = i | (1 << target_qubit);
409                        if check_controls(i, control_qubits) {
410                            let amp_i = state.state_vector[i];
411                            let amp_j = state.state_vector[j];
412                            new_state_vec[i] = sqrt_2_inv * (amp_i + amp_j);
413                            new_state_vec[j] = sqrt_2_inv * (amp_i - amp_j);
414                        }
415                    }
416                }
417            }
418        }
419
420        Ok(State {
421            state_vector: new_state_vec,
422            num_qubits,
423        })
424    }
425
426    fn base_qubits(&self) -> usize {
427        1 // Hadamard acts on 1 qubit only
428    }
429
430    fn to_compilable(&self) -> Option<&dyn Compilable> {
431        Some(self)
432    }
433}
434
435impl Display for Hadamard {
436    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
437        write!(f, "H")
438    }
439}
440
441/// Defines the Pauli operators: X, Y, Z.
442#[derive(Debug, Clone, Copy, PartialEq)]
443pub enum Pauli {
444    /// Pauli-X operator (NOT gate)
445    X,
446    /// Pauli-Y operator
447    Y,
448    /// Pauli-Z operator
449    Z,
450}
451
452impl Operator for Pauli {
453    /// Applies the Pauli operator to the given state's target qubit.
454    ///
455    /// # Arguments:
456    ///
457    /// * `state` - The state to apply the operator to.
458    ///
459    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
460    ///
461    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
462    ///
463    /// # Returns:
464    ///
465    /// * The new state after applying the Pauli operator.
466    ///
467    /// # Errors:
468    ///
469    /// * `Error::InvalidNumberOfQubits` - If the number of target qubits is not 1.
470    ///
471    /// * `Error::InvalidQubitIndex` - If the target qubit index is invalid for the number of qubits in the state.
472    ///
473    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
474    fn apply(
475        &self,
476        state: &State,
477        target_qubits: &[usize],
478        control_qubits: &[usize],
479    ) -> Result<State, Error> {
480        // Validation
481        validate_qubits(state, target_qubits, control_qubits, 1)?;
482
483        let target_qubit: usize = target_qubits[0];
484        let num_qubits: usize = state.num_qubits();
485
486        // Apply potentially controlled Pauli operator
487        let dim: usize = 1 << num_qubits;
488        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
489        let i_complex: Complex<f64> = Complex::new(0.0, 1.0);
490        let gpu_enabled: bool = cfg!(feature = "gpu");
491
492        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
493            #[cfg(feature = "gpu")]
494            {
495                let kernel_type = match self {
496                    Pauli::X => KernelType::PauliX,
497                    Pauli::Y => KernelType::PauliY,
498                    Pauli::Z => KernelType::PauliZ,
499                };
500                let global_work_size = if num_qubits == 0 {
501                    1
502                } else {
503                    match self {
504                        Pauli::Z => 1 << num_qubits, // N work items for Pauli Z
505                        _ => 1 << (num_qubits - 1),  // N/2 work items for Pauli X, Y
506                    }
507                };
508                new_state_vec = execute_on_gpu(
509                    state,
510                    target_qubit,
511                    control_qubits,
512                    kernel_type,
513                    global_work_size,
514                    GpuKernelArgs::None,
515                )?;
516            }
517        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
518            // Parallel implementation
519            match self {
520                Pauli::X => {
521                    let updates: Vec<(usize, Complex<f64>)> = (0..dim)
522                        .into_par_iter()
523                        .filter_map(|i| {
524                            if check_controls(i, control_qubits) && ((i >> target_qubit) & 1 == 0) {
525                                let j = i | (1 << target_qubit);
526                                let amp_i = state.state_vector[i];
527                                let amp_j = state.state_vector[j];
528                                Some(vec![(i, amp_j), (j, amp_i)])
529                            } else {
530                                None
531                            }
532                        })
533                        .flatten()
534                        .collect();
535                    for (idx, val) in updates {
536                        new_state_vec[idx] = val;
537                    }
538                }
539                Pauli::Y => {
540                    let updates: Vec<(usize, Complex<f64>)> = (0..dim)
541                        .into_par_iter()
542                        .filter_map(|i| {
543                            if check_controls(i, control_qubits) && ((i >> target_qubit) & 1 == 0) {
544                                let j = i | (1 << target_qubit);
545                                let amp_i = state.state_vector[i];
546                                let amp_j = state.state_vector[j];
547                                Some(vec![(i, -i_complex * amp_j), (j, i_complex * amp_i)])
548                            } else {
549                                None
550                            }
551                        })
552                        .flatten()
553                        .collect();
554                    for (idx, val) in updates {
555                        new_state_vec[idx] = val;
556                    }
557                }
558                Pauli::Z => {
559                    new_state_vec
560                        .par_iter_mut()
561                        .enumerate()
562                        .for_each(|(i, current_amp_ref)| {
563                            if check_controls(i, control_qubits) && ((i >> target_qubit) & 1 == 1) {
564                                *current_amp_ref = -state.state_vector[i];
565                            }
566                        });
567                }
568            }
569        } else {
570            // Sequential implementation
571            for i in 0..dim {
572                if check_controls(i, control_qubits) {
573                    match self {
574                        Pauli::X => {
575                            if (i >> target_qubit) & 1 == 0 {
576                                let j = i | (1 << target_qubit);
577                                let amp_i = state.state_vector[i];
578                                let amp_j = state.state_vector[j];
579                                new_state_vec[i] = amp_j;
580                                new_state_vec[j] = amp_i;
581                            }
582                        }
583                        Pauli::Y => {
584                            if (i >> target_qubit) & 1 == 0 {
585                                let j = i | (1 << target_qubit);
586                                let amp_i = state.state_vector[i];
587                                let amp_j = state.state_vector[j];
588                                new_state_vec[i] = -i_complex * amp_j;
589                                new_state_vec[j] = i_complex * amp_i;
590                            }
591                        }
592                        Pauli::Z => {
593                            if (i >> target_qubit) & 1 == 1 {
594                                new_state_vec[i] = -state.state_vector[i];
595                            }
596                        }
597                    }
598                }
599            }
600        }
601
602        Ok(State {
603            state_vector: new_state_vec,
604            num_qubits: state.num_qubits(),
605        })
606    }
607
608    fn base_qubits(&self) -> usize {
609        1 // Pauli operators act on 1 qubit only
610    }
611
612    fn to_compilable(&self) -> Option<&dyn Compilable> {
613        Some(self) // Manual implementation for enum
614    }
615}
616
617impl std::fmt::Display for Pauli {
618    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
619        match self {
620            Pauli::X => write!(f, "X"),
621            Pauli::Y => write!(f, "Y"),
622            Pauli::Z => write!(f, "Z"),
623        }
624    }
625}
626
627impl Pauli {
628    /// Returns the Pauli string representation of the operator.
629    /// 
630    /// # Returns
631    /// * A Pauli string with the specified Pauli operator on the target qubit and a coefficient of 1.0.
632    pub fn to_pauli_string(&self, target_qubit: usize) -> PauliString {
633        let mut pauli_map = std::collections::HashMap::new();
634        pauli_map.insert(target_qubit, *self);
635        PauliString::with_ops(1.0.into(), pauli_map)
636    }
637}
638
639/// Defines a CNOT operator.
640///
641/// A two-qubit operator that flips the target qubit if the control qubit is in the |1> state.
642#[derive(Debug, Clone, Copy)]
643pub struct CNOT;
644
645impl Operator for CNOT {
646    /// Applies the CNOT operator to the given state's target qubit, using the control qubit.
647    ///
648    /// # Arguments:
649    ///
650    /// * `state` - The state to apply the operator to.
651    ///
652    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
653    ///
654    /// * `control_qubits` - The control qubits for the operator. This should be a single qubit.
655    ///
656    /// # Returns:
657    ///
658    /// * The new state after applying the CNOT operator.
659    ///
660    /// # Errors:
661    ///
662    /// * `Error::InvalidNumberOfQubits` - If the target or control qubits is not 1.
663    ///
664    /// * `Error::InvalidQubitIndex` - If the target or control qubit index is invalid for the number of qubits in the state.
665    ///
666    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
667    fn apply(
668        &self,
669        state: &State,
670        target_qubits: &[usize],
671        control_qubits: &[usize],
672    ) -> Result<State, Error> {
673        // Validation
674        validate_qubits(state, target_qubits, control_qubits, 1)?;
675
676        // Additional validation for CNOT: exactly one control qubit
677        if control_qubits.len() != 1 {
678            return Err(Error::InvalidNumberOfQubits(control_qubits.len()));
679        }
680
681        let control_qubit: usize = control_qubits[0];
682
683        // Apply CNOT operator (same as Pauli-X with 1 control qubit)
684        Pauli::X.apply(state, target_qubits, &[control_qubit])
685    }
686
687    fn base_qubits(&self) -> usize {
688        2 // CNOT acts on 2 qubits (1 control, 1 target)
689    }
690
691    fn to_compilable(&self) -> Option<&dyn Compilable> {
692        Some(self)
693    }
694}
695
696impl std::fmt::Display for CNOT {
697    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
698        write!(f, "CNOT")
699    }
700}
701
702/// Defines a SWAP operator.
703///
704/// A two-qubit operator that swaps the states of the two qubits.
705#[derive(Debug, Clone, Copy)]
706pub struct SWAP;
707
708impl Operator for SWAP {
709    /// Applies the SWAP operator to the given state's target qubits.
710    ///
711    /// # Arguments:
712    ///
713    /// * `state` - The state to apply the operator to.
714    ///
715    /// * `target_qubits` - The target qubits to apply the operator to. This should be two qubits.
716    ///
717    /// * `control_qubits` - The control qubits. If empty, the swap is unconditional. Otherwise, the swap occurs only if all control qubits are |1> for the relevant basis states.
718    /// # Returns:
719    ///
720    /// * The new state after applying the SWAP operator.
721    ///
722    /// # Errors:
723    ///
724    /// * `Error::InvalidNumberOfQubits` - If the target qubits are not 2 different qubits.
725    ///
726    /// * `Error::InvalidQubitIndex` - If the target qubit indices are invalid for the number of qubits in the state.
727    ///
728    /// * `Error::InvalidQubitIndex` - If the target qubit indices are not different.
729    ///
730    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
731    fn apply(
732        &self,
733        state: &State,
734        target_qubits: &[usize],
735        control_qubits: &[usize],
736    ) -> Result<State, Error> {
737        // Validation
738        validate_qubits(state, target_qubits, control_qubits, 2)?;
739
740        let target_qubit_1: usize = target_qubits[0];
741        let target_qubit_2: usize = target_qubits[1];
742        let num_qubits: usize = state.num_qubits();
743
744        // Apply potentially controlled SWAP operator
745        let dim: usize = 1 << num_qubits;
746        #[allow(unused_assignments)] // new_state_vec might be reassigned by GPU path
747        let mut new_state_vec = state.state_vector.clone(); // Start with a copy
748        let gpu_enabled: bool = cfg!(feature = "gpu");
749
750        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
751            #[cfg(feature = "gpu")]
752            {
753                // For SWAP, global_work_size is 2^(N-2) because each work item handles
754                // a pair of states differing at target_qubit_1 and target_qubit_2.
755                // The kernel iterates over the 2^(N-2) combinations of other qubits.
756                let global_work_size = if num_qubits >= 2 {
757                    1 << (num_qubits - 2)
758                } else {
759                    1
760                }; // Handle N=0,1 edge cases for work size
761                new_state_vec = execute_on_gpu(
762                    state,
763                    target_qubit_1, // target_a in kernel
764                    control_qubits,
765                    KernelType::Swap,
766                    global_work_size,
767                    GpuKernelArgs::SwapTarget {
768                        q1: target_qubit_2 as i32,
769                    }, // target_b in kernel
770                )?;
771            }
772        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
773            // Parallel implementation
774            let updates: Vec<(usize, Complex<f64>)> = (0..dim)
775                .into_par_iter()
776                .filter_map(|i| {
777                    let target_bit_1 = (i >> target_qubit_1) & 1;
778                    let target_bit_2 = (i >> target_qubit_2) & 1;
779
780                    if target_bit_1 != target_bit_2 {
781                        let j = i ^ (1 << target_qubit_1) ^ (1 << target_qubit_2);
782                        if i < j && check_controls(i, control_qubits) {
783                            let amp_i = state.state_vector[i];
784                            let amp_j = state.state_vector[j];
785                            Some(vec![(i, amp_j), (j, amp_i)])
786                        } else {
787                            None
788                        }
789                    } else {
790                        None
791                    }
792                })
793                .flatten()
794                .collect();
795            for (idx, val) in updates {
796                new_state_vec[idx] = val;
797            }
798        } else {
799            // Sequential implementation
800            for i in 0..dim {
801                let target_bit_1 = (i >> target_qubit_1) & 1;
802                let target_bit_2 = (i >> target_qubit_2) & 1;
803
804                if target_bit_1 != target_bit_2 {
805                    let j = i ^ (1 << target_qubit_1) ^ (1 << target_qubit_2);
806                    if i < j && check_controls(i, control_qubits) {
807                        let amp_i = state.state_vector[i];
808                        let amp_j = state.state_vector[j];
809                        new_state_vec[i] = amp_j;
810                        new_state_vec[j] = amp_i;
811                    }
812                }
813            }
814        }
815
816        Ok(State {
817            state_vector: new_state_vec,
818            num_qubits: state.num_qubits(),
819        })
820    }
821
822    fn base_qubits(&self) -> usize {
823        2 // SWAP acts on 2 qubits
824    }
825
826    fn to_compilable(&self) -> Option<&dyn Compilable> {
827        Some(self)
828    }
829}
830
831impl std::fmt::Display for SWAP {
832    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
833        write!(f, "SWAP")
834    }
835}
836
837/// Defines a matchgate operator.
838///
839/// A two-qubit operator that applies a matchgate transformation to the adjacent target qubits.
840/// This gate can be decomposed into a two-qubit rotation and phase shifts.
841/// It is designed to simulate nearest-neighbor interactions in fermionic systems.
842///
843/// It resembles the matrix operator:
844/// ```
845/// [1 0 0 0; 0 cos(theta/2) -e^(i*phi1)sin(theta/2) 0; 0 sin(theta/2) e^(i*phi2)cos(theta/2) 0; 0 0 0 e^(i*phi2)]
846/// ```
847///
848/// # Warning
849///
850/// This gate is not yet compilable to OpenQASM, since it requires advanced decomposition techniques.
851#[derive(Debug, Clone, Copy)]
852pub struct Matchgate {
853    pub(crate) theta: f64,
854    pub(crate) phi1: f64,
855    pub(crate) phi2: f64,
856}
857
858impl Matchgate {
859    /// Creates a new Matchgate operator with the specified parameters.
860    ///
861    /// # Arguments:
862    ///
863    /// * `theta` - The angle of rotation in radians.
864    /// * `phi1` - The first phase shift in radians.
865    /// * `phi2` - The second phase shift in radians.
866    pub fn new(theta: f64, phi1: f64, phi2: f64) -> Self {
867        Matchgate { theta, phi1, phi2 }
868    }
869}
870
871impl Operator for Matchgate {
872    /// Applies the matchgate operator to the given state's target qubit and its adjacent qubit.
873    ///
874    /// # Arguments:
875    ///
876    /// * `state` - The state to apply the operator to.
877    ///
878    /// * `target_qubit` - The target qubit to apply the operator to. The adjacent is automatically determined as the next qubit.
879    ///
880    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
881    ///
882    /// # Returns:
883    ///
884    /// * The new state after applying the matchgate operator.
885    ///
886    /// # Errors:
887    ///
888    /// * `Error::InvalidNumberOfQubits` - If the number of target qubits is not 1.
889    ///
890    /// * `Error::InvalidQubitIndex` - If the target qubit indices are invalid for the number of qubits in the state, or if the target qubit is the last qubit.
891    ///
892    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
893    fn apply(
894        &self,
895        state: &State,
896        target_qubit: &[usize],
897        control_qubits: &[usize],
898    ) -> Result<State, Error> {
899        validate_qubits(state, target_qubit, control_qubits, 1)?;
900
901        let q1 = target_qubit[0];
902        // If q1 is the last qubit, return error
903        if q1 == state.num_qubits() - 1 {
904            return Err(Error::InvalidQubitIndex(q1, state.num_qubits()));
905        }
906
907        let q2 = q1 + 1; // q2 is always the next adjacent qubit.
908
909        let num_qubits = state.num_qubits();
910        let mut new_state_vec = state.state_vector.clone();
911        let gpu_enabled: bool = cfg!(feature = "gpu");
912
913        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
914            #[cfg(feature = "gpu")]
915            {
916                let global_work_size = 1 << (num_qubits - 2);
917                let cos_theta_half = (self.theta / 2.0).cos() as f32;
918                let sin_theta_half = (self.theta / 2.0).sin() as f32;
919                let exp_i_phi1 = Complex::new(0.0, self.phi1).exp();
920                let exp_i_phi2 = Complex::new(0.0, self.phi2).exp();
921
922                new_state_vec = execute_on_gpu(
923                    state,
924                    q1,
925                    control_qubits,
926                    KernelType::Matchgate,
927                    global_work_size,
928                    GpuKernelArgs::Matchgate {
929                        q1: q2 as i32,
930                        cos_theta_half,
931                        sin_theta_half,
932                        exp_i_phi1: Float2::new(exp_i_phi1.re as f32, exp_i_phi1.im as f32),
933                        exp_i_phi2: Float2::new(exp_i_phi2.re as f32, exp_i_phi2.im as f32),
934                    },
935                )?;
936            }
937        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
938            // Parallel implementation
939            let cos_theta_half = (self.theta / 2.0).cos();
940            let sin_theta_half = (self.theta / 2.0).sin();
941            let exp_i_phi1 = Complex::new(0.0, self.phi1).exp();
942            let exp_i_phi2 = Complex::new(0.0, self.phi2).exp();
943
944            let updates: Vec<(usize, Complex<f64>)> = (0..(1 << (num_qubits - 2)))
945                .into_par_iter()
946                .flat_map(|i| {
947                    let k = ((i >> q1) << (q1 + 1)) | (i & ((1 << q1) - 1));
948                    let l = ((k >> (q2 - 1)) << q2) | (k & ((1 << (q2 - 1)) - 1));
949
950                    let i01 = l | (1 << q1);
951                    let i10 = l | (1 << q2);
952                    let i11 = l | (1 << q1) | (1 << q2);
953
954                    let mut updates = Vec::new();
955
956                    if check_controls(i01, control_qubits) {
957                        let amp01 = state.state_vector[i01];
958                        let amp10 = state.state_vector[i10];
959
960                        let new_amp01 =
961                            cos_theta_half * amp01 - exp_i_phi1 * sin_theta_half * amp10;
962                        let new_amp10 =
963                            sin_theta_half * amp01 + exp_i_phi1 * cos_theta_half * amp10;
964
965                        updates.push((i01, new_amp01));
966                        updates.push((i10, new_amp10));
967                    }
968
969                    if check_controls(i11, control_qubits) {
970                        let new_amp11 = state.state_vector[i11] * exp_i_phi2;
971                        updates.push((i11, new_amp11));
972                    }
973                    updates
974                })
975                .collect();
976
977            for (idx, val) in updates {
978                new_state_vec[idx] = val;
979            }
980        } else {
981            // Sequential implementation
982            let cos_theta_half = (self.theta / 2.0).cos();
983            let sin_theta_half = (self.theta / 2.0).sin();
984            let exp_i_phi1 = Complex::new(0.0, self.phi1).exp();
985            let exp_i_phi2 = Complex::new(0.0, self.phi2).exp();
986
987            for i in 0..(1 << (num_qubits - 2)) {
988                let k = ((i >> q1) << (q1 + 1)) | (i & ((1 << q1) - 1));
989                let l = ((k >> (q2 - 1)) << q2) | (k & ((1 << (q2 - 1)) - 1));
990
991                let i01 = l | (1 << q1);
992                let i10 = l | (1 << q2);
993                let i11 = l | (1 << q1) | (1 << q2);
994
995                if check_controls(i01, control_qubits) {
996                    let amp01 = state.state_vector[i01];
997                    let amp10 = state.state_vector[i10];
998
999                    new_state_vec[i01] =
1000                        cos_theta_half * amp01 - exp_i_phi1 * sin_theta_half * amp10;
1001                    new_state_vec[i10] =
1002                        sin_theta_half * amp01 + exp_i_phi1 * cos_theta_half * amp10;
1003                }
1004                if check_controls(i11, control_qubits) {
1005                    new_state_vec[i11] = state.state_vector[i11] * exp_i_phi2;
1006                }
1007            }
1008        }
1009
1010        Ok(State {
1011            state_vector: new_state_vec,
1012            num_qubits,
1013        })
1014    }
1015
1016    fn base_qubits(&self) -> usize {
1017        2
1018    }
1019}
1020
1021impl std::fmt::Display for Matchgate {
1022    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1023        write!(f, "Matchgate({:.3}, {:.3}, {:.3})", self.theta, self.phi1, self.phi2)
1024    }
1025}
1026
1027/// Defines a Toffoli operator.
1028///
1029/// A three-qubit operator that flips the target qubit if both control qubits are in the |1> state. Also known as CCNOT (Controlled-Controlled-NOT).
1030#[derive(Debug, Clone, Copy)]
1031pub struct Toffoli;
1032
1033impl Operator for Toffoli {
1034    /// Applies the Toffoli operator to the given state's target qubit, using the control qubits.
1035    ///
1036    /// # Arguments:
1037    ///
1038    /// * `state` - The state to apply the operator to.
1039    ///
1040    /// * `target_qubits` - The target qubit to apply the operator to. This should be a single qubit.
1041    ///
1042    /// * `control_qubits` - The control qubits for the operator. This should be two qubits.
1043    ///
1044    /// # Returns:
1045    ///
1046    /// * The new state after applying the Toffoli operator.
1047    ///
1048    /// # Errors:
1049    ///
1050    /// * `Error::InvalidNumberOfQubits` - If the target or control qubits are not 1 and 2 respectively, or if the control qubits are not different.
1051    ///
1052    /// * `Error::InvalidQubitIndex` - If the target or control qubit indices are invalid for the number of qubits in the state.
1053    ///
1054    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
1055    fn apply(
1056        &self,
1057        state: &State,
1058        target_qubits: &[usize],
1059        control_qubits: &[usize],
1060    ) -> Result<State, Error> {
1061        // Validation
1062        validate_qubits(state, target_qubits, control_qubits, 1)?;
1063
1064        // Additional validation for Toffoli: exactly two control qubits
1065        if control_qubits.len() != 2 {
1066            return Err(Error::InvalidNumberOfQubits(control_qubits.len()));
1067        }
1068
1069        // Additional validation for Toffoli: control qubits must be different
1070        if control_qubits[0] == control_qubits[1] {
1071            return Err(Error::InvalidNumberOfQubits(control_qubits.len()));
1072        }
1073
1074        Pauli::X.apply(state, target_qubits, control_qubits)
1075    }
1076
1077    fn base_qubits(&self) -> usize {
1078        3 // Toffoli acts on 3 qubits (2 control, 1 target)
1079    }
1080
1081    fn to_compilable(&self) -> Option<&dyn Compilable> {
1082        Some(self)
1083    }
1084}
1085
1086impl std::fmt::Display for Toffoli {
1087    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1088        write!(f, "Toffoli")
1089    }
1090}
1091
1092/// Defines an identity operator
1093///
1094/// A single-qubit operator that does not change the state of the qubit.
1095#[derive(Debug, Clone, Copy)]
1096pub struct Identity;
1097
1098impl Operator for Identity {
1099    /// Applies the identity operator to the given state's target qubit.
1100    ///
1101    /// # Arguments:
1102    ///
1103    /// * `state` - The state to apply the operator to.
1104    ///
1105    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1106    ///
1107    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1108    ///
1109    /// # Returns:
1110    ///
1111    /// * The new state after applying the identity operator.
1112    fn apply(
1113        &self,
1114        state: &State,
1115        target_qubits: &[usize],
1116        control_qubits: &[usize],
1117    ) -> Result<State, Error> {
1118        // Validation
1119        validate_qubits(state, target_qubits, control_qubits, 1)?;
1120
1121        // Apply identity operator (no change)
1122        Ok(state.clone())
1123    }
1124
1125    fn base_qubits(&self) -> usize {
1126        1 // Identity acts on 1 qubit only
1127    }
1128
1129    fn to_compilable(&self) -> Option<&dyn Compilable> {
1130        Some(self)
1131    }
1132}
1133
1134impl std::fmt::Display for Identity {
1135    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1136        write!(f, "I")
1137    }
1138}
1139
1140/// Defines a Phase S operator.
1141///
1142/// A single-qubit operator that applies a phase shift to the |1> state. Also known as the S gate or Phase gate.
1143#[derive(Debug, Clone, Copy)]
1144pub struct PhaseS;
1145
1146impl Operator for PhaseS {
1147    /// Applies the Phase S operator to the given state's target qubit.
1148    ///
1149    /// # Arguments:
1150    ///
1151    /// * `state` - The state to apply the operator to.
1152    ///
1153    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1154    ///
1155    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1156    ///
1157    /// # Returns:
1158    ///
1159    /// * The new state after applying the Phase S operator.
1160    fn apply(
1161        &self,
1162        state: &State,
1163        target_qubits: &[usize],
1164        control_qubits: &[usize],
1165    ) -> Result<State, Error> {
1166        // Validation
1167        validate_qubits(state, target_qubits, control_qubits, 1)?;
1168
1169        let target_qubit: usize = target_qubits[0];
1170        let num_qubits: usize = state.num_qubits();
1171
1172        // Apply potentially controlled Phase S operator
1173        #[allow(unused_assignments)]
1174        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1175        let gpu_enabled: bool = cfg!(feature = "gpu");
1176
1177        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1178            #[cfg(feature = "gpu")]
1179            {
1180                let global_work_size = 1 << num_qubits;
1181                new_state_vec = execute_on_gpu(
1182                    state,
1183                    target_qubit,
1184                    control_qubits,
1185                    KernelType::PhaseSOrSdag,
1186                    global_work_size,
1187                    GpuKernelArgs::SOrSdag { sign: 1.0f32 },
1188                )?;
1189            }
1190        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1191            let phase_factor = Complex::new(0.0, 1.0); // Phase shift of pi/2 (i)
1192            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1193            new_state_vec
1194                .par_iter_mut()
1195                .enumerate()
1196                .for_each(|(i, current_amp_ref)| {
1197                    if ((i >> target_qubit) & 1 == 1) && check_controls(i, control_qubits) {
1198                        *current_amp_ref = state.state_vector[i] * phase_factor;
1199                    }
1200                });
1201        } else {
1202            let phase_factor = Complex::new(0.0, 1.0); // Phase shift of pi/2 (i)
1203            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1204            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
1205                let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1206                if target_bit_is_one && check_controls(i, control_qubits) {
1207                    *current_amp_ref = state.state_vector[i] * phase_factor;
1208                }
1209            }
1210        }
1211
1212        Ok(State {
1213            state_vector: new_state_vec,
1214            num_qubits,
1215        })
1216    }
1217
1218    fn base_qubits(&self) -> usize {
1219        1 // Phase S acts on 1 qubit only
1220    }
1221
1222    fn to_compilable(&self) -> Option<&dyn Compilable> {
1223        Some(self)
1224    }
1225}
1226
1227impl std::fmt::Display for PhaseS {
1228    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1229        write!(f, "S")
1230    }
1231}
1232
1233/// Defines a Phase T operator.
1234///
1235/// A single-qubit operator that applies a phase shift to the |1> state. Also known as the T gate or π/8 gate.
1236#[derive(Debug, Clone, Copy)]
1237pub struct PhaseT;
1238
1239impl Operator for PhaseT {
1240    /// Applies the Phase T operator to the given state's target qubit.
1241    ///
1242    /// # Arguments:
1243    ///
1244    /// * `state` - The state to apply the operator to.
1245    ///
1246    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1247    ///
1248    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1249    ///
1250    /// # Returns:
1251    ///
1252    /// * The new state after applying the Phase T operator.
1253    fn apply(
1254        &self,
1255        state: &State,
1256        target_qubits: &[usize],
1257        control_qubits: &[usize],
1258    ) -> Result<State, Error> {
1259        // Validation
1260        validate_qubits(state, target_qubits, control_qubits, 1)?;
1261
1262        let target_qubit = target_qubits[0];
1263        let num_qubits = state.num_qubits();
1264
1265        // Apply potentially controlled Phase T operator
1266        #[allow(unused_assignments)]
1267        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1268        let gpu_enabled: bool = cfg!(feature = "gpu");
1269
1270        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1271            #[cfg(feature = "gpu")]
1272            {
1273                let global_work_size = 1 << num_qubits;
1274                let angle = PI / 4.0;
1275                new_state_vec = execute_on_gpu(
1276                    state,
1277                    target_qubit,
1278                    control_qubits,
1279                    KernelType::PhaseShift,
1280                    global_work_size,
1281                    GpuKernelArgs::PhaseShift {
1282                        cos_angle: angle.cos() as f32,
1283                        sin_angle: angle.sin() as f32,
1284                    },
1285                )?;
1286            }
1287        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1288            let invsqrt2: f64 = 1.0 / (2.0f64).sqrt();
1289            let phase_factor = Complex::new(invsqrt2, invsqrt2); // Phase shift of pi/4 (exp(i*pi/4))
1290            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1291            new_state_vec
1292                .par_iter_mut()
1293                .enumerate()
1294                .for_each(|(i, current_amp_ref)| {
1295                    if ((i >> target_qubit) & 1 == 1) && check_controls(i, control_qubits) {
1296                        *current_amp_ref = state.state_vector[i] * phase_factor;
1297                    }
1298                });
1299        } else {
1300            let invsqrt2: f64 = 1.0 / (2.0f64).sqrt();
1301            let phase_factor = Complex::new(invsqrt2, invsqrt2); // Phase shift of pi/4 (exp(i*pi/4))
1302            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1303            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
1304                let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1305                if target_bit_is_one && check_controls(i, control_qubits) {
1306                    *current_amp_ref = state.state_vector[i] * phase_factor;
1307                }
1308            }
1309        }
1310
1311        Ok(State {
1312            state_vector: new_state_vec,
1313            num_qubits,
1314        })
1315    }
1316
1317    fn base_qubits(&self) -> usize {
1318        1 // Phase T acts on 1 qubit only
1319    }
1320
1321    fn to_compilable(&self) -> Option<&dyn Compilable> {
1322        Some(self)
1323    }
1324}
1325
1326impl std::fmt::Display for PhaseT {
1327    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1328        write!(f, "T")
1329    }
1330}
1331
1332/// Defines a Phase Sdag operator.
1333///
1334/// A single-qubit operator that applies a phase shift to the |1> state. Also known as the S† gate or Phase† gate. Inverse of S gate.
1335#[derive(Debug, Clone, Copy)]
1336pub struct PhaseSdag;
1337
1338impl Operator for PhaseSdag {
1339    /// Applies the Phase Sdag operator to the given state's target qubit.
1340    ///
1341    /// # Arguments:
1342    ///
1343    /// * `state` - The state to apply the operator to.
1344    ///
1345    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1346    ///
1347    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1348    ///
1349    /// # Returns:
1350    ///
1351    /// * The new state after applying the Phase Sdag operator.
1352    fn apply(
1353        &self,
1354        state: &State,
1355        target_qubits: &[usize],
1356        control_qubits: &[usize],
1357    ) -> Result<State, Error> {
1358        // Validation
1359        validate_qubits(state, target_qubits, control_qubits, 1)?;
1360
1361        let target_qubit = target_qubits[0];
1362        let num_qubits = state.num_qubits();
1363
1364        // Apply potentially controlled Phase Sdag operator
1365        #[allow(unused_assignments)]
1366        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1367        let gpu_enabled: bool = cfg!(feature = "gpu");
1368
1369        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1370            #[cfg(feature = "gpu")]
1371            {
1372                let global_work_size = 1 << num_qubits;
1373                new_state_vec = execute_on_gpu(
1374                    state,
1375                    target_qubit,
1376                    control_qubits,
1377                    KernelType::PhaseSOrSdag,
1378                    global_work_size,
1379                    GpuKernelArgs::SOrSdag { sign: -1.0f32 },
1380                )?;
1381            }
1382        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1383            let phase_factor = Complex::new(0.0, -1.0); // Phase shift of -pi/2 (-i)
1384            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1385            new_state_vec
1386                .par_iter_mut()
1387                .enumerate()
1388                .for_each(|(i, current_amp_ref)| {
1389                    if ((i >> target_qubit) & 1 == 1) && check_controls(i, control_qubits) {
1390                        *current_amp_ref = state.state_vector[i] * phase_factor;
1391                    }
1392                });
1393        } else {
1394            let phase_factor = Complex::new(0.0, -1.0); // Phase shift of -pi/2 (-i)
1395            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1396            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
1397                let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1398                if target_bit_is_one && check_controls(i, control_qubits) {
1399                    *current_amp_ref = state.state_vector[i] * phase_factor;
1400                }
1401            }
1402        }
1403
1404        Ok(State {
1405            state_vector: new_state_vec,
1406            num_qubits,
1407        })
1408    }
1409
1410    fn base_qubits(&self) -> usize {
1411        1 // Phase Sdag acts on 1 qubit only
1412    }
1413
1414    fn to_compilable(&self) -> Option<&dyn Compilable> {
1415        Some(self)
1416    }
1417}
1418
1419impl std::fmt::Display for PhaseSdag {
1420    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1421        write!(f, "S†")
1422    }
1423}
1424
1425/// Defines a Phase Tdag operator.
1426///
1427/// A single-qubit operator that applies a phase shift to the |1> state. Also known as the T† gate or π/8† gate. Inverse of T gate.
1428#[derive(Debug, Clone, Copy)]
1429pub struct PhaseTdag;
1430
1431impl Operator for PhaseTdag {
1432    /// Applies the Phase Tdag operator to the given state's target qubit.
1433    ///
1434    /// # Arguments:
1435    ///
1436    /// * `state` - The state to apply the operator to.
1437    ///
1438    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1439    ///
1440    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1441    ///
1442    /// # Returns:
1443    ///
1444    /// * The new state after applying the Phase Tdag operator.
1445    fn apply(
1446        &self,
1447        state: &State,
1448        target_qubits: &[usize],
1449        control_qubits: &[usize],
1450    ) -> Result<State, Error> {
1451        // Validation
1452        validate_qubits(state, target_qubits, control_qubits, 1)?;
1453
1454        let target_qubit = target_qubits[0];
1455        let num_qubits = state.num_qubits();
1456
1457        // Apply potentially controlled Phase Tdag operator
1458        #[allow(unused_assignments)]
1459        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1460        let gpu_enabled: bool = cfg!(feature = "gpu");
1461
1462        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1463            #[cfg(feature = "gpu")]
1464            {
1465                let global_work_size = 1 << num_qubits;
1466                let angle = -PI / 4.0;
1467                new_state_vec = execute_on_gpu(
1468                    state,
1469                    target_qubit,
1470                    control_qubits,
1471                    KernelType::PhaseShift,
1472                    global_work_size,
1473                    GpuKernelArgs::PhaseShift {
1474                        cos_angle: angle.cos() as f32,
1475                        sin_angle: angle.sin() as f32,
1476                    },
1477                )?;
1478            }
1479        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1480            let invsqrt2: f64 = 1.0 / (2.0f64).sqrt();
1481            let phase_factor = Complex::new(invsqrt2, -invsqrt2);
1482            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1483            new_state_vec
1484                .par_iter_mut()
1485                .enumerate()
1486                .for_each(|(i, current_amp_ref)| {
1487                    if ((i >> target_qubit) & 1 == 1) && check_controls(i, control_qubits) {
1488                        *current_amp_ref = state.state_vector[i] * phase_factor;
1489                    }
1490                });
1491        } else {
1492            let invsqrt2: f64 = 1.0 / (2.0f64).sqrt();
1493            let phase_factor = Complex::new(invsqrt2, -invsqrt2);
1494            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1495            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
1496                let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1497                if target_bit_is_one && check_controls(i, control_qubits) {
1498                    *current_amp_ref = state.state_vector[i] * phase_factor;
1499                }
1500            }
1501        }
1502
1503        Ok(State {
1504            state_vector: new_state_vec,
1505            num_qubits,
1506        })
1507    }
1508
1509    fn base_qubits(&self) -> usize {
1510        1 // Phase Tdag acts on 1 qubit only
1511    }
1512
1513    fn to_compilable(&self) -> Option<&dyn Compilable> {
1514        Some(self)
1515    }
1516}
1517
1518impl std::fmt::Display for PhaseTdag {
1519    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1520        write!(f, "T†")
1521    }
1522}
1523
1524/// Defines the phase shift operator
1525///
1526/// A single-qubit operator that applies a phase shift of the provided angle to the |1> state. Also known as the phase shift gate.
1527#[derive(Debug, Clone, Copy)]
1528pub struct PhaseShift {
1529    pub(crate) angle: f64,
1530}
1531
1532impl PhaseShift {
1533    /// Creates a new PhaseShift operator with the given angle.
1534    ///
1535    /// # Arguments:
1536    ///
1537    /// * `angle` - The angle of the phase shift in radians.
1538    pub fn new(angle: f64) -> Self {
1539        PhaseShift { angle }
1540    }
1541}
1542
1543impl Operator for PhaseShift {
1544    /// Applies the phase shift operator to the given state's target qubit.
1545    ///
1546    /// # Arguments:
1547    ///
1548    /// * `state` - The state to apply the operator to.
1549    ///
1550    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1551    ///
1552    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1553    ///
1554    /// # Returns:
1555    ///
1556    /// * The new state after applying the phase shift operator.
1557    ///
1558    /// # Errors:
1559    ///
1560    /// * `Error::InvalidNumberOfQubits` - If the number of target qubits is not 1.
1561    ///
1562    /// * `Error::InvalidQubitIndex` - If the target qubit index or control qubit index is invalid for the number of qubits in the state.
1563    ///
1564    /// * `Error::OverlappingControlAndTargetQubits` - If the control qubit and target qubit indices overlap.
1565    fn apply(
1566        &self,
1567        state: &State,
1568        target_qubits: &[usize],
1569        control_qubits: &[usize],
1570    ) -> Result<State, Error> {
1571        // Validation
1572        validate_qubits(state, target_qubits, control_qubits, 1)?;
1573
1574        let target_qubit = target_qubits[0];
1575        let num_qubits = state.num_qubits();
1576
1577        // Apply potentially controlled Phase Shift operator
1578        #[allow(unused_assignments)]
1579        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1580        let gpu_enabled: bool = cfg!(feature = "gpu");
1581
1582        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1583            #[cfg(feature = "gpu")]
1584            {
1585                let global_work_size = 1 << num_qubits;
1586                new_state_vec = execute_on_gpu(
1587                    state,
1588                    target_qubit,
1589                    control_qubits,
1590                    KernelType::PhaseShift,
1591                    global_work_size,
1592                    GpuKernelArgs::PhaseShift {
1593                        cos_angle: self.angle.cos() as f32,
1594                        sin_angle: self.angle.sin() as f32,
1595                    },
1596                )?;
1597            }
1598        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1599            let phase_factor = Complex::new(self.angle.cos(), self.angle.sin());
1600            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1601            new_state_vec
1602                .par_iter_mut()
1603                .enumerate()
1604                .for_each(|(i, current_amp_ref)| {
1605                    if ((i >> target_qubit) & 1 == 1) && check_controls(i, control_qubits) {
1606                        *current_amp_ref = state.state_vector[i] * phase_factor;
1607                    }
1608                });
1609        } else {
1610            let phase_factor = Complex::new(self.angle.cos(), self.angle.sin());
1611            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1612            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
1613                let target_bit_is_one = (i >> target_qubit) & 1 == 1;
1614                if target_bit_is_one && check_controls(i, control_qubits) {
1615                    *current_amp_ref = state.state_vector[i] * phase_factor;
1616                }
1617            }
1618        }
1619
1620        Ok(State {
1621            state_vector: new_state_vec,
1622            num_qubits,
1623        })
1624    }
1625
1626    fn base_qubits(&self) -> usize {
1627        1 // Phase shift acts on 1 qubit only
1628    }
1629
1630    fn to_compilable(&self) -> Option<&dyn Compilable> {
1631        Some(self)
1632    }
1633}
1634
1635impl std::fmt::Display for PhaseShift {
1636    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1637        write!(f, "P({:.3})", self.angle)
1638    }
1639}
1640
1641/// Defines the rotate-X operator
1642///
1643/// A single-qubit operator that applies a rotation around the X axis of the Bloch sphere by the given angle. Also known as the RX gate.
1644#[derive(Debug, Clone, Copy)]
1645pub struct RotateX {
1646    pub(crate) angle: f64,
1647}
1648
1649impl RotateX {
1650    /// Creates a new RotateX operator with the given angle.
1651    ///
1652    /// # Arguments:
1653    ///
1654    /// * `angle` - The angle of rotation in radians.
1655    pub fn new(angle: f64) -> Self {
1656        RotateX { angle }
1657    }
1658}
1659
1660impl Operator for RotateX {
1661    /// Applies the RotateX operator to the given state's target qubit.
1662    ///
1663    /// # Arguments:
1664    ///
1665    /// * `state` - The state to apply the operator to.
1666    ///
1667    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1668    ///
1669    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1670    ///
1671    /// # Returns:
1672    ///
1673    /// * The new state after applying the RotateX operator.
1674    fn apply(
1675        &self,
1676        state: &State,
1677        target_qubits: &[usize],
1678        control_qubits: &[usize],
1679    ) -> Result<State, Error> {
1680        // Validation
1681        validate_qubits(state, target_qubits, control_qubits, 1)?;
1682
1683        let target_qubit = target_qubits[0];
1684        let num_qubits = state.num_qubits();
1685
1686        // Apply potentially controlled RotateX operator
1687        #[allow(unused_assignments)]
1688        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1689        let gpu_enabled: bool = cfg!(feature = "gpu");
1690
1691        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1692            #[cfg(feature = "gpu")]
1693            {
1694                let half_angle = self.angle / 2.0;
1695                let global_work_size = if num_qubits > 0 {
1696                    1 << (num_qubits - 1)
1697                } else {
1698                    1
1699                };
1700                new_state_vec = execute_on_gpu(
1701                    state,
1702                    target_qubit,
1703                    control_qubits,
1704                    KernelType::RotateX,
1705                    global_work_size,
1706                    GpuKernelArgs::RotationGate {
1707                        cos_half_angle: half_angle.cos() as f32,
1708                        sin_half_angle: half_angle.sin() as f32,
1709                    },
1710                )?;
1711            }
1712        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1713            // Parallel implementation
1714            let half_angle: f64 = self.angle / 2.0;
1715            let cos_half: f64 = half_angle.cos();
1716            let sin_half: f64 = half_angle.sin();
1717            let i_complex: Complex<f64> = Complex::new(0.0, 1.0);
1718            let dim: usize = 1 << num_qubits;
1719            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1720
1721            let updates: Vec<(usize, Complex<f64>)> = (0..dim)
1722                .into_par_iter()
1723                .filter_map(|i| {
1724                    if ((i >> target_qubit) & 1 == 0) && check_controls(i, control_qubits) {
1725                        let j = i | (1 << target_qubit);
1726                        let amp_i = state.state_vector[i];
1727                        let amp_j = state.state_vector[j];
1728                        Some(vec![
1729                            (i, cos_half * amp_i - i_complex * sin_half * amp_j),
1730                            (j, -i_complex * sin_half * amp_i + cos_half * amp_j),
1731                        ])
1732                    } else {
1733                        None
1734                    }
1735                })
1736                .flatten()
1737                .collect();
1738            for (idx, val) in updates {
1739                new_state_vec[idx] = val;
1740            }
1741        } else {
1742            // Sequential implementation
1743            let half_angle: f64 = self.angle / 2.0;
1744            let cos_half: f64 = half_angle.cos();
1745            let sin_half: f64 = half_angle.sin();
1746            let i_complex: Complex<f64> = Complex::new(0.0, 1.0);
1747            let dim: usize = 1 << num_qubits;
1748            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1749
1750            for i in 0..dim {
1751                if (i >> target_qubit) & 1 == 0 {
1752                    let j = i | (1 << target_qubit);
1753                    if check_controls(i, control_qubits) {
1754                        let amp_i = state.state_vector[i];
1755                        let amp_j = state.state_vector[j];
1756                        new_state_vec[i] = cos_half * amp_i - i_complex * sin_half * amp_j;
1757                        new_state_vec[j] = -i_complex * sin_half * amp_i + cos_half * amp_j;
1758                    }
1759                }
1760            }
1761        }
1762
1763        Ok(State {
1764            state_vector: new_state_vec,
1765            num_qubits: state.num_qubits(),
1766        })
1767    }
1768
1769    fn base_qubits(&self) -> usize {
1770        1 // RotateX acts on 1 qubit only
1771    }
1772
1773    fn to_compilable(&self) -> Option<&dyn Compilable> {
1774        Some(self)
1775    }
1776}
1777
1778impl std::fmt::Display for RotateX {
1779    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1780        write!(f, "RX({:.3})", self.angle)
1781    }
1782}
1783
1784/// Defines the rotate-Y operator
1785///
1786/// A single-qubit operator that applies a rotation around the Y axis of the Bloch sphere by the given angle. Also known as the RY gate.
1787#[derive(Debug, Clone, Copy)]
1788pub struct RotateY {
1789    pub(crate) angle: f64,
1790}
1791
1792impl RotateY {
1793    /// Creates a new RotateY operator with the given angle.
1794    ///
1795    /// # Arguments:
1796    ///
1797    /// * `angle` - The angle of rotation in radians.
1798    pub fn new(angle: f64) -> Self {
1799        RotateY { angle }
1800    }
1801}
1802
1803impl Operator for RotateY {
1804    /// Applies the RotateY operator to the given state's target qubit.
1805    ///
1806    /// # Arguments:
1807    ///
1808    /// * `state` - The state to apply the operator to.
1809    ///
1810    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1811    ///
1812    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1813    ///
1814    /// # Returns:
1815    ///
1816    /// * The new state after applying the RotateY operator.
1817    fn apply(
1818        &self,
1819        state: &State,
1820        target_qubits: &[usize],
1821        control_qubits: &[usize],
1822    ) -> Result<State, Error> {
1823        // Validation
1824        validate_qubits(state, target_qubits, control_qubits, 1)?;
1825
1826        let target_qubit = target_qubits[0];
1827        let num_qubits = state.num_qubits();
1828
1829        // Apply potentially controlled RotateY operator
1830        #[allow(unused_assignments)]
1831        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1832        let gpu_enabled: bool = cfg!(feature = "gpu");
1833
1834        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1835            #[cfg(feature = "gpu")]
1836            {
1837                let half_angle = self.angle / 2.0;
1838                let global_work_size = if num_qubits > 0 {
1839                    1 << (num_qubits - 1)
1840                } else {
1841                    1
1842                };
1843                new_state_vec = execute_on_gpu(
1844                    state,
1845                    target_qubit,
1846                    control_qubits,
1847                    KernelType::RotateY,
1848                    global_work_size,
1849                    GpuKernelArgs::RotationGate {
1850                        cos_half_angle: half_angle.cos() as f32,
1851                        sin_half_angle: half_angle.sin() as f32,
1852                    },
1853                )?;
1854            }
1855        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1856            // Parallel implementation
1857            let half_angle: f64 = self.angle / 2.0;
1858            let cos_half: f64 = half_angle.cos();
1859            let sin_half: f64 = half_angle.sin();
1860            let dim: usize = 1 << num_qubits;
1861            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1862
1863            let updates: Vec<(usize, Complex<f64>)> = (0..dim)
1864                .into_par_iter()
1865                .filter_map(|i| {
1866                    if ((i >> target_qubit) & 1 == 0) && check_controls(i, control_qubits) {
1867                        let j = i | (1 << target_qubit);
1868                        let amp_i = state.state_vector[i];
1869                        let amp_j = state.state_vector[j];
1870                        Some(vec![
1871                            (i, cos_half * amp_i - sin_half * amp_j),
1872                            (j, sin_half * amp_i + cos_half * amp_j),
1873                        ])
1874                    } else {
1875                        None
1876                    }
1877                })
1878                .flatten()
1879                .collect();
1880            for (idx, val) in updates {
1881                new_state_vec[idx] = val;
1882            }
1883        } else {
1884            // Sequential implementation
1885            let half_angle: f64 = self.angle / 2.0;
1886            let cos_half: f64 = half_angle.cos();
1887            let sin_half: f64 = half_angle.sin();
1888            let dim: usize = 1 << num_qubits;
1889            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1890
1891            for i in 0..dim {
1892                if (i >> target_qubit) & 1 == 0 {
1893                    let j = i | (1 << target_qubit);
1894                    if check_controls(i, control_qubits) {
1895                        let amp_i = state.state_vector[i];
1896                        let amp_j = state.state_vector[j];
1897                        new_state_vec[i] = cos_half * amp_i - sin_half * amp_j;
1898                        new_state_vec[j] = sin_half * amp_i + cos_half * amp_j;
1899                    }
1900                }
1901            }
1902        }
1903
1904        Ok(State {
1905            state_vector: new_state_vec,
1906            num_qubits: state.num_qubits(),
1907        })
1908    }
1909
1910    fn base_qubits(&self) -> usize {
1911        1 // RotateY acts on 1 qubit only
1912    }
1913
1914    fn to_compilable(&self) -> Option<&dyn Compilable> {
1915        Some(self)
1916    }
1917}
1918
1919impl std::fmt::Display for RotateY {
1920    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1921        write!(f, "RY({:.3})", self.angle)
1922    }
1923}
1924
1925/// Defines the rotate-Z operator
1926///
1927/// A single-qubit operator that applies a rotation around the Z axis of the Bloch sphere by the given angle. Also known as the RZ gate.
1928#[derive(Debug, Clone, Copy)]
1929pub struct RotateZ {
1930    pub(crate) angle: f64,
1931}
1932
1933impl RotateZ {
1934    /// Creates a new RotateZ operator with the given angle.
1935    ///
1936    /// # Arguments:
1937    ///
1938    /// * `angle` - The angle of rotation in radians.
1939    pub fn new(angle: f64) -> Self {
1940        RotateZ { angle }
1941    }
1942}
1943
1944impl Operator for RotateZ {
1945    /// Applies the RotateZ operator to the given state's target qubit.
1946    ///
1947    /// # Arguments:
1948    ///
1949    /// * `state` - The state to apply the operator to.
1950    ///
1951    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
1952    ///
1953    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
1954    ///
1955    /// # Returns:
1956    ///
1957    /// * The new state after applying the RotateZ operator.
1958    fn apply(
1959        &self,
1960        state: &State,
1961        target_qubits: &[usize],
1962        control_qubits: &[usize],
1963    ) -> Result<State, Error> {
1964        // Validation
1965        validate_qubits(state, target_qubits, control_qubits, 1)?;
1966
1967        let target_qubit = target_qubits[0];
1968        let num_qubits = state.num_qubits();
1969
1970        // Apply potentially controlled RotateZ operator
1971        #[allow(unused_assignments)]
1972        let mut new_state_vec: Vec<Complex<f64>> = state.state_vector.clone();
1973        let gpu_enabled: bool = cfg!(feature = "gpu");
1974
1975        if num_qubits >= OPENCL_THRESHOLD_NUM_QUBITS && gpu_enabled {
1976            #[cfg(feature = "gpu")]
1977            {
1978                let half_angle = self.angle / 2.0;
1979                let global_work_size = 1 << num_qubits; // N work items for RZ
1980                new_state_vec = execute_on_gpu(
1981                    state,
1982                    target_qubit,
1983                    control_qubits,
1984                    KernelType::RotateZ,
1985                    global_work_size,
1986                    GpuKernelArgs::RotationGate {
1987                        cos_half_angle: half_angle.cos() as f32,
1988                        sin_half_angle: half_angle.sin() as f32,
1989                    },
1990                )?;
1991            }
1992        } else if num_qubits >= PARALLEL_THRESHOLD_NUM_QUBITS {
1993            // Parallel implementation
1994            let half_angle = self.angle / 2.0;
1995            let phase_0 = Complex::new(half_angle.cos(), -half_angle.sin());
1996            let phase_1 = Complex::new(half_angle.cos(), half_angle.sin());
1997            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
1998
1999            new_state_vec
2000                .par_iter_mut()
2001                .enumerate()
2002                .for_each(|(i, current_amp_ref)| {
2003                    if check_controls(i, control_qubits) {
2004                        let target_bit_is_one = (i >> target_qubit) & 1 == 1;
2005                        if target_bit_is_one {
2006                            *current_amp_ref = state.state_vector[i] * phase_1;
2007                        } else {
2008                            *current_amp_ref = state.state_vector[i] * phase_0;
2009                        }
2010                    }
2011                });
2012        } else {
2013            // Sequential implementation
2014            let half_angle = self.angle / 2.0;
2015            let phase_0 = Complex::new(half_angle.cos(), -half_angle.sin());
2016            let phase_1 = Complex::new(half_angle.cos(), half_angle.sin());
2017            new_state_vec = state.state_vector.clone(); // Ensure cloned for CPU path
2018
2019            for (i, current_amp_ref) in new_state_vec.iter_mut().enumerate() {
2020                if check_controls(i, control_qubits) {
2021                    let target_bit_is_one = (i >> target_qubit) & 1 == 1;
2022                    if target_bit_is_one {
2023                        *current_amp_ref = state.state_vector[i] * phase_1;
2024                    } else {
2025                        *current_amp_ref = state.state_vector[i] * phase_0;
2026                    }
2027                }
2028            }
2029        }
2030
2031        Ok(State {
2032            state_vector: new_state_vec,
2033            num_qubits: state.num_qubits(),
2034        })
2035    }
2036
2037    fn base_qubits(&self) -> usize {
2038        1 // RotateZ acts on 1 qubit only
2039    }
2040
2041    fn to_compilable(&self) -> Option<&dyn Compilable> {
2042        Some(self)
2043    }
2044}
2045
2046impl std::fmt::Display for RotateZ {
2047    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2048        write!(f, "RZ({:.3})", self.angle)
2049    }
2050}
2051
2052/// An arbitrary 2×2 unitary operator.
2053///
2054/// This operator can be applied to a single qubit in a quantum state. It is represented by a 2×2 unitary matrix.
2055/// The matrix must be unitary.
2056/// The operator can be constructed fallibly from a 2 x 2 unitary matrix, or infallibly from a rotation angle and phase shift angle.
2057#[derive(Debug, Clone, Copy)]
2058pub struct Unitary2 {
2059    /// The 2×2 unitary matrix representing the operator.
2060    pub(crate) matrix: [[Complex<f64>; 2]; 2],
2061    /// Flag to indicate whether the unitary was created from the ry_phase or ry_phase dagger constructor
2062    pub(crate) is_ryphase: IsRyPhase
2063}
2064
2065/// Indicates whether the unitary operator is a standard RY phase operator, a RY phase dagger operator, or neither.
2066#[derive(Debug, Clone, Copy, PartialEq)]
2067pub(crate) enum IsRyPhase {
2068    /// The operator is a standard RY phase operator.
2069    /// Stores the original angles
2070    RYPhase(f64, f64), // (theta, phi)
2071    /// The operator is a RY phase dagger operator.
2072    /// Stores the original angles
2073    RYPhaseDagger(f64, f64), // (theta, phi)
2074    /// The operator is not a standard RY phase operator.
2075    None
2076}
2077
2078impl Unitary2 {
2079    /// Creates a new Unitary2 operator with the given 2×2 unitary matrix.
2080    ///
2081    /// # Arguments:
2082    ///
2083    /// * `matrix` - A 2×2 unitary matrix represented as a 2D array of complex numbers.
2084    ///
2085    /// # Returns:
2086    ///
2087    /// * `Result<Self, Error>` - A result containing the new Unitary2 operator or an error if the matrix is not unitary.
2088    ///
2089    /// # Errors:
2090    ///
2091    /// * `Error::NonUnitaryMatrix` - If the provided matrix is not unitary.
2092    pub fn new(matrix: [[Complex<f64>; 2]; 2]) -> Result<Self, Error> {
2093        // Faster 2×2 unitary check: U U_dagger = I (rows are orthonormal)
2094        let tol: f64 = f64::EPSILON * 2.0; // Tolerance for floating point comparisons
2095        let a: Complex<f64> = matrix[0][0]; // U_00
2096        let b: Complex<f64> = matrix[0][1]; // U_01
2097        let c: Complex<f64> = matrix[1][0]; // U_10
2098        let d: Complex<f64> = matrix[1][1]; // U_11
2099
2100        // Check if each row has norm 1
2101        // Row 0: |a|^2 + |b|^2 == 1
2102        if ((a.norm_sqr() + b.norm_sqr()) - 1.0).abs() > tol {
2103            return Err(Error::NonUnitaryMatrix);
2104        }
2105        // Row 1: |c|^2 + |d|^2 == 1
2106        if ((c.norm_sqr() + d.norm_sqr()) - 1.0).abs() > tol {
2107            return Err(Error::NonUnitaryMatrix);
2108        }
2109
2110        // Check if rows are orthogonal
2111        // Row 0 dot Row 1_conj: a*c_conj + b*d_conj == 0
2112        if (a * c.conj() + b * d.conj()).norm_sqr() > tol * tol {
2113            // Compare norm_sqr with tol^2
2114            return Err(Error::NonUnitaryMatrix);
2115        }
2116
2117        Ok(Unitary2 { matrix, is_ryphase: IsRyPhase::None })
2118    }
2119
2120    /// Creates a new Unitary2 operator from a rotation angle theta and phase shift angle phi.
2121    /// This operator can be decomposed into a rotation around the Y axis followed by a phase shift.
2122    /// The enclosed unitary matrix is guaranteed to be unitary.
2123    ///
2124    /// Special cases include:
2125    ///
2126    /// * U(theta, 0) = RY(theta)
2127    /// * U(0, phi) = PhaseShift(phi)
2128    /// * U(Pi/2, Pi) = Hadamard
2129    /// * U(Pi, Pi) = Pauli-X
2130    ///
2131    /// # Arguments:
2132    ///
2133    /// * `theta` - The rotation angle in radians.
2134    ///
2135    /// * `phi` - The phase shift angle in radians
2136    ///
2137    /// # Returns:
2138    ///
2139    /// * `Self` - A new Unitary2 operator representing the rotation and phase shift.
2140    pub fn from_ry_phase(theta: f64, phi: f64) -> Self {
2141        let cos_half_theta = (theta / 2.0).cos();
2142        let sin_half_theta = (theta / 2.0).sin();
2143        let expi_phi = Complex::new(0.0, phi).exp();
2144
2145        // Construct the unitary matrix
2146        let matrix = [
2147            [
2148                Complex::new(cos_half_theta, 0.0),
2149                -expi_phi * sin_half_theta,
2150            ],
2151            [Complex::new(sin_half_theta, 0.0), expi_phi * cos_half_theta],
2152        ];
2153
2154        // Create Unitary2 operator unchecked
2155        Unitary2 { matrix, is_ryphase: IsRyPhase::RYPhase(theta, phi) }
2156    }
2157
2158    /// Creates a new Unitary2 operator from a rotation angle theta and phase shift angle phi.
2159    /// This operator can be decomposed into a phase shift followed by a rotation around the Y axis.
2160    /// The enclosed unitary matrix is guaranteed to be unitary.
2161    ///
2162    /// This is the adjoint of the `ry_phase` operator.
2163    ///
2164    /// # Arguments:
2165    ///
2166    /// * `theta` - The rotation angle in radians.
2167    ///
2168    /// * `phi` - The phase shift angle in radians.
2169    ///
2170    /// # Returns:
2171    ///
2172    /// * `Self` - A new Unitary2 operator representing the phase shift and rotation.
2173    pub fn from_ry_phase_dagger(theta: f64, phi: f64) -> Self {
2174        let cos_half_theta = (theta / 2.0).cos();
2175        let sin_half_theta = (theta / 2.0).sin();
2176        let expi_neg_phi = Complex::new(0.0, -phi).exp();
2177
2178        // Construct the unitary matrix
2179        let matrix = [
2180            [
2181                Complex::new(cos_half_theta, 0.0),
2182                Complex::new(sin_half_theta, 0.0),
2183            ],
2184            [
2185                -expi_neg_phi * sin_half_theta,
2186                expi_neg_phi * cos_half_theta,
2187            ],
2188        ];
2189
2190        // Create Unitary2 operator unchecked
2191        Unitary2 { matrix, is_ryphase: IsRyPhase::RYPhaseDagger(theta, phi) }
2192    }
2193}
2194
2195impl Operator for Unitary2 {
2196    /// Applies the Unitary2 operator to the given state's target qubit.
2197    ///
2198    /// # Arguments:
2199    ///
2200    /// * `state` - The state to apply the operator to.
2201    ///
2202    /// * `target_qubits` - The target qubits to apply the operator to. This should be a single qubit.
2203    ///
2204    /// * `control_qubits` - The control qubits for the operator. If not empty, the operator will be applied conditionally based on the control qubits. Otherwise, it will be applied unconditionally.
2205    ///
2206    /// # Returns:
2207    ///
2208    /// * The new state after applying the Unitary2 operator.
2209    fn apply(
2210        &self,
2211        state: &State,
2212        target_qubits: &[usize],
2213        control_qubits: &[usize],
2214    ) -> Result<State, Error> {
2215        // Validation
2216        validate_qubits(state, target_qubits, control_qubits, 1)?;
2217
2218        let t: usize = target_qubits[0];
2219        let nq: usize = state.num_qubits();
2220
2221        // Apply 2×2 block on each basis‐pair
2222        let dim = 1 << nq;
2223        let mut new_state_vec = state.state_vector.clone();
2224
2225        if nq >= PARALLEL_THRESHOLD_NUM_QUBITS {
2226            // Parallel implementation
2227            let updates: Vec<(usize, Complex<f64>)> = (0..dim)
2228                .into_par_iter()
2229                .filter_map(|i| {
2230                    if ((i >> t) & 1 == 0) && check_controls(i, control_qubits) {
2231                        let j = i | (1 << t);
2232                        let ai = state.state_vector[i];
2233                        let aj = state.state_vector[j];
2234                        Some(vec![
2235                            (i, self.matrix[0][0] * ai + self.matrix[0][1] * aj),
2236                            (j, self.matrix[1][0] * ai + self.matrix[1][1] * aj),
2237                        ])
2238                    } else {
2239                        None
2240                    }
2241                })
2242                .flatten()
2243                .collect();
2244            for (idx, val) in updates {
2245                new_state_vec[idx] = val;
2246            }
2247        } else {
2248            // Sequential implementation
2249            for i in 0..dim {
2250                if (i >> t) & 1 == 0 {
2251                    let j = i | (1 << t);
2252                    if check_controls(i, control_qubits) {
2253                        let ai = state.state_vector[i];
2254                        let aj = state.state_vector[j];
2255                        new_state_vec[i] = self.matrix[0][0] * ai + self.matrix[0][1] * aj;
2256                        new_state_vec[j] = self.matrix[1][0] * ai + self.matrix[1][1] * aj;
2257                    }
2258                }
2259            }
2260        }
2261
2262        Ok(State {
2263            state_vector: new_state_vec,
2264            num_qubits: nq,
2265        })
2266    }
2267
2268    fn base_qubits(&self) -> usize {
2269        1
2270    }
2271
2272    fn to_compilable(&self) -> Option<&dyn Compilable> {
2273        Some(self)
2274    }
2275}
2276
2277impl std::fmt::Display for Unitary2 {
2278    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2279        match self.is_ryphase {
2280            IsRyPhase::RYPhase(theta, phi) => write!(f, "RYP({:.3}, {:.3})", theta, phi),
2281            IsRyPhase::RYPhaseDagger(theta, phi) => write!(f, "RYP†({:.3}, {:.3})", theta, phi),
2282            IsRyPhase::None => write!(f, "Unitary2"),
2283        }
2284    }
2285}