Skip to main content

prism_q/sim/
dispatch.rs

1use crate::backend::mps::MpsBackend;
2use crate::backend::product::ProductStateBackend;
3use crate::backend::sparse::SparseBackend;
4use crate::backend::stabilizer::StabilizerBackend;
5use crate::backend::statevector::StatevectorBackend;
6use crate::backend::tensornetwork::TensorNetworkBackend;
7use crate::backend::{max_statevector_qubits, Backend};
8use crate::circuit::{Circuit, Instruction};
9use crate::error::{PrismError, Result};
10
11#[cfg(feature = "gpu")]
12use std::sync::Arc;
13
14#[cfg(feature = "gpu")]
15use crate::gpu::GpuContext;
16
17use super::{Probabilities, SimulationResult};
18
19pub(super) enum DispatchAction {
20    Backend(Box<dyn Backend>),
21    StabilizerRank,
22    StochasticPauli { num_samples: usize },
23    DeterministicPauli { epsilon: f64, max_terms: usize },
24}
25
26pub(super) const AUTO_MPS_BOND_DIM: usize = 256;
27
28pub(super) const MAX_AUTO_T_COUNT_EXACT: usize = 18;
29
30pub(super) const MAX_AUTO_T_COUNT_APPROX: usize = 28;
31
32pub(super) const MAX_AUTO_T_COUNT_SHOTS: usize = 40;
33
34pub(super) const MAX_STABILIZER_RANK_QUBITS: usize = 25;
35
36pub(super) const AUTO_APPROX_MAX_TERMS: usize = 8192;
37
38pub(super) const MIN_QUBITS_FOR_SPD_AUTO: usize = 12;
39
40pub(super) const AUTO_SPD_MAX_TERMS: usize = 65536;
41
42pub(super) const MIN_FACTORED_STABILIZER_QUBITS: usize = 128;
43
44pub(super) const MIN_BLOCK_FOR_FACTORED_STAB: usize = 16;
45
46#[inline]
47pub(super) fn stabilizer_rank_budget(num_qubits: usize) -> usize {
48    let log2n = if num_qubits >= 2 {
49        (num_qubits as f64).log2().ceil() as usize * 2
50    } else {
51        0
52    };
53    num_qubits.saturating_sub(log2n)
54}
55
56// GPU crossover threshold and its env override live in `crate::gpu` so users
57// can introspect them without depending on internal dispatch plumbing. The
58// dispatch layer calls `crate::gpu::min_qubits()` directly; there is no
59// private duplicate.
60
61/// Automatically select the optimal backend based on circuit analysis.
62///
63/// Decision tree:
64/// 1. No entangling gates        → ProductState (O(n))
65/// 2. All Clifford gates         → Stabilizer (O(n²))
66/// 3. Clifford+T, t ≤ 12        → StabilizerRank (O(2^t · n²))
67/// 4. Above memory limit:
68///    a. Sparse-friendly         → Sparse (O(k) where k = non-zero amplitudes)
69///    b. Otherwise               → MPS (bounded bond dimension)
70/// 5. Otherwise                  → Statevector (exact, general-purpose)
71#[derive(Debug, Clone)]
72pub enum BackendKind {
73    Auto,
74    Statevector,
75    Stabilizer,
76    Sparse,
77    Mps {
78        max_bond_dim: usize,
79    },
80    ProductState,
81    TensorNetwork,
82    Factored,
83    StabilizerRank,
84    FactoredStabilizer,
85    StochasticPauli {
86        num_samples: usize,
87    },
88    DeterministicPauli {
89        epsilon: f64,
90        max_terms: usize,
91    },
92    /// Statevector backed by a CUDA GPU execution context.
93    ///
94    /// Circuits (or decomposed sub-blocks) with fewer than
95    /// [`crate::gpu::min_qubits()`] qubits (tunable via
96    /// `PRISM_GPU_MIN_QUBITS`, default [`crate::gpu::MIN_QUBITS_DEFAULT`])
97    /// transparently fall back to the host statevector path, since
98    /// small states do not survive PCIe and launch-latency overhead.
99    /// Larger circuits allocate a device-resident state and route gate
100    /// application through GPU kernels.
101    ///
102    /// Compose with [`crate::sim::run_with`] to get fusion + independent-
103    /// subsystem decomposition for free; each sub-block is evaluated against
104    /// the crossover independently. See [`crate::sim::run_with_gpu`] for a
105    /// one-liner wrapper when you already have an `Arc<GpuContext>`.
106    #[cfg(feature = "gpu")]
107    StatevectorGpu {
108        context: Arc<GpuContext>,
109    },
110    /// Stabilizer backend backed by a CUDA GPU tableau.
111    ///
112    /// Circuits (or decomposed sub-blocks) with fewer than
113    /// [`crate::gpu::stabilizer_min_qubits()`] qubits (tunable via
114    /// `PRISM_STABILIZER_GPU_MIN_QUBITS`, default
115    /// [`crate::gpu::STABILIZER_MIN_QUBITS_DEFAULT`]) fall back to the CPU
116    /// stabilizer path. The GPU path routes gate application to device
117    /// kernels. Measurement and reset stay on device, while probabilities and
118    /// export-style helpers still read back to the CPU algorithms.
119    ///
120    /// Compose with [`crate::sim::run_with`] to pick up independent-subsystem
121    /// decomposition; non-Clifford circuits are rejected at dispatch time with
122    /// the same error shape as [`BackendKind::Stabilizer`].
123    #[cfg(feature = "gpu")]
124    StabilizerGpu {
125        context: Arc<GpuContext>,
126    },
127}
128
129impl BackendKind {
130    pub fn supports_noisy_per_shot(&self) -> bool {
131        !matches!(
132            self,
133            BackendKind::StabilizerRank
134                | BackendKind::StochasticPauli { .. }
135                | BackendKind::DeterministicPauli { .. }
136        )
137    }
138
139    pub fn supports_general_noise(&self) -> bool {
140        match self {
141            BackendKind::Auto
142            | BackendKind::Statevector
143            | BackendKind::Sparse
144            | BackendKind::Mps { .. }
145            | BackendKind::ProductState
146            | BackendKind::Factored => true,
147            #[cfg(feature = "gpu")]
148            BackendKind::StatevectorGpu { .. } => true,
149            _ => false,
150        }
151    }
152
153    pub(crate) fn is_stabilizer_family(&self) -> bool {
154        matches!(
155            self,
156            BackendKind::Stabilizer | BackendKind::FactoredStabilizer
157        ) || {
158            #[cfg(feature = "gpu")]
159            {
160                matches!(self, BackendKind::StabilizerGpu { .. })
161            }
162            #[cfg(not(feature = "gpu"))]
163            {
164                false
165            }
166        }
167    }
168
169    pub(crate) fn general_noise_backend_names() -> &'static str {
170        #[cfg(feature = "gpu")]
171        {
172            "Auto, Statevector, StatevectorGpu, Sparse, Mps, ProductState, or Factored"
173        }
174        #[cfg(not(feature = "gpu"))]
175        {
176            "Auto, Statevector, Sparse, Mps, ProductState, or Factored"
177        }
178    }
179}
180
181pub(super) fn validate_explicit_backend(kind: &BackendKind, circuit: &Circuit) -> Result<()> {
182    if kind.is_stabilizer_family() && !circuit.is_clifford_only() {
183        return Err(PrismError::IncompatibleBackend {
184            backend: "stabilizer".into(),
185            reason: "circuit contains non-Clifford gates".into(),
186        });
187    }
188    match kind {
189        BackendKind::ProductState if circuit.has_entangling_gates() => {
190            return Err(PrismError::IncompatibleBackend {
191                backend: "productstate".into(),
192                reason: "circuit contains entangling gates".into(),
193            });
194        }
195        BackendKind::StabilizerRank if !circuit.has_t_gates() => {
196            return Err(PrismError::IncompatibleBackend {
197                backend: "stabilizer_rank".into(),
198                reason: "circuit has no T gates; use Stabilizer instead".into(),
199            });
200        }
201        _ => {}
202    }
203    Ok(())
204}
205
206pub(super) fn supports_fused_for_kind(kind: &BackendKind, circuit: &Circuit) -> bool {
207    match kind {
208        BackendKind::Stabilizer
209        | BackendKind::FactoredStabilizer
210        | BackendKind::StabilizerRank
211        | BackendKind::StochasticPauli { .. }
212        | BackendKind::DeterministicPauli { .. } => false,
213        #[cfg(feature = "gpu")]
214        BackendKind::StabilizerGpu { .. } => false,
215        BackendKind::Auto => !(circuit.is_clifford_only() && circuit.has_entangling_gates()),
216        _ => true,
217    }
218}
219
220/// Build a `StatevectorBackend` configured for GPU execution if the circuit
221/// is large enough to clear the crossover, otherwise a plain host
222/// backend. Called from `select_dispatch` (which runs per sub-block after
223/// decomposition) so small blocks transparently stay on CPU.
224#[cfg(feature = "gpu")]
225fn statevector_gpu_with_crossover(
226    context: &Arc<GpuContext>,
227    circuit: &Circuit,
228    seed: u64,
229) -> StatevectorBackend {
230    if circuit.num_qubits >= crate::gpu::min_qubits() {
231        StatevectorBackend::new(seed).with_gpu(context.clone())
232    } else {
233        StatevectorBackend::new(seed)
234    }
235}
236
237/// Build a `StabilizerBackend` configured for GPU execution if the circuit
238/// is large enough to clear the stabilizer crossover, otherwise a plain
239/// host backend.
240#[cfg(feature = "gpu")]
241fn stabilizer_gpu_with_crossover(
242    context: &Arc<GpuContext>,
243    circuit: &Circuit,
244    seed: u64,
245) -> StabilizerBackend {
246    if circuit.num_qubits >= crate::gpu::stabilizer_min_qubits() {
247        StabilizerBackend::new(seed).with_gpu(context.clone())
248    } else {
249        StabilizerBackend::new(seed)
250    }
251}
252
253pub(super) fn select_dispatch(
254    kind: &BackendKind,
255    circuit: &Circuit,
256    seed: u64,
257    has_partial_independence: bool,
258) -> DispatchAction {
259    match kind {
260        BackendKind::Auto => {
261            if !circuit.has_entangling_gates() {
262                DispatchAction::Backend(Box::new(ProductStateBackend::new(seed)))
263            } else if circuit.is_clifford_only() {
264                DispatchAction::Backend(Box::new(StabilizerBackend::new(seed)))
265            } else if circuit.num_qubits > max_statevector_qubits() {
266                if circuit.is_sparse_friendly() {
267                    DispatchAction::Backend(Box::new(SparseBackend::new(seed)))
268                } else {
269                    DispatchAction::Backend(Box::new(MpsBackend::new(seed, AUTO_MPS_BOND_DIM)))
270                }
271            } else if has_partial_independence {
272                DispatchAction::Backend(Box::new(crate::backend::factored::FactoredBackend::new(
273                    seed,
274                )))
275            } else {
276                DispatchAction::Backend(Box::new(StatevectorBackend::new(seed)))
277            }
278        }
279        BackendKind::Statevector => {
280            DispatchAction::Backend(Box::new(StatevectorBackend::new(seed)))
281        }
282        BackendKind::Stabilizer => DispatchAction::Backend(Box::new(StabilizerBackend::new(seed))),
283        BackendKind::Sparse => DispatchAction::Backend(Box::new(SparseBackend::new(seed))),
284        BackendKind::Mps { max_bond_dim } => {
285            DispatchAction::Backend(Box::new(MpsBackend::new(seed, *max_bond_dim)))
286        }
287        BackendKind::ProductState => {
288            DispatchAction::Backend(Box::new(ProductStateBackend::new(seed)))
289        }
290        BackendKind::TensorNetwork => {
291            DispatchAction::Backend(Box::new(TensorNetworkBackend::new(seed)))
292        }
293        BackendKind::Factored => DispatchAction::Backend(Box::new(
294            crate::backend::factored::FactoredBackend::new(seed),
295        )),
296        BackendKind::FactoredStabilizer => DispatchAction::Backend(Box::new(
297            crate::backend::factored_stabilizer::FactoredStabilizerBackend::new(seed),
298        )),
299        BackendKind::StabilizerRank => DispatchAction::StabilizerRank,
300        BackendKind::StochasticPauli { num_samples } => DispatchAction::StochasticPauli {
301            num_samples: *num_samples,
302        },
303        BackendKind::DeterministicPauli { epsilon, max_terms } => {
304            DispatchAction::DeterministicPauli {
305                epsilon: *epsilon,
306                max_terms: *max_terms,
307            }
308        }
309        #[cfg(feature = "gpu")]
310        BackendKind::StatevectorGpu { context } => DispatchAction::Backend(Box::new(
311            statevector_gpu_with_crossover(context, circuit, seed),
312        )),
313        #[cfg(feature = "gpu")]
314        BackendKind::StabilizerGpu { context } => DispatchAction::Backend(Box::new(
315            stabilizer_gpu_with_crossover(context, circuit, seed),
316        )),
317    }
318}
319
320pub(super) fn select_backend(
321    kind: &BackendKind,
322    circuit: &Circuit,
323    seed: u64,
324    has_partial_independence: bool,
325) -> Box<dyn Backend> {
326    match select_dispatch(kind, circuit, seed, has_partial_independence) {
327        DispatchAction::Backend(b) => b,
328        _ => unreachable!("non-backend dispatch should be handled by caller"),
329    }
330}
331
332#[inline]
333pub(super) fn min_clifford_prefix_gates(num_qubits: usize) -> usize {
334    (num_qubits * 2).max(16)
335}
336
337pub(super) fn has_temporal_clifford_opportunity(kind: &BackendKind, circuit: &Circuit) -> bool {
338    if !matches!(kind, BackendKind::Auto) {
339        return false;
340    }
341    if circuit.num_qubits > max_statevector_qubits() {
342        return false;
343    }
344    let min_gates = min_clifford_prefix_gates(circuit.num_qubits);
345    let mut prefix_gates = 0;
346    for inst in &circuit.instructions {
347        match inst {
348            Instruction::Gate { gate, .. } => {
349                if !gate.is_clifford() {
350                    break;
351                }
352                prefix_gates += 1;
353            }
354            Instruction::Measure { .. }
355            | Instruction::Reset { .. }
356            | Instruction::Conditional { .. } => break,
357            Instruction::Barrier { .. } => {}
358        }
359    }
360    prefix_gates >= min_gates && prefix_gates < circuit.instructions.len()
361}
362
363pub(super) fn try_temporal_clifford(
364    kind: &BackendKind,
365    circuit: &Circuit,
366    seed: u64,
367) -> Option<Result<SimulationResult>> {
368    if !matches!(kind, BackendKind::Auto) {
369        return None;
370    }
371    if circuit.num_qubits > max_statevector_qubits() {
372        return None;
373    }
374    let (prefix, tail) = circuit.clifford_prefix_split()?;
375    if prefix.gate_count() < min_clifford_prefix_gates(circuit.num_qubits) {
376        return None;
377    }
378
379    let mut stab = StabilizerBackend::new(seed);
380    if let Err(e) = stab.init(prefix.num_qubits, prefix.num_classical_bits) {
381        return Some(Err(e));
382    }
383    stab.enable_lazy_destab();
384    for inst in &prefix.instructions {
385        if let Err(e) = stab.apply(inst) {
386            return Some(Err(e));
387        }
388    }
389
390    let state = match stab.export_statevector() {
391        Ok(s) => s,
392        Err(e) => return Some(Err(e)),
393    };
394
395    let mut sv = StatevectorBackend::new(seed);
396    if let Err(e) = sv.init_from_state(state, tail.num_classical_bits) {
397        return Some(Err(e));
398    }
399
400    let fused_tail = crate::circuit::fusion::fuse_circuit(&tail, sv.supports_fused_gates());
401    for inst in &fused_tail.instructions {
402        if let Err(e) = sv.apply(inst) {
403            return Some(Err(e));
404        }
405    }
406
407    let probs = sv.probabilities().ok().map(Probabilities::Dense);
408
409    Some(Ok(SimulationResult {
410        classical_bits: sv.classical_results().to_vec(),
411        probabilities: probs,
412    }))
413}
414
415#[cfg(all(test, feature = "gpu"))]
416mod gpu_crossover_tests {
417    use super::*;
418    use crate::gates::Gate;
419    use crate::sim::run_with;
420
421    fn stub_kind() -> BackendKind {
422        BackendKind::StatevectorGpu {
423            context: GpuContext::stub_for_tests(),
424        }
425    }
426
427    /// `run_with_gpu` must compose identically to constructing the variant
428    /// manually. Uses the stub context at a small circuit so crossover fires
429    /// and proves the composition is side-effect equivalent.
430    #[test]
431    fn run_with_gpu_wraps_statevector_gpu_variant() {
432        let ctx = GpuContext::stub_for_tests();
433        let mut circuit = Circuit::new(4, 0);
434        circuit.add_gate(Gate::H, &[0]);
435        circuit.add_gate(Gate::Cx, &[0, 1]);
436
437        let direct = crate::sim::run_with_gpu(&circuit, 42, ctx.clone())
438            .expect("run_with_gpu must honor crossover and route to CPU");
439        let manual = run_with(stub_kind(), &circuit, 42).expect("manual variant reference");
440
441        let dp = direct.probabilities.expect("direct probs").to_vec();
442        let mp = manual.probabilities.expect("manual probs").to_vec();
443        assert_eq!(dp, mp);
444    }
445
446    /// A 4q circuit is far below the default 14q threshold. If the dispatch
447    /// layer were to build a GPU backend anyway, `GpuState::new` on the stub
448    /// context would return `BackendUnsupported`. Success proves the
449    /// crossover in `select_dispatch` is routing small circuits to the host
450    /// path.
451    #[test]
452    fn small_circuit_routes_to_cpu() {
453        let mut circuit = Circuit::new(4, 0);
454        circuit.add_gate(Gate::H, &[0]);
455        circuit.add_gate(Gate::Cx, &[0, 1]);
456        circuit.add_gate(Gate::H, &[2]);
457        circuit.add_gate(Gate::Cx, &[2, 3]);
458
459        let result = run_with(stub_kind(), &circuit, 42)
460            .expect("stub context must not be touched for a 4q circuit");
461        let probs = result
462            .probabilities
463            .expect("probabilities missing")
464            .to_vec();
465
466        let mut expected = [0.0_f64; 16];
467        expected[0b0000] = 0.25;
468        expected[0b0011] = 0.25;
469        expected[0b1100] = 0.25;
470        expected[0b1111] = 0.25;
471        for (i, (p, e)) in probs.iter().zip(&expected).enumerate() {
472            assert!((p - e).abs() < 1e-10, "p[{i}] = {p}, expected {e}");
473        }
474    }
475
476    /// `independent_bell_pairs(8)` spans 16 qubits but decomposes into 8
477    /// independent 2q blocks. With `BackendKind::StatevectorGpu`, each
478    /// sub-block is below the 14q threshold and must route to CPU. If
479    /// decomposition failed to fire, the 16q monolithic path would attempt
480    /// `GpuState::new` through the stub and return `BackendUnsupported`.
481    /// Success here proves decomposition survives across the GPU dispatch.
482    #[test]
483    fn decomposable_16q_circuit_runs_per_block_on_cpu() {
484        let circuit = crate::circuits::independent_bell_pairs(8);
485        assert_eq!(circuit.num_qubits, 16);
486
487        let cpu = run_with(BackendKind::Statevector, &circuit, 42).expect("cpu baseline");
488        let gpu = run_with(stub_kind(), &circuit, 42).expect("stub must stay out of the way");
489
490        let cpu_p = cpu.probabilities.expect("cpu probs").to_vec();
491        let gpu_p = gpu.probabilities.expect("gpu probs").to_vec();
492        assert_eq!(cpu_p.len(), gpu_p.len());
493        for (i, (c, g)) in cpu_p.iter().zip(gpu_p.iter()).enumerate() {
494            assert!(
495                (c - g).abs() < 1e-10,
496                "prob[{i}] cpu={c}, gpu={g}, diff={}",
497                (c - g).abs()
498            );
499        }
500    }
501
502    fn stabilizer_stub_kind() -> BackendKind {
503        BackendKind::StabilizerGpu {
504            context: GpuContext::stub_for_tests(),
505        }
506    }
507
508    /// A 4q Clifford circuit is far below the stabilizer GPU threshold, so the
509    /// stub context must never be touched. Produces the same measurement bits
510    /// as a plain CPU stabilizer run.
511    #[test]
512    fn stabilizer_gpu_small_circuit_routes_to_cpu() {
513        let mut circuit = Circuit::new(4, 4);
514        circuit.add_gate(Gate::H, &[0]);
515        circuit.add_gate(Gate::Cx, &[0, 1]);
516        circuit.add_gate(Gate::Cx, &[1, 2]);
517        circuit.add_gate(Gate::Cx, &[2, 3]);
518        circuit.add_measure(0, 0);
519        circuit.add_measure(1, 1);
520        circuit.add_measure(2, 2);
521        circuit.add_measure(3, 3);
522
523        let cpu_run = run_with(BackendKind::Stabilizer, &circuit, 42).expect("cpu baseline");
524        let gpu_run = run_with(stabilizer_stub_kind(), &circuit, 42)
525            .expect("stub must stay out of the way for small circuits");
526        assert_eq!(cpu_run.classical_bits, gpu_run.classical_bits);
527    }
528
529    /// Non-Clifford circuits are rejected at dispatch time with the same error
530    /// shape as `BackendKind::Stabilizer`.
531    #[test]
532    fn stabilizer_gpu_rejects_non_clifford_at_dispatch() {
533        let mut circuit = Circuit::new(2, 0);
534        circuit.add_gate(Gate::T, &[0]);
535        let err = run_with(stabilizer_stub_kind(), &circuit, 42).unwrap_err();
536        assert!(matches!(err, PrismError::IncompatibleBackend { .. }));
537    }
538}