use num_complex::Complex64;
use rand::SeedableRng;
use rand_chacha::ChaCha8Rng;
use smallvec::smallvec;
use crate::backend::stabilizer::StabilizerBackend;
use crate::backend::Backend;
use crate::circuit::{Circuit, Instruction, SmallVec};
use crate::error::Result;
use crate::gates::Gate;
use crate::sim::compiled::{
batch_propagate_backward, compile_measurements, xor_words, PackedShots,
};
use crate::sim::ShotsResult;
#[derive(Debug, Clone)]
pub enum NoiseChannel {
Pauli { px: f64, py: f64, pz: f64 },
Depolarizing { p: f64 },
AmplitudeDamping { gamma: f64 },
PhaseDamping { gamma: f64 },
ThermalRelaxation { t1: f64, t2: f64, gate_time: f64 },
TwoQubitDepolarizing { p: f64 },
Custom { kraus: Vec<[[Complex64; 2]; 2]> },
}
impl NoiseChannel {
pub fn as_pauli(&self) -> Option<(f64, f64, f64)> {
match self {
NoiseChannel::Pauli { px, py, pz } => Some((*px, *py, *pz)),
NoiseChannel::Depolarizing { p } => {
let pp = p / 3.0;
Some((pp, pp, pp))
}
_ => None,
}
}
pub fn is_pauli(&self) -> bool {
matches!(
self,
NoiseChannel::Pauli { .. } | NoiseChannel::Depolarizing { .. }
)
}
pub fn is_exactly_samplable(&self) -> bool {
self.validate().is_ok()
}
pub fn validate(&self) -> Result<()> {
match self {
NoiseChannel::Pauli { px, py, pz } => {
validate_probability("px", *px)?;
validate_probability("py", *py)?;
validate_probability("pz", *pz)?;
if *px + *py + *pz > 1.0 + 1e-12 {
return Err(crate::error::PrismError::InvalidParameter {
message: "Pauli channel probabilities must sum to <= 1".into(),
});
}
}
NoiseChannel::Depolarizing { p } | NoiseChannel::TwoQubitDepolarizing { p } => {
validate_probability("p", *p)?;
}
NoiseChannel::AmplitudeDamping { gamma } | NoiseChannel::PhaseDamping { gamma } => {
validate_probability("gamma", *gamma)?;
}
NoiseChannel::ThermalRelaxation { t1, t2, gate_time } => {
if !t1.is_finite() || *t1 <= 0.0 {
return Err(crate::error::PrismError::InvalidParameter {
message: "thermal relaxation t1 must be finite and positive".into(),
});
}
if !t2.is_finite() || *t2 <= 0.0 {
return Err(crate::error::PrismError::InvalidParameter {
message: "thermal relaxation t2 must be finite and positive".into(),
});
}
if !gate_time.is_finite() || *gate_time < 0.0 {
return Err(crate::error::PrismError::InvalidParameter {
message: "thermal relaxation gate_time must be finite and non-negative"
.into(),
});
}
}
NoiseChannel::Custom { kraus } => {
if kraus.is_empty() {
return Err(crate::error::PrismError::InvalidParameter {
message: "Custom Kraus channel must contain at least one operator".into(),
});
}
for (op_idx, op) in kraus.iter().enumerate() {
for (row_idx, row) in op.iter().enumerate() {
for (col_idx, val) in row.iter().enumerate() {
if !val.re.is_finite() || !val.im.is_finite() {
return Err(crate::error::PrismError::InvalidParameter {
message: format!(
"Custom Kraus operator {op_idx} entry ({row_idx},{col_idx}) must be finite"
),
});
}
}
}
}
}
}
Ok(())
}
}
fn validate_probability(name: &str, value: f64) -> Result<()> {
if !value.is_finite() || !(0.0..=1.0).contains(&value) {
return Err(crate::error::PrismError::InvalidParameter {
message: format!("{name} must be finite and in [0, 1]"),
});
}
Ok(())
}
#[derive(Debug, Clone)]
pub struct NoiseEvent {
pub channel: NoiseChannel,
pub qubits: SmallVec<[usize; 2]>,
}
impl NoiseEvent {
pub fn pauli(qubit: usize, px: f64, py: f64, pz: f64) -> Self {
Self {
channel: NoiseChannel::Pauli { px, py, pz },
qubits: smallvec![qubit],
}
}
pub fn qubit(&self) -> usize {
self.qubits[0]
}
pub fn pauli_probs(&self) -> (f64, f64, f64) {
self.channel
.as_pauli()
.expect("pauli_probs called on non-Pauli channel; caller must use ensure_pauli_only")
}
}
#[derive(Debug, Clone)]
pub struct ReadoutError {
pub p01: f64,
pub p10: f64,
}
pub struct NoiseModel {
pub after_gate: Vec<Vec<NoiseEvent>>,
pub readout: Vec<Option<ReadoutError>>,
}
impl NoiseModel {
pub fn uniform_depolarizing(circuit: &Circuit, p: f64) -> Self {
let px = p / 3.0;
let py = p / 3.0;
let pz = p / 3.0;
let mut after_gate = Vec::with_capacity(circuit.instructions.len());
for instr in &circuit.instructions {
match instr {
Instruction::Gate { targets, .. } => {
let ops: Vec<NoiseEvent> = targets
.iter()
.map(|&q| NoiseEvent::pauli(q, px, py, pz))
.collect();
after_gate.push(ops);
}
_ => {
after_gate.push(Vec::new());
}
}
}
Self {
after_gate,
readout: vec![None; circuit.num_classical_bits],
}
}
pub fn with_amplitude_damping(circuit: &Circuit, gamma: f64) -> Self {
let mut after_gate = Vec::with_capacity(circuit.instructions.len());
for instr in &circuit.instructions {
match instr {
Instruction::Gate { targets, .. } => {
let ops: Vec<NoiseEvent> = targets
.iter()
.map(|&q| NoiseEvent {
channel: NoiseChannel::AmplitudeDamping { gamma },
qubits: smallvec![q],
})
.collect();
after_gate.push(ops);
}
_ => {
after_gate.push(Vec::new());
}
}
}
Self {
after_gate,
readout: vec![None; circuit.num_classical_bits],
}
}
pub fn with_readout_error(&mut self, p01: f64, p10: f64) -> &mut Self {
self.readout.fill(Some(ReadoutError { p01, p10 }));
self
}
pub fn is_pauli_only(&self) -> bool {
self.after_gate
.iter()
.flat_map(|events| events.iter())
.all(|e| e.channel.is_pauli())
&& self.readout.iter().all(|r| r.is_none())
}
pub fn has_noise(&self) -> bool {
self.after_gate.iter().any(|events| !events.is_empty())
|| self.readout.iter().any(|r| r.is_some())
}
pub fn ensure_pauli_only(&self) -> Result<()> {
if !self.is_pauli_only() {
return Err(crate::error::PrismError::IncompatibleBackend {
backend: "compiled stabilizer sampler".into(),
reason: "non-Pauli noise channels (amplitude damping, phase damping, \
thermal relaxation, custom Kraus) or readout errors require \
the trajectory engine; use a non-stabilizer backend or a \
Pauli-only noise model"
.into(),
});
}
Ok(())
}
pub fn validate(&self) -> Result<()> {
for (instr_idx, events) in self.after_gate.iter().enumerate() {
for (event_idx, event) in events.iter().enumerate() {
event.channel.validate().map_err(|err| {
crate::error::PrismError::InvalidParameter {
message: format!(
"noise event {event_idx} after instruction {instr_idx}: {err}"
),
}
})?;
let expected_qubits = match &event.channel {
NoiseChannel::TwoQubitDepolarizing { .. } => 2,
_ => 1,
};
if event.qubits.len() != expected_qubits {
return Err(crate::error::PrismError::InvalidParameter {
message: format!(
"noise event {event_idx} after instruction {instr_idx}: expected {expected_qubits} qubit(s), got {}",
event.qubits.len()
),
});
}
}
}
for (bit_idx, readout) in self.readout.iter().enumerate() {
if let Some(readout) = readout {
validate_probability("readout p01", readout.p01).map_err(|err| {
crate::error::PrismError::InvalidParameter {
message: format!("readout error {bit_idx}: {err}"),
}
})?;
validate_probability("readout p10", readout.p10).map_err(|err| {
crate::error::PrismError::InvalidParameter {
message: format!("readout error {bit_idx}: {err}"),
}
})?;
}
}
Ok(())
}
}
struct FlatNoiseSensitivity {
x_data: Vec<u64>,
z_data: Vec<u64>,
probs: Vec<[f64; 3]>,
m_words: usize,
}
impl FlatNoiseSensitivity {
fn new(m_words: usize, capacity: usize) -> Self {
Self {
x_data: Vec::with_capacity(capacity * m_words),
z_data: Vec::with_capacity(capacity * m_words),
probs: Vec::with_capacity(capacity),
m_words,
}
}
fn push(&mut self, x_flip: &[u64], z_flip: &[u64], px: f64, py: f64, pz: f64) {
self.x_data.extend_from_slice(x_flip);
self.z_data.extend_from_slice(z_flip);
self.probs.push([px, py, pz]);
}
#[inline(always)]
fn len(&self) -> usize {
self.probs.len()
}
#[inline(always)]
fn is_empty(&self) -> bool {
self.probs.is_empty()
}
#[inline(always)]
fn x_flip(&self, idx: usize) -> &[u64] {
let off = idx * self.m_words;
&self.x_data[off..off + self.m_words]
}
#[inline(always)]
fn z_flip(&self, idx: usize) -> &[u64] {
let off = idx * self.m_words;
&self.z_data[off..off + self.m_words]
}
}
#[inline(always)]
fn geometric_sample(rng: &mut ChaCha8Rng, ln_1mp: f64) -> usize {
let u: f64 = 1.0 - rand::Rng::random::<f64>(rng);
(u.ln() / ln_1mp) as usize
}
#[cfg(feature = "gpu")]
fn geometric_sample_xoshiro(
rng: &mut crate::sim::compiled::rng::Xoshiro256PlusPlus,
ln_1mp: f64,
) -> usize {
let u: f64 = 1.0 - rng.next_f64();
(u.ln() / ln_1mp) as usize
}
#[cfg(feature = "gpu")]
trait EventRng {
fn uniform(&mut self) -> f64;
fn geom(&mut self, ln_1mp: f64) -> usize;
}
#[cfg(feature = "gpu")]
impl EventRng for ChaCha8Rng {
#[inline]
fn uniform(&mut self) -> f64 {
rand::Rng::random::<f64>(self)
}
#[inline]
fn geom(&mut self, ln_1mp: f64) -> usize {
geometric_sample(self, ln_1mp)
}
}
#[cfg(feature = "gpu")]
impl EventRng for crate::sim::compiled::rng::Xoshiro256PlusPlus {
#[inline]
fn uniform(&mut self) -> f64 {
self.next_f64()
}
#[inline]
fn geom(&mut self, ln_1mp: f64) -> usize {
geometric_sample_xoshiro(self, ln_1mp)
}
}
#[cfg(feature = "gpu")]
#[inline]
fn fill_one_event(
x_slot: &mut [u64],
z_slot: &mut [u64],
probs: [f64; 3],
chunk_shots: usize,
rng: &mut dyn EventRng,
) {
let [px, py, pz] = probs;
let p_event = px + py + pz;
if p_event == 0.0 {
return;
}
let pxy = px + py;
if p_event >= 0.5 || chunk_shots < 32 {
for s in 0..chunk_shots {
let r = rng.uniform();
let slot_idx = s / 64;
let bit = 1u64 << (s % 64);
if r < px {
z_slot[slot_idx] |= bit;
} else if r < pxy {
z_slot[slot_idx] |= bit;
x_slot[slot_idx] |= bit;
} else if r < p_event {
x_slot[slot_idx] |= bit;
}
}
return;
}
let ln_1mp = (1.0 - p_event).ln();
let px_frac = px / p_event;
let pxy_frac = pxy / p_event;
let mut pos = rng.geom(ln_1mp);
while pos < chunk_shots {
let r = rng.uniform();
let slot_idx = pos / 64;
let bit = 1u64 << (pos % 64);
if r < px_frac {
z_slot[slot_idx] |= bit;
} else if r < pxy_frac {
z_slot[slot_idx] |= bit;
x_slot[slot_idx] |= bit;
} else {
x_slot[slot_idx] |= bit;
}
pos += 1 + rng.geom(ln_1mp);
}
}
const NOISE_LUT_K: usize = 8;
const NOISE_LUT_MIN_EVENTS: usize = 16;
const NOISE_LUT_TILE: usize = 4096;
fn unpack_and_remap(
accum: &[u64],
m_words: usize,
num_shots: usize,
classical_bit_order: &[usize],
num_classical: usize,
) -> Vec<Vec<bool>> {
fn unpack_one(src: &[u64], classical_bit_order: &[usize], num_classical: usize) -> Vec<bool> {
let mut out = vec![false; num_classical];
for (mi, &cbit) in classical_bit_order.iter().enumerate() {
if cbit < num_classical {
out[cbit] = (src[mi / 64] >> (mi % 64)) & 1 != 0;
}
}
out
}
#[cfg(feature = "parallel")]
if num_shots >= 256 {
use rayon::prelude::*;
return accum
.par_chunks(m_words)
.map(|src| unpack_one(src, classical_bit_order, num_classical))
.collect();
}
#[cfg(not(feature = "parallel"))]
let _ = num_shots;
accum
.chunks(m_words)
.map(|src| unpack_one(src, classical_bit_order, num_classical))
.collect()
}
fn unpack_and_remap_packed(
packed: &PackedShots,
num_shots: usize,
classical_bit_order: &[usize],
num_classical: usize,
) -> Vec<Vec<bool>> {
fn unpack_one_packed(
packed: &PackedShots,
shot: usize,
classical_bit_order: &[usize],
num_classical: usize,
) -> Vec<bool> {
let mut out = vec![false; num_classical];
for (mi, &cbit) in classical_bit_order.iter().enumerate() {
if cbit < num_classical {
out[cbit] = packed.get_bit(shot, mi);
}
}
out
}
#[cfg(feature = "parallel")]
if num_shots >= 256 {
use rayon::prelude::*;
return (0..num_shots)
.into_par_iter()
.map(|s| unpack_one_packed(packed, s, classical_bit_order, num_classical))
.collect();
}
#[cfg(not(feature = "parallel"))]
let _ = num_shots;
(0..packed.num_shots())
.map(|s| unpack_one_packed(packed, s, classical_bit_order, num_classical))
.collect()
}
struct NoiseFlipLut {
data: Vec<u64>,
m_words: usize,
num_full_groups: usize,
remainder_size: usize,
}
impl NoiseFlipLut {
fn build_from_flat(flat_data: &[u64], num_rows: usize, m_words: usize) -> Self {
let num_full_groups = num_rows / NOISE_LUT_K;
let remainder_size = num_rows % NOISE_LUT_K;
let total_groups = num_full_groups + usize::from(remainder_size > 0);
let entries = 1 << NOISE_LUT_K;
let mut data = vec![0u64; total_groups * entries * m_words];
for g in 0..total_groups {
let group_start = g * NOISE_LUT_K;
let k = if g < num_full_groups {
NOISE_LUT_K
} else {
remainder_size
};
let lut_off = g * entries * m_words;
for byte in 1..(1usize << k) {
let lowest = byte & byte.wrapping_neg();
let row_idx = group_start + lowest.trailing_zeros() as usize;
let prev = byte ^ lowest;
let dst = lut_off + byte * m_words;
let src = lut_off + prev * m_words;
let row_off = row_idx * m_words;
for w in 0..m_words {
data[dst + w] = data[src + w] ^ flat_data[row_off + w];
}
}
}
Self {
data,
m_words,
num_full_groups,
remainder_size,
}
}
#[inline(always)]
fn total_groups(&self) -> usize {
self.num_full_groups + usize::from(self.remainder_size > 0)
}
#[inline(always)]
fn apply_masked(&self, accum: &mut [u64], mask: &[u64]) {
for g in 0..self.total_groups() {
let byte = ((mask[g / 8] >> ((g % 8) * 8)) & 0xFF) as usize;
if byte != 0 {
let offset = (g * (1 << NOISE_LUT_K) + byte) * self.m_words;
xor_words(accum, &self.data[offset..offset + self.m_words]);
}
}
}
}
fn build_noise_luts(events: &FlatNoiseSensitivity) -> (Option<NoiseFlipLut>, Option<NoiseFlipLut>) {
let ne = events.len();
if ne < NOISE_LUT_MIN_EVENTS {
return (None, None);
}
let avg_p: f64 = events
.probs
.iter()
.map(|[px, py, pz]| px + py + pz)
.sum::<f64>()
/ ne as f64;
if avg_p < 0.05 {
return (None, None);
}
let mw = events.m_words;
let z_lut = NoiseFlipLut::build_from_flat(&events.z_data, ne, mw);
let x_lut = NoiseFlipLut::build_from_flat(&events.x_data, ne, mw);
(Some(z_lut), Some(x_lut))
}
#[cfg(feature = "gpu")]
fn flatten_event_thresholds_u64(probs: &[[f64; 3]]) -> (Vec<u64>, bool) {
const SCALE: f64 = 18446744073709549568.0_f64;
const PROB_SUM_SLACK: f64 = 1e-12;
let mut out = Vec::with_capacity(probs.len() * 3);
let mut valid = true;
for &[px, py, pz] in probs {
let finite = px.is_finite() && py.is_finite() && pz.is_finite();
let sum = px + py + pz;
if !finite || px < 0.0 || py < 0.0 || pz < 0.0 || sum > 1.0 + PROB_SUM_SLACK {
valid = false;
}
let to_u64 = |p: f64| -> u64 {
if !p.is_finite() || p <= 0.0 {
0
} else if p >= 1.0 {
u64::MAX
} else {
(p * SCALE) as u64
}
};
out.push(to_u64(px));
out.push(to_u64(px + py));
out.push(to_u64(sum));
}
(out, valid)
}
#[cfg(feature = "gpu")]
fn transpose_events_to_row_major(
events: &FlatNoiseSensitivity,
num_meas: usize,
) -> (Vec<u32>, Vec<u32>) {
let num_events = events.len();
let m_words = events.m_words;
let mut row_counts = vec![0u32; num_meas];
for e in 0..num_events {
let x = &events.x_data[e * m_words..(e + 1) * m_words];
let z = &events.z_data[e * m_words..(e + 1) * m_words];
for w in 0..m_words {
let union = x[w] | z[w];
let mut bits = union;
while bits != 0 {
let row = w * 64 + bits.trailing_zeros() as usize;
if row < num_meas {
row_counts[row] += 1;
}
bits &= bits - 1;
}
}
}
let mut offsets = Vec::with_capacity(num_meas + 1);
offsets.push(0u32);
let mut running = 0u32;
for c in &row_counts {
running += c;
offsets.push(running);
}
let total = running as usize;
let mut entries = vec![0u32; total];
let mut cursor: Vec<u32> = offsets[..num_meas].to_vec();
for e in 0..num_events {
let x = &events.x_data[e * m_words..(e + 1) * m_words];
let z = &events.z_data[e * m_words..(e + 1) * m_words];
for w in 0..m_words {
let union = x[w] | z[w];
let mut bits = union;
while bits != 0 {
let b = bits.trailing_zeros() as usize;
bits &= bits - 1;
let row = w * 64 + b;
if row >= num_meas {
continue;
}
let bit_mask = 1u64 << b;
let flag: u32 = (if x[w] & bit_mask != 0 { 1 } else { 0 })
| (if z[w] & bit_mask != 0 { 2 } else { 0 });
let slot = cursor[row] as usize;
entries[slot] = ((e as u32) << 2) | flag;
cursor[row] += 1;
}
}
}
(offsets, entries)
}
#[cfg(feature = "gpu")]
fn flatten_event_rows_to_csr(
flat_data: &[u64],
num_rows: usize,
m_words: usize,
) -> (Vec<u32>, Vec<u32>) {
let mut row_offsets = Vec::with_capacity(num_rows + 1);
let mut row_indices = Vec::new();
for row in 0..num_rows {
row_offsets.push(row_indices.len() as u32);
let base = row * m_words;
for word in 0..m_words {
let mut bits = flat_data[base + word];
while bits != 0 {
let bit = bits.trailing_zeros() as usize;
row_indices.push((word * 64 + bit) as u32);
bits &= bits - 1;
}
}
}
row_offsets.push(row_indices.len() as u32);
(row_offsets, row_indices)
}
#[cfg(feature = "gpu")]
struct GpuNoiseCache {
context: std::sync::Arc<crate::gpu::GpuContext>,
x_row_offsets_dev: crate::gpu::GpuBuffer<u32>,
x_row_indices_dev: crate::gpu::GpuBuffer<u32>,
z_row_offsets_dev: crate::gpu::GpuBuffer<u32>,
z_row_indices_dev: crate::gpu::GpuBuffer<u32>,
x_masks_dev: crate::gpu::GpuBuffer<u64>,
z_masks_dev: crate::gpu::GpuBuffer<u64>,
x_masks_host: Vec<u64>,
z_masks_host: Vec<u64>,
event_thresholds_dev: crate::gpu::GpuBuffer<u64>,
event_thresholds_valid: bool,
row_event_offsets_dev: crate::gpu::GpuBuffer<u32>,
row_event_entries_dev: crate::gpu::GpuBuffer<u32>,
}
#[cfg(feature = "gpu")]
impl GpuNoiseCache {
fn new(
context: std::sync::Arc<crate::gpu::GpuContext>,
events: &FlatNoiseSensitivity,
) -> Result<Self> {
let device = context.device();
let num_events = events.len();
let (x_row_offsets, x_row_indices) =
flatten_event_rows_to_csr(&events.x_data, num_events, events.m_words);
let (z_row_offsets, z_row_indices) =
flatten_event_rows_to_csr(&events.z_data, num_events, events.m_words);
let mut x_row_offsets_dev =
crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, x_row_offsets.len().max(1))?;
let mut x_row_indices_dev =
crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, x_row_indices.len().max(1))?;
let mut z_row_offsets_dev =
crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, z_row_offsets.len().max(1))?;
let mut z_row_indices_dev =
crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, z_row_indices.len().max(1))?;
if !x_row_offsets.is_empty() {
x_row_offsets_dev.copy_from_host(device, &x_row_offsets)?;
}
if !x_row_indices.is_empty() {
x_row_indices_dev.copy_from_host(device, &x_row_indices)?;
}
if !z_row_offsets.is_empty() {
z_row_offsets_dev.copy_from_host(device, &z_row_offsets)?;
}
if !z_row_indices.is_empty() {
z_row_indices_dev.copy_from_host(device, &z_row_indices)?;
}
let (thresholds_host, thresholds_valid) = flatten_event_thresholds_u64(&events.probs);
let mut event_thresholds_dev =
crate::gpu::GpuBuffer::<u64>::alloc_zeros(device, thresholds_host.len().max(1))?;
if !thresholds_host.is_empty() {
event_thresholds_dev.copy_from_host(device, &thresholds_host)?;
}
let (row_event_offsets_host, row_event_entries_host) =
transpose_events_to_row_major(events, events.m_words * 64);
let mut row_event_offsets_dev =
crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, row_event_offsets_host.len().max(1))?;
let mut row_event_entries_dev =
crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, row_event_entries_host.len().max(1))?;
if !row_event_offsets_host.is_empty() {
row_event_offsets_dev.copy_from_host(device, &row_event_offsets_host)?;
}
if !row_event_entries_host.is_empty() {
row_event_entries_dev.copy_from_host(device, &row_event_entries_host)?;
}
Ok(Self {
context: context.clone(),
x_row_offsets_dev,
x_row_indices_dev,
z_row_offsets_dev,
z_row_indices_dev,
x_masks_dev: crate::gpu::GpuBuffer::<u64>::alloc_zeros(device, 1)?,
z_masks_dev: crate::gpu::GpuBuffer::<u64>::alloc_zeros(device, 1)?,
x_masks_host: Vec::new(),
z_masks_host: Vec::new(),
event_thresholds_dev,
event_thresholds_valid: thresholds_valid,
row_event_offsets_dev,
row_event_entries_dev,
})
}
fn ensure_mask_capacity(&mut self, len: usize) -> Result<()> {
let needed = len.max(1);
let device = self.context.device();
if self.x_masks_dev.len() < needed {
self.x_masks_dev = crate::gpu::GpuBuffer::<u64>::alloc_zeros(device, needed)?;
}
if self.z_masks_dev.len() < needed {
self.z_masks_dev = crate::gpu::GpuBuffer::<u64>::alloc_zeros(device, needed)?;
}
if self.x_masks_host.len() < needed {
self.x_masks_host.resize(needed, 0);
}
if self.z_masks_host.len() < needed {
self.z_masks_host.resize(needed, 0);
}
Ok(())
}
}
pub struct NoisyCompiledSampler {
noiseless: crate::sim::compiled::CompiledSampler,
events: FlatNoiseSensitivity,
num_measurements: usize,
rng: ChaCha8Rng,
z_lut: Option<NoiseFlipLut>,
x_lut: Option<NoiseFlipLut>,
#[cfg(feature = "gpu")]
gpu_noise_cache: Option<GpuNoiseCache>,
}
impl NoisyCompiledSampler {
#[cfg(feature = "gpu")]
pub fn with_gpu(mut self, context: std::sync::Arc<crate::gpu::GpuContext>) -> Self {
self.noiseless = self.noiseless.with_gpu(context);
self.gpu_noise_cache = None;
self
}
fn sample_bulk_words_shot_major_cpu(&mut self, num_shots: usize) -> (Vec<u64>, usize) {
let (mut accum, m_words) = self.noiseless.sample_bulk_words_shot_major(num_shots);
self.apply_noise_bulk(&mut accum, num_shots, m_words);
let ref_bits_packed = self.noiseless.ref_bits_packed();
#[cfg(feature = "parallel")]
if num_shots >= 256 {
use rayon::prelude::*;
accum
.par_chunks_mut(m_words)
.for_each(|shot| xor_words(shot, ref_bits_packed));
} else {
for s in 0..num_shots {
let shot_base = s * m_words;
xor_words(&mut accum[shot_base..shot_base + m_words], ref_bits_packed);
}
}
#[cfg(not(feature = "parallel"))]
for s in 0..num_shots {
let shot_base = s * m_words;
xor_words(&mut accum[shot_base..shot_base + m_words], ref_bits_packed);
}
(accum, m_words)
}
#[cfg(feature = "gpu")]
fn try_sample_bulk_packed_with_gpu_context(&mut self, num_shots: usize) -> Result<PackedShots> {
let m_words = self.num_measurements.div_ceil(64);
if num_shots == 0 || self.num_measurements == 0 {
return Ok(PackedShots::from_shot_major(
vec![0u64; num_shots * m_words],
num_shots,
self.num_measurements,
));
}
let mut accum = match self.noiseless.try_sample_bulk_shot_major_gpu(num_shots) {
Some(Ok(data)) => data,
Some(Err(e)) => return Err(e),
None => {
let (accum, _m_words) = self.sample_bulk_words_shot_major_cpu(num_shots);
return Ok(PackedShots::from_shot_major(
accum,
num_shots,
self.num_measurements,
));
}
};
self.apply_noise_bulk(&mut accum, num_shots, m_words);
Ok(PackedShots::from_shot_major(
accum,
num_shots,
self.num_measurements,
))
}
#[cfg(feature = "gpu")]
fn take_gpu_noise_cache(&mut self) -> Result<GpuNoiseCache> {
if let Some(cache) = self.gpu_noise_cache.take() {
return Ok(cache);
}
let context = self
.noiseless
.gpu_context()
.expect("gpu noise cache requested without gpu_context");
GpuNoiseCache::new(context, &self.events)
}
#[cfg(feature = "gpu")]
fn fill_noise_masks_event_major(
events: &FlatNoiseSensitivity,
rng: &mut ChaCha8Rng,
x_masks: &mut [u64],
z_masks: &mut [u64],
chunk_shots: usize,
chunk_s_words: usize,
) {
x_masks.fill(0);
z_masks.fill(0);
#[cfg(feature = "parallel")]
{
use rand::RngCore;
use rayon::prelude::*;
const MIN_PAR_EVENTS: usize = 64;
const MIN_PAR_SLOTS: usize = 32_768;
let work_slots = events.len().saturating_mul(chunk_s_words);
if events.len() >= MIN_PAR_EVENTS
&& work_slots >= MIN_PAR_SLOTS
&& x_masks.len() == events.len() * chunk_s_words
&& z_masks.len() == events.len() * chunk_s_words
{
let thread_seeds: Vec<[u64; 4]> = (0..events.len())
.map(|_| {
[
rng.next_u64(),
rng.next_u64(),
rng.next_u64(),
rng.next_u64(),
]
})
.collect();
x_masks
.par_chunks_mut(chunk_s_words)
.zip(z_masks.par_chunks_mut(chunk_s_words))
.zip(thread_seeds.par_iter())
.enumerate()
.for_each(|(i, ((x_slot, z_slot), seed))| {
let mut trng =
crate::sim::compiled::rng::Xoshiro256PlusPlus::from_seeds(*seed);
fill_one_event(x_slot, z_slot, events.probs[i], chunk_shots, &mut trng);
});
return;
}
}
for (i, &probs) in events.probs.iter().enumerate() {
let base = i * chunk_s_words;
let end = base + chunk_s_words;
fill_one_event(
&mut x_masks[base..end],
&mut z_masks[base..end],
probs,
chunk_shots,
rng,
);
}
}
#[cfg(feature = "gpu")]
fn apply_noise_device_meas_major(
&mut self,
packed: &mut crate::sim::compiled::DevicePackedShots,
) -> Result<()> {
if self.events.is_empty() || packed.num_shots() == 0 || self.num_measurements == 0 {
return Ok(());
}
let num_events = self.events.len();
let mut cache = self.take_gpu_noise_cache()?;
let total_s_words = packed.s_words();
let context = packed.context().clone();
let num_shots = packed.num_shots();
let use_fused = cache.event_thresholds_valid;
let master_seed: u64 = if use_fused {
rand::RngCore::next_u64(&mut self.rng)
} else {
0
};
let mut shot_start = 0;
while shot_start < num_shots {
let chunk_shots = (shot_start + NOISE_LUT_TILE).min(num_shots) - shot_start;
let chunk_s_words = chunk_shots.div_ceil(64);
let word_offset = shot_start / 64;
if use_fused {
crate::gpu::kernels::bts::generate_and_apply_noise_masks_meas_major_by_row(
&context,
crate::gpu::kernels::bts::NoiseDeviceGenApplyByRow {
meas_major: packed.data_mut(),
num_meas: self.num_measurements,
s_words: total_s_words,
word_offset,
chunk_s_words,
row_event_offsets: &cache.row_event_offsets_dev,
row_event_entries: &cache.row_event_entries_dev,
event_thresholds: &cache.event_thresholds_dev,
master_seed,
batch_offset: word_offset as u64,
},
)?;
} else {
let mask_len = num_events * chunk_s_words;
cache.ensure_mask_capacity(mask_len)?;
Self::fill_noise_masks_event_major(
&self.events,
&mut self.rng,
&mut cache.x_masks_host[..mask_len],
&mut cache.z_masks_host[..mask_len],
chunk_shots,
chunk_s_words,
);
if mask_len > 0 {
cache
.x_masks_dev
.copy_from_host(cache.context.device(), &cache.x_masks_host[..mask_len])?;
cache
.z_masks_dev
.copy_from_host(cache.context.device(), &cache.z_masks_host[..mask_len])?;
let base = crate::gpu::kernels::bts::NoiseApplyBase {
meas_major: packed.data_mut(),
num_meas: self.num_measurements,
s_words: total_s_words,
word_offset,
chunk_s_words,
num_events,
x_row_offsets: &cache.x_row_offsets_dev,
x_row_indices: &cache.x_row_indices_dev,
z_row_offsets: &cache.z_row_offsets_dev,
z_row_indices: &cache.z_row_indices_dev,
};
crate::gpu::kernels::bts::apply_noise_masks_meas_major(
&context,
crate::gpu::kernels::bts::NoiseMaskApply {
base,
x_masks: &cache.x_masks_dev,
z_masks: &cache.z_masks_dev,
},
)?;
}
}
shot_start += chunk_shots;
}
self.gpu_noise_cache = Some(cache);
Ok(())
}
#[cfg(feature = "gpu")]
fn try_sample_marginals_gpu(&mut self, total_shots: usize) -> Option<Result<Vec<f64>>> {
if !self.noiseless.should_use_gpu_bts(total_shots) {
return None;
}
let mut packed = match self.noiseless.sample_bulk_packed_device(total_shots) {
Ok(packed) => packed,
Err(e) => return Some(Err(e)),
};
if let Err(e) = self.apply_noise_device_meas_major(&mut packed) {
return Some(Err(e));
}
Some(packed.marginals())
}
#[cfg(feature = "gpu")]
fn try_sample_counts_gpu(
&mut self,
total_shots: usize,
) -> Option<Result<std::collections::HashMap<Vec<u64>, u64>>> {
if !self.noiseless.should_use_gpu_bts(total_shots) {
return None;
}
let mut packed = match self.noiseless.sample_bulk_packed_device(total_shots) {
Ok(packed) => packed,
Err(e) => return Some(Err(e)),
};
if let Err(e) = self.apply_noise_device_meas_major(&mut packed) {
return Some(Err(e));
}
Some(packed.counts_with_rank_hint(self.num_measurements))
}
pub fn sample_bulk_packed(&mut self, num_shots: usize) -> PackedShots {
match self.try_sample_bulk_packed(num_shots) {
Ok(packed) => packed,
Err(_) => self.sample_bulk_packed_cpu(num_shots),
}
}
pub fn try_sample_bulk_packed(&mut self, num_shots: usize) -> Result<PackedShots> {
#[cfg(feature = "gpu")]
if self.noiseless.has_gpu_context() {
return self.try_sample_bulk_packed_with_gpu_context(num_shots);
}
Ok(self.sample_bulk_packed_cpu(num_shots))
}
fn sample_bulk_packed_cpu(&mut self, num_shots: usize) -> PackedShots {
let m_words = self.num_measurements.div_ceil(64);
if num_shots == 0 || self.num_measurements == 0 {
return PackedShots::from_shot_major(
vec![0u64; num_shots * m_words],
num_shots,
self.num_measurements,
);
}
let (accum, _m_words) = self.sample_bulk_words_shot_major_cpu(num_shots);
PackedShots::from_shot_major(accum, num_shots, self.num_measurements)
}
pub fn sample_chunked<A: crate::sim::compiled::ShotAccumulator>(
&mut self,
total_shots: usize,
acc: &mut A,
) {
let chunk_size = crate::sim::compiled::default_chunk_size(self.num_measurements);
self.sample_chunked_with_size(total_shots, chunk_size, acc);
}
pub fn sample_chunked_with_size<A: crate::sim::compiled::ShotAccumulator>(
&mut self,
total_shots: usize,
chunk_size: usize,
acc: &mut A,
) {
#[cfg(feature = "gpu")]
if self.noiseless.has_gpu_context()
&& self
.noiseless
.should_use_gpu_bts(total_shots.min(chunk_size.max(1)))
{
let mut remaining = total_shots;
while remaining > 0 {
let this_batch = remaining.min(chunk_size);
let packed = match self.try_sample_bulk_packed_with_gpu_context(this_batch) {
Ok(packed) => packed,
Err(_) => self.sample_bulk_packed_cpu(this_batch),
};
acc.accumulate(&packed);
remaining -= this_batch;
}
return;
}
let m_words = self.num_measurements.div_ceil(64);
let mut accum_buf: Vec<u64> = Vec::new();
let mut rand_buf: Vec<u8> = Vec::new();
let ref_bits_packed = self.noiseless.ref_bits_packed().to_vec();
let mut remaining = total_shots;
while remaining > 0 {
let this_batch = remaining.min(chunk_size);
self.noiseless.sample_bulk_words_shot_major_reuse(
&mut accum_buf,
&mut rand_buf,
this_batch,
);
self.apply_noise_bulk(&mut accum_buf, this_batch, m_words);
#[cfg(feature = "parallel")]
if this_batch >= 256 {
use rayon::prelude::*;
accum_buf[..this_batch * m_words]
.par_chunks_mut(m_words)
.for_each(|shot| xor_words(shot, &ref_bits_packed));
} else {
for s in 0..this_batch {
let shot_base = s * m_words;
xor_words(
&mut accum_buf[shot_base..shot_base + m_words],
&ref_bits_packed,
);
}
}
#[cfg(not(feature = "parallel"))]
for s in 0..this_batch {
let shot_base = s * m_words;
xor_words(
&mut accum_buf[shot_base..shot_base + m_words],
&ref_bits_packed,
);
}
let data = std::mem::take(&mut accum_buf);
let packed = PackedShots::from_shot_major(data, this_batch, self.num_measurements);
acc.accumulate(&packed);
accum_buf = packed.into_data();
remaining -= this_batch;
}
}
pub fn sample_counts(
&mut self,
total_shots: usize,
) -> std::collections::HashMap<Vec<u64>, u64> {
match self.try_sample_counts(total_shots) {
Ok(counts) => counts,
Err(_) => self.sample_counts_cpu(total_shots),
}
}
pub fn try_sample_counts(
&mut self,
total_shots: usize,
) -> Result<std::collections::HashMap<Vec<u64>, u64>> {
#[cfg(feature = "gpu")]
if let Some(counts) = self.try_sample_counts_gpu(total_shots) {
return counts;
}
Ok(self.sample_counts_cpu(total_shots))
}
fn sample_counts_cpu(
&mut self,
total_shots: usize,
) -> std::collections::HashMap<Vec<u64>, u64> {
let mut acc = crate::sim::compiled::HistogramAccumulator::new();
self.sample_chunked(total_shots, &mut acc);
acc.into_counts()
}
pub fn sample_marginals(&mut self, total_shots: usize) -> Vec<f64> {
match self.try_sample_marginals(total_shots) {
Ok(marginals) => marginals,
Err(_) => self.sample_marginals_cpu(total_shots),
}
}
pub fn try_sample_marginals(&mut self, total_shots: usize) -> Result<Vec<f64>> {
#[cfg(feature = "gpu")]
if let Some(marginals) = self.try_sample_marginals_gpu(total_shots) {
return marginals;
}
Ok(self.sample_marginals_cpu(total_shots))
}
fn sample_marginals_cpu(&mut self, total_shots: usize) -> Vec<f64> {
let mut acc = crate::sim::compiled::MarginalsAccumulator::new(self.num_measurements);
self.sample_chunked(total_shots, &mut acc);
acc.marginals()
}
#[cfg(test)]
fn sample_bulk(&mut self, num_shots: usize) -> Vec<Vec<bool>> {
self.sample_bulk_packed(num_shots).to_shots()
}
fn apply_noise_bulk(&mut self, accum: &mut [u64], num_shots: usize, m_words: usize) {
if self.events.is_empty() {
return;
}
if self.z_lut.is_some() {
self.apply_noise_bulk_grouped(accum, num_shots, m_words);
} else {
self.apply_noise_bulk_scalar(accum, num_shots, m_words);
}
}
fn apply_noise_bulk_scalar(&mut self, accum: &mut [u64], num_shots: usize, m_words: usize) {
#[cfg(feature = "parallel")]
if num_shots >= 4096 {
self.apply_noise_bulk_scalar_par(accum, num_shots, m_words);
return;
}
self.apply_noise_bulk_scalar_seq(accum, num_shots, m_words);
}
fn apply_noise_bulk_scalar_seq(&mut self, accum: &mut [u64], num_shots: usize, m_words: usize) {
Self::apply_noise_range(&self.events, accum, 0, num_shots, m_words, &mut self.rng);
}
#[cfg(feature = "parallel")]
fn apply_noise_bulk_scalar_par(&mut self, accum: &mut [u64], num_shots: usize, m_words: usize) {
use rayon::prelude::*;
let num_threads = rayon::current_num_threads().max(1);
let shots_per_thread = (num_shots.div_ceil(num_threads) + 63) & !63;
let seeds: Vec<u64> = (0..num_threads)
.map(|_| rand::Rng::random(&mut self.rng))
.collect();
let events = &self.events;
accum
.par_chunks_mut(shots_per_thread * m_words)
.enumerate()
.for_each(|(tid, chunk)| {
let chunk_shots = chunk.len() / m_words;
if chunk_shots == 0 {
return;
}
let mut rng = ChaCha8Rng::seed_from_u64(seeds[tid]);
Self::apply_noise_range(events, chunk, 0, chunk_shots, m_words, &mut rng);
});
}
fn apply_noise_range(
events: &FlatNoiseSensitivity,
accum: &mut [u64],
start: usize,
end: usize,
m_words: usize,
rng: &mut ChaCha8Rng,
) {
let ne = events.len();
let num_shots = end - start;
for i in 0..ne {
let [px, py, pz] = events.probs[i];
let p_event = px + py + pz;
if p_event == 0.0 {
continue;
}
if p_event >= 0.5 || num_shots < 32 {
for s in start..end {
let r: f64 = rand::Rng::random(rng);
if r < px {
let b = s * m_words;
xor_words(&mut accum[b..b + m_words], events.z_flip(i));
} else if r < px + py {
let b = s * m_words;
xor_words(&mut accum[b..b + m_words], events.x_flip(i));
xor_words(&mut accum[b..b + m_words], events.z_flip(i));
} else if r < p_event {
let b = s * m_words;
xor_words(&mut accum[b..b + m_words], events.x_flip(i));
}
}
} else {
let ln_1mp = (1.0 - p_event).ln();
let px_frac = px / p_event;
let pxy_frac = (px + py) / p_event;
let mut pos = start + geometric_sample(rng, ln_1mp);
while pos < end {
let r: f64 = rand::Rng::random(rng);
let b = pos * m_words;
if r < px_frac {
xor_words(&mut accum[b..b + m_words], events.z_flip(i));
} else if r < pxy_frac {
xor_words(&mut accum[b..b + m_words], events.x_flip(i));
xor_words(&mut accum[b..b + m_words], events.z_flip(i));
} else {
xor_words(&mut accum[b..b + m_words], events.x_flip(i));
}
pos += 1 + geometric_sample(rng, ln_1mp);
}
}
}
}
fn apply_noise_bulk_grouped(&mut self, accum: &mut [u64], num_shots: usize, m_words: usize) {
let num_events = self.events.len();
let e_words = num_events.div_ceil(64);
for tile_start in (0..num_shots).step_by(NOISE_LUT_TILE) {
let tile_end = (tile_start + NOISE_LUT_TILE).min(num_shots);
let tile_n = tile_end - tile_start;
let mut z_mask = vec![0u64; tile_n * e_words];
let mut x_mask = vec![0u64; tile_n * e_words];
for i in 0..num_events {
let [px, py, pz] = self.events.probs[i];
let p_event = px + py + pz;
if p_event == 0.0 {
continue;
}
let ew = i / 64;
let eb = 1u64 << (i % 64);
if p_event >= 0.5 || tile_n < 32 {
for s in 0..tile_n {
let r: f64 = rand::Rng::random(&mut self.rng);
if r < px {
z_mask[s * e_words + ew] |= eb;
} else if r < px + py {
z_mask[s * e_words + ew] |= eb;
x_mask[s * e_words + ew] |= eb;
} else if r < p_event {
x_mask[s * e_words + ew] |= eb;
}
}
} else {
let ln_1mp = (1.0 - p_event).ln();
let px_frac = px / p_event;
let pxy_frac = (px + py) / p_event;
let mut pos = geometric_sample(&mut self.rng, ln_1mp);
while pos < tile_n {
let r: f64 = rand::Rng::random(&mut self.rng);
if r < px_frac {
z_mask[pos * e_words + ew] |= eb;
} else if r < pxy_frac {
z_mask[pos * e_words + ew] |= eb;
x_mask[pos * e_words + ew] |= eb;
} else {
x_mask[pos * e_words + ew] |= eb;
}
pos += 1 + geometric_sample(&mut self.rng, ln_1mp);
}
}
}
let z_lut = self.z_lut.as_ref().unwrap();
let x_lut = self.x_lut.as_ref().unwrap();
for s in 0..tile_n {
let shot_base = (tile_start + s) * m_words;
let mask_base = s * e_words;
let shot_accum = &mut accum[shot_base..shot_base + m_words];
z_lut.apply_masked(shot_accum, &z_mask[mask_base..mask_base + e_words]);
x_lut.apply_masked(shot_accum, &x_mask[mask_base..mask_base + e_words]);
}
}
}
}
pub fn compile_noisy(
circuit: &Circuit,
noise: &NoiseModel,
seed: u64,
) -> Result<NoisyCompiledSampler> {
noise.ensure_pauli_only()?;
if circuit.num_qubits >= 4 {
let blocks = circuit.independent_subsystems();
if blocks.len() > 1 {
let max_block = blocks.iter().map(|b| b.len()).max().unwrap_or(0);
if max_block < circuit.num_qubits {
return compile_noisy_filtered(circuit, noise, &blocks, seed);
}
}
}
compile_noisy_monolithic(circuit, noise, seed)
}
fn compile_noisy_filtered(
circuit: &Circuit,
noise: &NoiseModel,
blocks: &[Vec<usize>],
seed: u64,
) -> Result<NoisyCompiledSampler> {
let noiseless = compile_measurements(circuit, seed)?;
let measurement_qubits: Vec<usize> = circuit
.instructions
.iter()
.filter_map(|inst| match inst {
Instruction::Measure { qubit, .. } => Some(*qubit),
_ => None,
})
.collect();
let num_measurements = measurement_qubits.len();
let m_words = num_measurements.div_ceil(64);
if num_measurements == 0 {
return Ok(NoisyCompiledSampler {
noiseless,
events: FlatNoiseSensitivity::new(1, 0),
num_measurements: 0,
rng: ChaCha8Rng::seed_from_u64(seed.wrapping_add(0xA01CE)),
z_lut: None,
x_lut: None,
#[cfg(feature = "gpu")]
gpu_noise_cache: None,
});
}
let mut qubit_to_block = vec![0usize; circuit.num_qubits];
let mut qubit_to_local = vec![0usize; circuit.num_qubits];
for (bi, block) in blocks.iter().enumerate() {
for (li, &q) in block.iter().enumerate() {
qubit_to_block[q] = bi;
qubit_to_local[q] = li;
}
}
let mut block_meas: Vec<Vec<usize>> = vec![Vec::new(); blocks.len()];
for (mi, &q) in measurement_qubits.iter().enumerate() {
block_meas[qubit_to_block[q]].push(mi);
}
let total_noise_events: usize = noise.after_gate.iter().map(|ops| ops.len()).sum();
let mut events = FlatNoiseSensitivity::new(m_words, total_noise_events);
let mut global_x_buf = vec![0u64; m_words];
let mut global_z_buf = vec![0u64; m_words];
for (bi, block) in blocks.iter().enumerate() {
let bm_list = &block_meas[bi];
if bm_list.is_empty() {
continue;
}
let bn = block.len();
let bm = bm_list.len();
let bm_words = bm.div_ceil(64);
let mut x_packed: Vec<Vec<u64>> = vec![vec![0u64; bm_words]; bn];
let mut z_packed: Vec<Vec<u64>> = vec![vec![0u64; bm_words]; bn];
let mut sign_packed: Vec<u64> = vec![0u64; bm_words];
for (local_mi, &global_mi) in bm_list.iter().enumerate() {
let q = measurement_qubits[global_mi];
let local_q = qubit_to_local[q];
z_packed[local_q][local_mi / 64] |= 1u64 << (local_mi % 64);
}
let block_gates: Vec<(usize, &Gate, SmallVec<[usize; 4]>)> = circuit
.instructions
.iter()
.enumerate()
.filter_map(|(idx, inst)| match inst {
Instruction::Gate { gate, targets } => {
if targets.iter().all(|&t| qubit_to_block[t] == bi) {
let local_targets: SmallVec<[usize; 4]> =
targets.iter().map(|&t| qubit_to_local[t]).collect();
Some((idx, gate, local_targets))
} else {
None
}
}
Instruction::Conditional { gate, targets, .. } => {
if targets.iter().all(|&t| qubit_to_block[t] == bi) {
let local_targets: SmallVec<[usize; 4]> =
targets.iter().map(|&t| qubit_to_local[t]).collect();
Some((idx, gate, local_targets))
} else {
None
}
}
_ => None,
})
.collect();
for (instr_idx, gate, local_targets) in block_gates.iter().rev() {
let noise_ops = &noise.after_gate[*instr_idx];
if !noise_ops.is_empty() {
for event in noise_ops {
let (px, py, pz) = event.pauli_probs();
let local_q = qubit_to_local[event.qubit()];
let has_any = x_packed[local_q].iter().any(|&w| w != 0)
|| z_packed[local_q].iter().any(|&w| w != 0);
if has_any {
global_x_buf.fill(0);
global_z_buf.fill(0);
for (local_mi, &global_mi) in bm_list.iter().enumerate() {
if (x_packed[local_q][local_mi / 64] >> (local_mi % 64)) & 1 != 0 {
global_x_buf[global_mi / 64] |= 1u64 << (global_mi % 64);
}
if (z_packed[local_q][local_mi / 64] >> (local_mi % 64)) & 1 != 0 {
global_z_buf[global_mi / 64] |= 1u64 << (global_mi % 64);
}
}
events.push(&global_x_buf, &global_z_buf, px, py, pz);
}
}
}
batch_propagate_backward(
&mut x_packed,
&mut z_packed,
&mut sign_packed,
gate,
local_targets.as_slice(),
bm_words,
);
}
}
let (z_lut, x_lut) = build_noise_luts(&events);
Ok(NoisyCompiledSampler {
noiseless,
events,
num_measurements,
rng: ChaCha8Rng::seed_from_u64(seed.wrapping_add(0xCAFE_BABE)),
z_lut,
x_lut,
#[cfg(feature = "gpu")]
gpu_noise_cache: None,
})
}
fn compile_noisy_monolithic(
circuit: &Circuit,
noise: &NoiseModel,
seed: u64,
) -> Result<NoisyCompiledSampler> {
let noiseless = compile_measurements(circuit, seed)?;
let n = circuit.num_qubits;
let gate_indices: Vec<(usize, &Gate, &[usize])> = circuit
.instructions
.iter()
.enumerate()
.filter_map(|(idx, inst)| match inst {
Instruction::Gate { gate, targets } => Some((idx, gate, targets.as_slice())),
Instruction::Conditional { gate, targets, .. } => Some((idx, gate, targets.as_slice())),
_ => None,
})
.collect();
let measurement_qubits: Vec<usize> = circuit
.instructions
.iter()
.filter_map(|inst| match inst {
Instruction::Measure { qubit, .. } => Some(*qubit),
_ => None,
})
.collect();
let num_measurements = measurement_qubits.len();
let m_words = num_measurements.div_ceil(64);
if num_measurements == 0 {
return Ok(NoisyCompiledSampler {
noiseless,
events: FlatNoiseSensitivity::new(m_words, 0),
num_measurements: 0,
rng: ChaCha8Rng::seed_from_u64(seed.wrapping_add(0xA01CE)),
z_lut: None,
x_lut: None,
#[cfg(feature = "gpu")]
gpu_noise_cache: None,
});
}
let mut x_packed: Vec<Vec<u64>> = vec![vec![0u64; m_words]; n];
let mut z_packed: Vec<Vec<u64>> = vec![vec![0u64; m_words]; n];
let mut sign_packed: Vec<u64> = vec![0u64; m_words];
for (mi, &q) in measurement_qubits.iter().enumerate() {
z_packed[q][mi / 64] |= 1u64 << (mi % 64);
}
let total_noise_events: usize = noise.after_gate.iter().map(|ops| ops.len()).sum();
let mut events = FlatNoiseSensitivity::new(m_words, total_noise_events);
for &(instr_idx, gate, targets) in gate_indices.iter().rev() {
let noise_ops = &noise.after_gate[instr_idx];
if !noise_ops.is_empty() {
for event in noise_ops {
let (px, py, pz) = event.pauli_probs();
let q = event.qubit();
let has_any =
x_packed[q].iter().any(|&w| w != 0) || z_packed[q].iter().any(|&w| w != 0);
if has_any {
events.push(&x_packed[q], &z_packed[q], px, py, pz);
}
}
}
batch_propagate_backward(
&mut x_packed,
&mut z_packed,
&mut sign_packed,
gate,
targets,
m_words,
);
}
let (z_lut, x_lut) = build_noise_luts(&events);
Ok(NoisyCompiledSampler {
noiseless,
events,
num_measurements,
rng: ChaCha8Rng::seed_from_u64(seed.wrapping_add(0xCAFE_BABE)),
z_lut,
x_lut,
#[cfg(feature = "gpu")]
gpu_noise_cache: None,
})
}
const FRAME_BATCH_SIZE: usize = 256;
struct ReferenceInfo {
outcomes: Vec<bool>,
is_random: Vec<bool>,
random_x_support: Vec<Vec<usize>>,
}
fn reference_simulation(circuit: &Circuit, seed: u64) -> Result<ReferenceInfo> {
let mut stab = StabilizerBackend::new(seed);
stab.init(circuit.num_qubits, circuit.num_classical_bits)?;
let meas_info: Vec<(usize, usize, usize)> = circuit
.instructions
.iter()
.enumerate()
.filter_map(|(i, inst)| match inst {
Instruction::Measure {
qubit,
classical_bit,
} => Some((i, *qubit, *classical_bit)),
_ => None,
})
.collect();
let num_meas = meas_info.len();
if num_meas == 0 {
return Ok(ReferenceInfo {
outcomes: Vec::new(),
is_random: Vec::new(),
random_x_support: Vec::new(),
});
}
let first_meas_idx = meas_info[0].0;
let all_at_end = meas_info
.iter()
.enumerate()
.all(|(i, &(inst_idx, _, _))| inst_idx == first_meas_idx + i);
if all_at_end {
stab.apply_gates_only(&circuit.instructions[..first_meas_idx])?;
let measurements: Vec<(usize, usize)> = meas_info
.iter()
.map(|&(_, qubit, classical_bit)| (qubit, classical_bit))
.collect();
let (is_random, random_x_support, outcomes) = stab.batch_measure_ref_info(&measurements);
return Ok(ReferenceInfo {
outcomes,
is_random,
random_x_support,
});
}
let mut is_random = Vec::with_capacity(num_meas);
let mut random_x_support: Vec<Vec<usize>> = Vec::with_capacity(num_meas);
let mut seg_start = 0usize;
for &(meas_inst_idx, qubit, classical_bit) in &meas_info {
if seg_start < meas_inst_idx {
stab.apply_gates_only(&circuit.instructions[seg_start..meas_inst_idx])?;
}
let (meas_random, support) = stab.apply_measure_with_info(qubit, classical_bit);
is_random.push(meas_random);
random_x_support.push(support);
seg_start = meas_inst_idx + 1;
}
if seg_start < circuit.instructions.len() {
stab.apply_gates_only(&circuit.instructions[seg_start..])?;
}
let outcomes: Vec<bool> = meas_info
.iter()
.map(|&(_, _, cbit)| stab.classical_results()[cbit])
.collect();
Ok(ReferenceInfo {
outcomes,
is_random,
random_x_support,
})
}
#[inline(always)]
fn apply_gate_to_frame(
gate: &Gate,
targets: &[usize],
x_frame: &mut [Vec<u64>],
z_frame: &mut [Vec<u64>],
bw: usize,
) {
match gate {
Gate::H => {
let q = targets[0];
std::mem::swap(&mut x_frame[q], &mut z_frame[q]);
}
Gate::S | Gate::Sdg => {
let q = targets[0];
for w in 0..bw {
z_frame[q][w] ^= x_frame[q][w];
}
}
Gate::SX | Gate::SXdg => {
let q = targets[0];
for w in 0..bw {
x_frame[q][w] ^= z_frame[q][w];
}
}
Gate::X | Gate::Y | Gate::Z | Gate::Id => {}
Gate::Cx => {
let ctrl = targets[0];
let tgt = targets[1];
for w in 0..bw {
x_frame[tgt][w] ^= x_frame[ctrl][w];
z_frame[ctrl][w] ^= z_frame[tgt][w];
}
}
Gate::Cz => {
let q0 = targets[0];
let q1 = targets[1];
for w in 0..bw {
z_frame[q0][w] ^= x_frame[q1][w];
z_frame[q1][w] ^= x_frame[q0][w];
}
}
Gate::Swap => {
let q0 = targets[0];
let q1 = targets[1];
x_frame.swap(q0, q1);
z_frame.swap(q0, q1);
}
_ => {
debug_assert!(
false,
"apply_gate_to_frame: unhandled Clifford gate {:?}",
gate
);
}
}
}
fn run_shots_noisy_frame(
circuit: &Circuit,
noise: &NoiseModel,
num_shots: usize,
seed: u64,
) -> Result<ShotsResult> {
let n = circuit.num_qubits;
let num_classical = circuit.num_classical_bits;
let ref_info = reference_simulation(circuit, seed)?;
let classical_bit_order: Vec<usize> = circuit
.instructions
.iter()
.filter_map(|inst| match inst {
Instruction::Measure { classical_bit, .. } => Some(*classical_bit),
_ => None,
})
.collect();
let num_measurements = classical_bit_order.len();
let m_words = num_measurements.div_ceil(64);
let mut all_packed = vec![0u64; num_shots * m_words];
let mut rng = ChaCha8Rng::seed_from_u64(seed.wrapping_add(0xFAAB_E001));
for batch_start in (0..num_shots).step_by(FRAME_BATCH_SIZE) {
let batch_end = (batch_start + FRAME_BATCH_SIZE).min(num_shots);
let batch_n = batch_end - batch_start;
let bw = batch_n.div_ceil(64);
let mut x_frame: Vec<Vec<u64>> = vec![vec![0u64; bw]; n];
let mut z_frame: Vec<Vec<u64>> = vec![vec![0u64; bw]; n];
let mut meas_idx = 0usize;
for (idx, inst) in circuit.instructions.iter().enumerate() {
match inst {
Instruction::Gate { gate, targets }
| Instruction::Conditional { gate, targets, .. } => {
apply_gate_to_frame(gate, targets.as_slice(), &mut x_frame, &mut z_frame, bw);
for event in &noise.after_gate[idx] {
let (px, py, pz) = event.pauli_probs();
let q = event.qubit();
let p_event = px + py + pz;
if p_event == 0.0 {
continue;
}
let px_frac = px / p_event;
let pxy_frac = (px + py) / p_event;
if p_event < 0.5 && batch_n >= 32 {
let ln_1mp = (1.0 - p_event).ln();
let mut pos = geometric_sample(&mut rng, ln_1mp);
while pos < batch_n {
let r: f64 = rand::Rng::random(&mut rng);
let bit = 1u64 << (pos % 64);
let w = pos / 64;
if r < px_frac {
x_frame[q][w] ^= bit;
} else if r < pxy_frac {
x_frame[q][w] ^= bit;
z_frame[q][w] ^= bit;
} else {
z_frame[q][w] ^= bit;
}
pos += 1 + geometric_sample(&mut rng, ln_1mp);
}
} else {
for s in 0..batch_n {
let r: f64 = rand::Rng::random(&mut rng);
if r < px {
x_frame[q][s / 64] ^= 1u64 << (s % 64);
} else if r < px + py {
x_frame[q][s / 64] ^= 1u64 << (s % 64);
z_frame[q][s / 64] ^= 1u64 << (s % 64);
} else if r < p_event {
z_frame[q][s / 64] ^= 1u64 << (s % 64);
}
}
}
}
}
Instruction::Measure {
qubit,
classical_bit: _,
} => {
if ref_info.is_random[meas_idx] {
let support = &ref_info.random_x_support[meas_idx];
#[allow(clippy::needless_range_loop)]
for w in 0..bw {
let random_word: u64 = rand::Rng::random(&mut rng);
let mask = if w == bw - 1 && batch_n % 64 != 0 {
random_word & ((1u64 << (batch_n % 64)) - 1)
} else {
random_word
};
if mask != 0 {
for &q in support {
x_frame[q][w] ^= mask;
}
}
}
}
let ref_bit = ref_info.outcomes[meas_idx];
let mi_word = meas_idx / 64;
let mi_bit = meas_idx % 64;
#[allow(clippy::needless_range_loop)]
for w in 0..bw {
let frame_word = x_frame[*qubit][w];
let effective = if ref_bit { !frame_word } else { frame_word };
let num_bits = if w == bw - 1 && batch_n % 64 != 0 {
batch_n % 64
} else {
64
};
let mask = if num_bits == 64 {
effective
} else {
effective & ((1u64 << num_bits) - 1)
};
let mut bits = mask;
while bits != 0 {
let s = bits.trailing_zeros() as usize;
let gs = batch_start + w * 64 + s;
all_packed[gs * m_words + mi_word] |= 1u64 << mi_bit;
bits &= bits - 1;
}
}
meas_idx += 1;
}
_ => {}
}
}
}
let shots = unpack_and_remap(
&all_packed,
m_words,
num_shots,
&classical_bit_order,
num_classical,
);
Ok(ShotsResult {
shots,
num_classical_bits: circuit.num_classical_bits,
})
}
pub fn run_shots_noisy(
circuit: &Circuit,
noise: &NoiseModel,
num_shots: usize,
seed: u64,
) -> Result<ShotsResult> {
noise.ensure_pauli_only()?;
if !circuit.is_clifford_only() {
return run_shots_noisy_brute_with(
|s| Box::new(StabilizerBackend::new(s)),
circuit,
noise,
num_shots,
seed,
);
}
if !super::supports_compiled_measurement_sampling(circuit) {
return run_shots_noisy_brute_with(
|s| Box::new(StabilizerBackend::new(s)),
circuit,
noise,
num_shots,
seed,
);
}
if num_shots >= 1000 {
if let Ok(sampler) = super::homological::HomologicalSampler::compile(circuit, noise, seed) {
return super::homological::run_shots_homological_inner(sampler, circuit, num_shots);
}
}
let n = circuit.num_qubits;
let gate_count = circuit
.instructions
.iter()
.filter(|i| {
matches!(
i,
Instruction::Gate { .. } | Instruction::Conditional { .. }
)
})
.count();
let depth_ratio = gate_count as f64 / n.max(1) as f64;
let use_frame = depth_ratio < 3.0 || (n >= 200 && depth_ratio < 5.0);
if use_frame {
run_shots_noisy_frame(circuit, noise, num_shots, seed)
} else {
run_shots_noisy_compiled(circuit, noise, num_shots, seed)
}
}
fn finish_noisy_compiled_run(
mut sampler: NoisyCompiledSampler,
circuit: &Circuit,
num_shots: usize,
) -> Result<ShotsResult> {
let classical_bit_order: Vec<usize> = circuit
.instructions
.iter()
.filter_map(|inst| match inst {
Instruction::Measure { classical_bit, .. } => Some(*classical_bit),
_ => None,
})
.collect();
let num_classical = circuit.num_classical_bits;
let packed = sampler.try_sample_bulk_packed(num_shots)?;
let shots = unpack_and_remap_packed(&packed, num_shots, &classical_bit_order, num_classical);
Ok(ShotsResult {
shots,
num_classical_bits: circuit.num_classical_bits,
})
}
fn run_shots_noisy_compiled(
circuit: &Circuit,
noise: &NoiseModel,
num_shots: usize,
seed: u64,
) -> Result<ShotsResult> {
let sampler = compile_noisy(circuit, noise, seed)?;
finish_noisy_compiled_run(sampler, circuit, num_shots)
}
#[cfg(feature = "gpu")]
pub(crate) fn run_shots_noisy_with_gpu(
circuit: &Circuit,
noise: &NoiseModel,
num_shots: usize,
seed: u64,
context: std::sync::Arc<crate::gpu::GpuContext>,
) -> Result<ShotsResult> {
noise.ensure_pauli_only()?;
if !circuit.is_clifford_only() {
return run_shots_noisy_brute_with(
|s| Box::new(StabilizerBackend::new(s)),
circuit,
noise,
num_shots,
seed,
);
}
if !super::supports_compiled_measurement_sampling(circuit) {
return run_shots_noisy_brute_with(
|s| Box::new(StabilizerBackend::new(s)),
circuit,
noise,
num_shots,
seed,
);
}
if num_shots >= 1000 {
if let Ok(sampler) = super::homological::HomologicalSampler::compile(circuit, noise, seed) {
return super::homological::run_shots_homological_inner(sampler, circuit, num_shots);
}
}
let n = circuit.num_qubits;
let gate_count = circuit
.instructions
.iter()
.filter(|i| {
matches!(
i,
Instruction::Gate { .. } | Instruction::Conditional { .. }
)
})
.count();
let depth_ratio = gate_count as f64 / n.max(1) as f64;
let use_frame = depth_ratio < 3.0 || (n >= 200 && depth_ratio < 5.0);
if use_frame {
return run_shots_noisy_frame(circuit, noise, num_shots, seed);
}
let sampler = compile_noisy(circuit, noise, seed)?.with_gpu(context);
finish_noisy_compiled_run(sampler, circuit, num_shots)
}
pub(crate) fn run_shots_noisy_brute_with(
backend_factory: impl Fn(u64) -> Box<dyn Backend>,
circuit: &Circuit,
noise: &NoiseModel,
num_shots: usize,
seed: u64,
) -> Result<ShotsResult> {
let mut shots = Vec::with_capacity(num_shots);
for i in 0..num_shots {
let shot_seed = seed.wrapping_add(i as u64);
let mut rng = ChaCha8Rng::seed_from_u64(shot_seed);
let mut backend = backend_factory(shot_seed);
backend.init(circuit.num_qubits, circuit.num_classical_bits)?;
for (idx, instr) in circuit.instructions.iter().enumerate() {
backend.apply(instr)?;
let noise_events = &noise.after_gate[idx];
for event in noise_events {
let (px, py, pz) = event.pauli_probs();
let q = event.qubit();
let r: f64 = rand::Rng::random(&mut rng);
if r < px {
backend.apply(&Instruction::Gate {
gate: Gate::X,
targets: SmallVec::from_elem(q, 1),
})?;
} else if r < px + py {
backend.apply(&Instruction::Gate {
gate: Gate::Y,
targets: SmallVec::from_elem(q, 1),
})?;
} else if r < px + py + pz {
backend.apply(&Instruction::Gate {
gate: Gate::Z,
targets: SmallVec::from_elem(q, 1),
})?;
}
}
}
shots.push(backend.classical_results().to_vec());
}
Ok(ShotsResult {
shots,
num_classical_bits: circuit.num_classical_bits,
})
}
#[cfg(test)]
mod tests {
use super::*;
use crate::circuits;
#[test]
fn noisy_ghz_produces_varied_outcomes() {
let n = 10;
let mut circuit = circuits::ghz_circuit(n);
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
let result = run_shots_noisy(&circuit, &noise, 1000, 42).unwrap();
assert_eq!(result.shots.len(), 1000);
assert_eq!(result.shots[0].len(), n);
let all_zero: Vec<bool> = vec![false; n];
let all_one: Vec<bool> = vec![true; n];
let num_00 = result.shots.iter().filter(|s| **s == all_zero).count();
let num_11 = result.shots.iter().filter(|s| **s == all_one).count();
let num_other = 1000 - num_00 - num_11;
assert!(num_other > 0, "noise should produce non-GHZ outcomes");
assert!(num_00 > 100, "should still have many |00...0> outcomes");
assert!(num_11 > 100, "should still have many |11...1> outcomes");
}
#[test]
fn zero_noise_matches_noiseless() {
let n = 5;
let mut circuit = circuits::ghz_circuit(n);
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.0);
let result = run_shots_noisy(&circuit, &noise, 100, 42).unwrap();
for shot in &result.shots {
let all_same = shot.iter().all(|&b| b == shot[0]);
assert!(all_same, "GHZ with zero noise must be all-0 or all-1");
}
}
#[test]
fn noise_model_length_matches_circuit() {
let n = 10;
let mut circuit = circuits::ghz_circuit(n);
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.001);
assert_eq!(noise.after_gate.len(), circuit.instructions.len());
}
#[test]
fn compiled_noisy_stats_match_brute_force() {
let n = 10;
let mut circuit = circuits::ghz_circuit(n);
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
let num_shots = 10000;
let brute = run_shots_noisy_brute_with(
|s| Box::new(StabilizerBackend::new(s)),
&circuit,
&noise,
num_shots,
42,
)
.unwrap();
let compiled = run_shots_noisy_compiled(&circuit, &noise, num_shots, 42).unwrap();
let count_all_same = |shots: &[Vec<bool>]| -> usize {
shots
.iter()
.filter(|s| s.iter().all(|&b| b == s[0]))
.count()
};
let brute_coherent = count_all_same(&brute.shots);
let compiled_coherent = count_all_same(&compiled.shots);
let brute_frac = brute_coherent as f64 / num_shots as f64;
let compiled_frac = compiled_coherent as f64 / num_shots as f64;
assert!(
(brute_frac - compiled_frac).abs() < 0.05,
"coherent fraction should be similar: brute={brute_frac:.3}, compiled={compiled_frac:.3}"
);
let count_errors = |shots: &[Vec<bool>]| -> usize {
shots
.iter()
.filter(|s| !s.iter().all(|&b| b == s[0]))
.count()
};
let brute_errors = count_errors(&brute.shots);
let compiled_errors = count_errors(&compiled.shots);
assert!(
brute_errors > 0 && compiled_errors > 0,
"both should produce errors"
);
}
#[test]
fn compiled_noisy_clifford_produces_noise() {
let n = 20;
let circuit_base = circuits::clifford_heavy_circuit(n, 10, 42);
let mut circuit = circuit_base;
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
let result = run_shots_noisy(&circuit, &noise, 100, 42).unwrap();
assert_eq!(result.shots.len(), 100);
assert_eq!(result.shots[0].len(), n);
let unique: std::collections::HashSet<Vec<bool>> = result.shots.iter().cloned().collect();
assert!(unique.len() > 1, "noise should produce varied outcomes");
}
#[test]
fn compile_noisy_rejects_reset_circuits() {
let mut circuit = Circuit::new(1, 1);
circuit.add_reset(0);
circuit.add_measure(0, 0);
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
assert!(compile_noisy(&circuit, &noise, 42).is_err());
}
#[test]
fn compile_noisy_rejects_conditionals() {
let mut circuit = Circuit::new(2, 2);
circuit.add_gate(Gate::H, &[0]);
circuit.add_measure(0, 0);
circuit.instructions.push(Instruction::Conditional {
condition: crate::circuit::ClassicalCondition::BitIsOne(0),
gate: Gate::X,
targets: crate::circuit::smallvec![1],
});
circuit.add_measure(1, 1);
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
assert!(compile_noisy(&circuit, &noise, 42).is_err());
}
#[test]
fn run_shots_noisy_handles_reset_circuits() {
let mut circuit = Circuit::new(1, 1);
circuit.add_gate(Gate::X, &[0]);
circuit.add_reset(0);
circuit.add_measure(0, 0);
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.0);
let result = run_shots_noisy(&circuit, &noise, 32, 42).unwrap();
assert!(result.shots.iter().all(|shot| !shot[0]));
}
#[test]
fn run_shots_noisy_handles_conditionals() {
let mut circuit = Circuit::new(2, 2);
circuit.add_gate(Gate::H, &[0]);
circuit.add_measure(0, 0);
circuit.instructions.push(Instruction::Conditional {
condition: crate::circuit::ClassicalCondition::BitIsOne(0),
gate: Gate::X,
targets: crate::circuit::smallvec![1],
});
circuit.add_measure(1, 1);
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.0);
let result = run_shots_noisy(&circuit, &noise, 256, 42).unwrap();
assert!(result.shots.iter().all(|shot| shot[0] == shot[1]));
}
#[test]
fn frame_ghz_100q_produces_varied_outcomes() {
let n = 100;
let mut circuit = circuits::ghz_circuit(n);
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
let result = run_shots_noisy_frame(&circuit, &noise, 1000, 42).unwrap();
assert_eq!(result.shots.len(), 1000);
assert_eq!(result.shots[0].len(), n);
let all_zero: Vec<bool> = vec![false; n];
let all_one: Vec<bool> = vec![true; n];
let num_00 = result.shots.iter().filter(|s| **s == all_zero).count();
let num_11 = result.shots.iter().filter(|s| **s == all_one).count();
let num_other = 1000 - num_00 - num_11;
assert!(num_other > 0, "noise should produce non-GHZ outcomes");
assert!(num_00 > 50, "should still have many |00...0> outcomes");
assert!(num_11 > 50, "should still have many |11...1> outcomes");
}
#[test]
fn frame_zero_noise_matches_noiseless_100q() {
let n = 100;
let mut circuit = circuits::ghz_circuit(n);
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.0);
let result = run_shots_noisy_frame(&circuit, &noise, 100, 42).unwrap();
for shot in &result.shots {
let all_same = shot.iter().all(|&b| b == shot[0]);
assert!(all_same, "GHZ with zero noise must be all-0 or all-1");
}
}
#[test]
fn frame_stats_match_compiled_ghz() {
let n = 100;
let mut circuit = circuits::ghz_circuit(n);
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
let num_shots = 5000;
let frame = run_shots_noisy_frame(&circuit, &noise, num_shots, 42).unwrap();
let compiled = run_shots_noisy_compiled(&circuit, &noise, num_shots, 42).unwrap();
let count_coherent = |shots: &[Vec<bool>]| -> usize {
shots
.iter()
.filter(|s| s.iter().all(|&b| b == s[0]))
.count()
};
let frame_coh = count_coherent(&frame.shots) as f64 / num_shots as f64;
let compiled_coh = count_coherent(&compiled.shots) as f64 / num_shots as f64;
assert!(
(frame_coh - compiled_coh).abs() < 0.05,
"coherent fraction should be similar: frame={frame_coh:.3}, compiled={compiled_coh:.3}"
);
}
#[test]
fn frame_clifford_100q_produces_noise() {
let n = 100;
let circuit_base = circuits::clifford_heavy_circuit(n, 10, 42);
let mut circuit = circuit_base;
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
let result = run_shots_noisy_frame(&circuit, &noise, 100, 42).unwrap();
assert_eq!(result.shots.len(), 100);
assert_eq!(result.shots[0].len(), n);
let unique: std::collections::HashSet<Vec<bool>> = result.shots.iter().cloned().collect();
assert!(unique.len() > 1, "noise should produce varied outcomes");
}
#[test]
fn filtered_noisy_bell_pairs_matches_monolithic() {
let n_pairs = 50;
let n = n_pairs * 2;
let mut circuit = circuits::independent_bell_pairs(n_pairs);
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
let seed = 42u64;
let filtered =
compile_noisy_filtered(&circuit, &noise, &circuit.independent_subsystems(), seed)
.unwrap();
let monolithic = compile_noisy_monolithic(&circuit, &noise, seed).unwrap();
assert_eq!(filtered.num_measurements, monolithic.num_measurements);
assert_eq!(filtered.events.len(), monolithic.events.len());
let mut filtered = filtered;
let mut monolithic = monolithic;
let num_shots = 10_000;
let shots_f = filtered.sample_bulk(num_shots);
let shots_m = monolithic.sample_bulk(num_shots);
assert_eq!(shots_f.len(), num_shots);
assert_eq!(shots_m.len(), num_shots);
let mut agree_f = 0usize;
let mut agree_m = 0usize;
for shot in &shots_f {
for pair in shot.chunks(2) {
if pair[0] == pair[1] {
agree_f += 1;
}
}
}
for shot in &shots_m {
for pair in shot.chunks(2) {
if pair[0] == pair[1] {
agree_m += 1;
}
}
}
let total_pairs = num_shots * n_pairs;
let agree_rate_f = agree_f as f64 / total_pairs as f64;
let agree_rate_m = agree_m as f64 / total_pairs as f64;
assert!(
agree_rate_f > 0.95,
"filtered agreement rate {agree_rate_f:.4} should be >0.95 with low noise"
);
assert!(
agree_rate_m > 0.95,
"monolithic agreement rate {agree_rate_m:.4} should be >0.95 with low noise"
);
assert!(
(agree_rate_f - agree_rate_m).abs() < 0.02,
"filtered ({agree_rate_f:.4}) and monolithic ({agree_rate_m:.4}) should have similar agreement rates"
);
}
#[cfg(feature = "gpu")]
#[test]
fn compiled_noisy_with_stub_gpu_matches_cpu_below_threshold() {
let n = 12;
let mut circuit = circuits::ghz_circuit(n);
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
let shots = 20_000;
let mut cpu = compile_noisy(&circuit, &noise, 42).unwrap();
let cpu_marginals = cpu.sample_marginals(shots);
let mut gpu = compile_noisy(&circuit, &noise, 42)
.unwrap()
.with_gpu(crate::gpu::GpuContext::stub_for_tests());
let gpu_marginals = gpu.sample_marginals(shots);
for (idx, (cpu_p1, gpu_p1)) in cpu_marginals.iter().zip(gpu_marginals.iter()).enumerate() {
assert!(
(cpu_p1 - gpu_p1).abs() < 0.03,
"marginal[{idx}] diverged too much: cpu={cpu_p1}, gpu={gpu_p1}"
);
}
}
#[cfg(feature = "gpu")]
#[test]
fn compiled_noisy_with_stub_gpu_low_rank_above_threshold_uses_cpu_fallback() {
let shots = crate::gpu::bts_min_shots().max(1);
let n = 12;
let mut circuit = circuits::ghz_circuit(n);
circuit.num_classical_bits = n;
for i in 0..n {
circuit.add_measure(i, i);
}
let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
let mut gpu = compile_noisy(&circuit, &noise, 42)
.unwrap()
.with_gpu(crate::gpu::GpuContext::stub_for_tests());
assert!(!gpu.noiseless.should_use_gpu_bts(shots));
let counts = gpu.sample_counts(shots);
let total: u64 = counts.values().sum();
assert_eq!(total, shots as u64);
}
}