1use num_complex::Complex64;
2use rand::SeedableRng;
3use rand_chacha::ChaCha8Rng;
4use smallvec::smallvec;
5
6use crate::backend::stabilizer::StabilizerBackend;
7use crate::backend::Backend;
8use crate::circuit::{Circuit, Instruction, SmallVec};
9use crate::error::Result;
10use crate::gates::Gate;
11use crate::sim::compiled::{
12 batch_propagate_backward, compile_measurements, xor_words, PackedShots,
13};
14use crate::sim::ShotsResult;
15
16#[derive(Debug, Clone)]
23pub enum NoiseChannel {
24 Pauli { px: f64, py: f64, pz: f64 },
26 Depolarizing { p: f64 },
29 AmplitudeDamping { gamma: f64 },
31 PhaseDamping { gamma: f64 },
34 ThermalRelaxation { t1: f64, t2: f64, gate_time: f64 },
38 TwoQubitDepolarizing { p: f64 },
41 Custom { kraus: Vec<[[Complex64; 2]; 2]> },
49}
50
51impl NoiseChannel {
52 pub fn as_pauli(&self) -> Option<(f64, f64, f64)> {
53 match self {
54 NoiseChannel::Pauli { px, py, pz } => Some((*px, *py, *pz)),
55 NoiseChannel::Depolarizing { p } => {
56 let pp = p / 3.0;
57 Some((pp, pp, pp))
58 }
59 _ => None,
60 }
61 }
62
63 pub fn is_pauli(&self) -> bool {
64 matches!(
65 self,
66 NoiseChannel::Pauli { .. } | NoiseChannel::Depolarizing { .. }
67 )
68 }
69
70 pub fn is_exactly_samplable(&self) -> bool {
72 self.validate().is_ok()
73 }
74
75 pub fn validate(&self) -> Result<()> {
77 match self {
78 NoiseChannel::Pauli { px, py, pz } => {
79 validate_probability("px", *px)?;
80 validate_probability("py", *py)?;
81 validate_probability("pz", *pz)?;
82 if *px + *py + *pz > 1.0 + 1e-12 {
83 return Err(crate::error::PrismError::InvalidParameter {
84 message: "Pauli channel probabilities must sum to <= 1".into(),
85 });
86 }
87 }
88 NoiseChannel::Depolarizing { p } | NoiseChannel::TwoQubitDepolarizing { p } => {
89 validate_probability("p", *p)?;
90 }
91 NoiseChannel::AmplitudeDamping { gamma } | NoiseChannel::PhaseDamping { gamma } => {
92 validate_probability("gamma", *gamma)?;
93 }
94 NoiseChannel::ThermalRelaxation { t1, t2, gate_time } => {
95 if !t1.is_finite() || *t1 <= 0.0 {
96 return Err(crate::error::PrismError::InvalidParameter {
97 message: "thermal relaxation t1 must be finite and positive".into(),
98 });
99 }
100 if !t2.is_finite() || *t2 <= 0.0 {
101 return Err(crate::error::PrismError::InvalidParameter {
102 message: "thermal relaxation t2 must be finite and positive".into(),
103 });
104 }
105 if !gate_time.is_finite() || *gate_time < 0.0 {
106 return Err(crate::error::PrismError::InvalidParameter {
107 message: "thermal relaxation gate_time must be finite and non-negative"
108 .into(),
109 });
110 }
111 }
112 NoiseChannel::Custom { kraus } => {
113 if kraus.is_empty() {
114 return Err(crate::error::PrismError::InvalidParameter {
115 message: "Custom Kraus channel must contain at least one operator".into(),
116 });
117 }
118 for (op_idx, op) in kraus.iter().enumerate() {
119 for (row_idx, row) in op.iter().enumerate() {
120 for (col_idx, val) in row.iter().enumerate() {
121 if !val.re.is_finite() || !val.im.is_finite() {
122 return Err(crate::error::PrismError::InvalidParameter {
123 message: format!(
124 "Custom Kraus operator {op_idx} entry ({row_idx},{col_idx}) must be finite"
125 ),
126 });
127 }
128 }
129 }
130 }
131 }
132 }
133 Ok(())
134 }
135}
136
137fn validate_probability(name: &str, value: f64) -> Result<()> {
138 if !value.is_finite() || !(0.0..=1.0).contains(&value) {
139 return Err(crate::error::PrismError::InvalidParameter {
140 message: format!("{name} must be finite and in [0, 1]"),
141 });
142 }
143 Ok(())
144}
145
146#[derive(Debug, Clone)]
147pub struct NoiseEvent {
148 pub channel: NoiseChannel,
149 pub qubits: SmallVec<[usize; 2]>,
150}
151
152impl NoiseEvent {
153 pub fn pauli(qubit: usize, px: f64, py: f64, pz: f64) -> Self {
154 Self {
155 channel: NoiseChannel::Pauli { px, py, pz },
156 qubits: smallvec![qubit],
157 }
158 }
159
160 pub fn qubit(&self) -> usize {
161 self.qubits[0]
162 }
163
164 pub fn pauli_probs(&self) -> (f64, f64, f64) {
172 self.channel
173 .as_pauli()
174 .expect("pauli_probs called on non-Pauli channel; caller must use ensure_pauli_only")
175 }
176}
177
178#[derive(Debug, Clone)]
179pub struct ReadoutError {
180 pub p01: f64,
181 pub p10: f64,
182}
183
184pub struct NoiseModel {
185 pub after_gate: Vec<Vec<NoiseEvent>>,
186 pub readout: Vec<Option<ReadoutError>>,
187}
188
189impl NoiseModel {
190 pub fn uniform_depolarizing(circuit: &Circuit, p: f64) -> Self {
191 let px = p / 3.0;
192 let py = p / 3.0;
193 let pz = p / 3.0;
194
195 let mut after_gate = Vec::with_capacity(circuit.instructions.len());
196 for instr in &circuit.instructions {
197 match instr {
198 Instruction::Gate { targets, .. } => {
199 let ops: Vec<NoiseEvent> = targets
200 .iter()
201 .map(|&q| NoiseEvent::pauli(q, px, py, pz))
202 .collect();
203 after_gate.push(ops);
204 }
205 _ => {
206 after_gate.push(Vec::new());
207 }
208 }
209 }
210
211 Self {
212 after_gate,
213 readout: vec![None; circuit.num_classical_bits],
214 }
215 }
216
217 pub fn with_amplitude_damping(circuit: &Circuit, gamma: f64) -> Self {
218 let mut after_gate = Vec::with_capacity(circuit.instructions.len());
219 for instr in &circuit.instructions {
220 match instr {
221 Instruction::Gate { targets, .. } => {
222 let ops: Vec<NoiseEvent> = targets
223 .iter()
224 .map(|&q| NoiseEvent {
225 channel: NoiseChannel::AmplitudeDamping { gamma },
226 qubits: smallvec![q],
227 })
228 .collect();
229 after_gate.push(ops);
230 }
231 _ => {
232 after_gate.push(Vec::new());
233 }
234 }
235 }
236
237 Self {
238 after_gate,
239 readout: vec![None; circuit.num_classical_bits],
240 }
241 }
242
243 pub fn with_readout_error(&mut self, p01: f64, p10: f64) -> &mut Self {
244 self.readout.fill(Some(ReadoutError { p01, p10 }));
245 self
246 }
247
248 pub fn is_pauli_only(&self) -> bool {
249 self.after_gate
250 .iter()
251 .flat_map(|events| events.iter())
252 .all(|e| e.channel.is_pauli())
253 && self.readout.iter().all(|r| r.is_none())
254 }
255
256 pub fn has_noise(&self) -> bool {
257 self.after_gate.iter().any(|events| !events.is_empty())
258 || self.readout.iter().any(|r| r.is_some())
259 }
260
261 pub fn ensure_pauli_only(&self) -> Result<()> {
265 if !self.is_pauli_only() {
266 return Err(crate::error::PrismError::IncompatibleBackend {
267 backend: "compiled stabilizer sampler".into(),
268 reason: "non-Pauli noise channels (amplitude damping, phase damping, \
269 thermal relaxation, custom Kraus) or readout errors require \
270 the trajectory engine; use a non-stabilizer backend or a \
271 Pauli-only noise model"
272 .into(),
273 });
274 }
275 Ok(())
276 }
277
278 pub fn validate(&self) -> Result<()> {
280 for (instr_idx, events) in self.after_gate.iter().enumerate() {
281 for (event_idx, event) in events.iter().enumerate() {
282 event.channel.validate().map_err(|err| {
283 crate::error::PrismError::InvalidParameter {
284 message: format!(
285 "noise event {event_idx} after instruction {instr_idx}: {err}"
286 ),
287 }
288 })?;
289
290 let expected_qubits = match &event.channel {
291 NoiseChannel::TwoQubitDepolarizing { .. } => 2,
292 _ => 1,
293 };
294 if event.qubits.len() != expected_qubits {
295 return Err(crate::error::PrismError::InvalidParameter {
296 message: format!(
297 "noise event {event_idx} after instruction {instr_idx}: expected {expected_qubits} qubit(s), got {}",
298 event.qubits.len()
299 ),
300 });
301 }
302 }
303 }
304
305 for (bit_idx, readout) in self.readout.iter().enumerate() {
306 if let Some(readout) = readout {
307 validate_probability("readout p01", readout.p01).map_err(|err| {
308 crate::error::PrismError::InvalidParameter {
309 message: format!("readout error {bit_idx}: {err}"),
310 }
311 })?;
312 validate_probability("readout p10", readout.p10).map_err(|err| {
313 crate::error::PrismError::InvalidParameter {
314 message: format!("readout error {bit_idx}: {err}"),
315 }
316 })?;
317 }
318 }
319
320 Ok(())
321 }
322}
323
324struct FlatNoiseSensitivity {
325 x_data: Vec<u64>,
326 z_data: Vec<u64>,
327 probs: Vec<[f64; 3]>,
328 m_words: usize,
329}
330
331impl FlatNoiseSensitivity {
332 fn new(m_words: usize, capacity: usize) -> Self {
333 Self {
334 x_data: Vec::with_capacity(capacity * m_words),
335 z_data: Vec::with_capacity(capacity * m_words),
336 probs: Vec::with_capacity(capacity),
337 m_words,
338 }
339 }
340
341 fn push(&mut self, x_flip: &[u64], z_flip: &[u64], px: f64, py: f64, pz: f64) {
342 self.x_data.extend_from_slice(x_flip);
343 self.z_data.extend_from_slice(z_flip);
344 self.probs.push([px, py, pz]);
345 }
346
347 #[inline(always)]
348 fn len(&self) -> usize {
349 self.probs.len()
350 }
351
352 #[inline(always)]
353 fn is_empty(&self) -> bool {
354 self.probs.is_empty()
355 }
356
357 #[inline(always)]
358 fn x_flip(&self, idx: usize) -> &[u64] {
359 let off = idx * self.m_words;
360 &self.x_data[off..off + self.m_words]
361 }
362
363 #[inline(always)]
364 fn z_flip(&self, idx: usize) -> &[u64] {
365 let off = idx * self.m_words;
366 &self.z_data[off..off + self.m_words]
367 }
368}
369
370#[inline(always)]
371fn geometric_sample(rng: &mut ChaCha8Rng, ln_1mp: f64) -> usize {
372 let u: f64 = 1.0 - rand::Rng::random::<f64>(rng);
373 (u.ln() / ln_1mp) as usize
374}
375
376#[cfg(feature = "gpu")]
377fn geometric_sample_xoshiro(
378 rng: &mut crate::sim::compiled::rng::Xoshiro256PlusPlus,
379 ln_1mp: f64,
380) -> usize {
381 let u: f64 = 1.0 - rng.next_f64();
383 (u.ln() / ln_1mp) as usize
384}
385
386#[cfg(feature = "gpu")]
390trait EventRng {
391 fn uniform(&mut self) -> f64;
392 fn geom(&mut self, ln_1mp: f64) -> usize;
393}
394
395#[cfg(feature = "gpu")]
396impl EventRng for ChaCha8Rng {
397 #[inline]
398 fn uniform(&mut self) -> f64 {
399 rand::Rng::random::<f64>(self)
400 }
401 #[inline]
402 fn geom(&mut self, ln_1mp: f64) -> usize {
403 geometric_sample(self, ln_1mp)
404 }
405}
406
407#[cfg(feature = "gpu")]
408impl EventRng for crate::sim::compiled::rng::Xoshiro256PlusPlus {
409 #[inline]
410 fn uniform(&mut self) -> f64 {
411 self.next_f64()
412 }
413 #[inline]
414 fn geom(&mut self, ln_1mp: f64) -> usize {
415 geometric_sample_xoshiro(self, ln_1mp)
416 }
417}
418
419#[cfg(feature = "gpu")]
423#[inline]
424fn fill_one_event(
425 x_slot: &mut [u64],
426 z_slot: &mut [u64],
427 probs: [f64; 3],
428 chunk_shots: usize,
429 rng: &mut dyn EventRng,
430) {
431 let [px, py, pz] = probs;
432 let p_event = px + py + pz;
433 if p_event == 0.0 {
434 return;
435 }
436 let pxy = px + py;
437
438 if p_event >= 0.5 || chunk_shots < 32 {
441 for s in 0..chunk_shots {
442 let r = rng.uniform();
443 let slot_idx = s / 64;
444 let bit = 1u64 << (s % 64);
445 if r < px {
446 z_slot[slot_idx] |= bit;
447 } else if r < pxy {
448 z_slot[slot_idx] |= bit;
449 x_slot[slot_idx] |= bit;
450 } else if r < p_event {
451 x_slot[slot_idx] |= bit;
452 }
453 }
454 return;
455 }
456
457 let ln_1mp = (1.0 - p_event).ln();
460 let px_frac = px / p_event;
461 let pxy_frac = pxy / p_event;
462 let mut pos = rng.geom(ln_1mp);
463 while pos < chunk_shots {
464 let r = rng.uniform();
465 let slot_idx = pos / 64;
466 let bit = 1u64 << (pos % 64);
467 if r < px_frac {
468 z_slot[slot_idx] |= bit;
469 } else if r < pxy_frac {
470 z_slot[slot_idx] |= bit;
471 x_slot[slot_idx] |= bit;
472 } else {
473 x_slot[slot_idx] |= bit;
474 }
475 pos += 1 + rng.geom(ln_1mp);
476 }
477}
478
479const NOISE_LUT_K: usize = 8;
480const NOISE_LUT_MIN_EVENTS: usize = 16;
481const NOISE_LUT_TILE: usize = 4096;
482
483fn unpack_and_remap(
484 accum: &[u64],
485 m_words: usize,
486 num_shots: usize,
487 classical_bit_order: &[usize],
488 num_classical: usize,
489) -> Vec<Vec<bool>> {
490 fn unpack_one(src: &[u64], classical_bit_order: &[usize], num_classical: usize) -> Vec<bool> {
491 let mut out = vec![false; num_classical];
492 for (mi, &cbit) in classical_bit_order.iter().enumerate() {
493 if cbit < num_classical {
494 out[cbit] = (src[mi / 64] >> (mi % 64)) & 1 != 0;
495 }
496 }
497 out
498 }
499
500 #[cfg(feature = "parallel")]
501 if num_shots >= 256 {
502 use rayon::prelude::*;
503 return accum
504 .par_chunks(m_words)
505 .map(|src| unpack_one(src, classical_bit_order, num_classical))
506 .collect();
507 }
508
509 #[cfg(not(feature = "parallel"))]
510 let _ = num_shots;
511
512 accum
513 .chunks(m_words)
514 .map(|src| unpack_one(src, classical_bit_order, num_classical))
515 .collect()
516}
517
518fn unpack_and_remap_packed(
519 packed: &PackedShots,
520 num_shots: usize,
521 classical_bit_order: &[usize],
522 num_classical: usize,
523) -> Vec<Vec<bool>> {
524 fn unpack_one_packed(
525 packed: &PackedShots,
526 shot: usize,
527 classical_bit_order: &[usize],
528 num_classical: usize,
529 ) -> Vec<bool> {
530 let mut out = vec![false; num_classical];
531 for (mi, &cbit) in classical_bit_order.iter().enumerate() {
532 if cbit < num_classical {
533 out[cbit] = packed.get_bit(shot, mi);
534 }
535 }
536 out
537 }
538
539 #[cfg(feature = "parallel")]
540 if num_shots >= 256 {
541 use rayon::prelude::*;
542 return (0..num_shots)
543 .into_par_iter()
544 .map(|s| unpack_one_packed(packed, s, classical_bit_order, num_classical))
545 .collect();
546 }
547
548 #[cfg(not(feature = "parallel"))]
549 let _ = num_shots;
550
551 (0..packed.num_shots())
552 .map(|s| unpack_one_packed(packed, s, classical_bit_order, num_classical))
553 .collect()
554}
555
556struct NoiseFlipLut {
557 data: Vec<u64>,
558 m_words: usize,
559 num_full_groups: usize,
560 remainder_size: usize,
561}
562
563impl NoiseFlipLut {
564 fn build_from_flat(flat_data: &[u64], num_rows: usize, m_words: usize) -> Self {
565 let num_full_groups = num_rows / NOISE_LUT_K;
566 let remainder_size = num_rows % NOISE_LUT_K;
567 let total_groups = num_full_groups + usize::from(remainder_size > 0);
568 let entries = 1 << NOISE_LUT_K;
569
570 let mut data = vec![0u64; total_groups * entries * m_words];
571
572 for g in 0..total_groups {
573 let group_start = g * NOISE_LUT_K;
574 let k = if g < num_full_groups {
575 NOISE_LUT_K
576 } else {
577 remainder_size
578 };
579 let lut_off = g * entries * m_words;
580
581 for byte in 1..(1usize << k) {
582 let lowest = byte & byte.wrapping_neg();
583 let row_idx = group_start + lowest.trailing_zeros() as usize;
584 let prev = byte ^ lowest;
585
586 let dst = lut_off + byte * m_words;
587 let src = lut_off + prev * m_words;
588 let row_off = row_idx * m_words;
589
590 for w in 0..m_words {
591 data[dst + w] = data[src + w] ^ flat_data[row_off + w];
592 }
593 }
594 }
595
596 Self {
597 data,
598 m_words,
599 num_full_groups,
600 remainder_size,
601 }
602 }
603
604 #[inline(always)]
605 fn total_groups(&self) -> usize {
606 self.num_full_groups + usize::from(self.remainder_size > 0)
607 }
608
609 #[inline(always)]
610 fn apply_masked(&self, accum: &mut [u64], mask: &[u64]) {
611 for g in 0..self.total_groups() {
612 let byte = ((mask[g / 8] >> ((g % 8) * 8)) & 0xFF) as usize;
613 if byte != 0 {
614 let offset = (g * (1 << NOISE_LUT_K) + byte) * self.m_words;
615 xor_words(accum, &self.data[offset..offset + self.m_words]);
616 }
617 }
618 }
619}
620
621fn build_noise_luts(events: &FlatNoiseSensitivity) -> (Option<NoiseFlipLut>, Option<NoiseFlipLut>) {
622 let ne = events.len();
623 if ne < NOISE_LUT_MIN_EVENTS {
624 return (None, None);
625 }
626
627 let avg_p: f64 = events
628 .probs
629 .iter()
630 .map(|[px, py, pz]| px + py + pz)
631 .sum::<f64>()
632 / ne as f64;
633 if avg_p < 0.05 {
634 return (None, None);
635 }
636
637 let mw = events.m_words;
638 let z_lut = NoiseFlipLut::build_from_flat(&events.z_data, ne, mw);
639 let x_lut = NoiseFlipLut::build_from_flat(&events.x_data, ne, mw);
640 (Some(z_lut), Some(x_lut))
641}
642
643#[cfg(feature = "gpu")]
650fn flatten_event_thresholds_u64(probs: &[[f64; 3]]) -> (Vec<u64>, bool) {
651 const SCALE: f64 = 18446744073709549568.0_f64;
655 const PROB_SUM_SLACK: f64 = 1e-12;
656
657 let mut out = Vec::with_capacity(probs.len() * 3);
658 let mut valid = true;
659 for &[px, py, pz] in probs {
660 let finite = px.is_finite() && py.is_finite() && pz.is_finite();
661 let sum = px + py + pz;
662 if !finite || px < 0.0 || py < 0.0 || pz < 0.0 || sum > 1.0 + PROB_SUM_SLACK {
663 valid = false;
664 }
665 let to_u64 = |p: f64| -> u64 {
666 if !p.is_finite() || p <= 0.0 {
667 0
668 } else if p >= 1.0 {
669 u64::MAX
670 } else {
671 (p * SCALE) as u64
672 }
673 };
674 out.push(to_u64(px));
675 out.push(to_u64(px + py));
676 out.push(to_u64(sum));
677 }
678 (out, valid)
679}
680
681#[cfg(feature = "gpu")]
688fn transpose_events_to_row_major(
689 events: &FlatNoiseSensitivity,
690 num_meas: usize,
691) -> (Vec<u32>, Vec<u32>) {
692 let num_events = events.len();
693 let m_words = events.m_words;
694
695 let mut row_counts = vec![0u32; num_meas];
697 for e in 0..num_events {
698 let x = &events.x_data[e * m_words..(e + 1) * m_words];
699 let z = &events.z_data[e * m_words..(e + 1) * m_words];
700 for w in 0..m_words {
701 let union = x[w] | z[w];
702 let mut bits = union;
703 while bits != 0 {
704 let row = w * 64 + bits.trailing_zeros() as usize;
705 if row < num_meas {
706 row_counts[row] += 1;
707 }
708 bits &= bits - 1;
709 }
710 }
711 }
712
713 let mut offsets = Vec::with_capacity(num_meas + 1);
714 offsets.push(0u32);
715 let mut running = 0u32;
716 for c in &row_counts {
717 running += c;
718 offsets.push(running);
719 }
720 let total = running as usize;
721
722 let mut entries = vec![0u32; total];
726 let mut cursor: Vec<u32> = offsets[..num_meas].to_vec();
727 for e in 0..num_events {
728 let x = &events.x_data[e * m_words..(e + 1) * m_words];
729 let z = &events.z_data[e * m_words..(e + 1) * m_words];
730 for w in 0..m_words {
731 let union = x[w] | z[w];
732 let mut bits = union;
733 while bits != 0 {
734 let b = bits.trailing_zeros() as usize;
735 bits &= bits - 1;
736 let row = w * 64 + b;
737 if row >= num_meas {
738 continue;
739 }
740 let bit_mask = 1u64 << b;
741 let flag: u32 = (if x[w] & bit_mask != 0 { 1 } else { 0 })
742 | (if z[w] & bit_mask != 0 { 2 } else { 0 });
743 let slot = cursor[row] as usize;
744 entries[slot] = ((e as u32) << 2) | flag;
745 cursor[row] += 1;
746 }
747 }
748 }
749
750 (offsets, entries)
751}
752
753#[cfg(feature = "gpu")]
754fn flatten_event_rows_to_csr(
755 flat_data: &[u64],
756 num_rows: usize,
757 m_words: usize,
758) -> (Vec<u32>, Vec<u32>) {
759 let mut row_offsets = Vec::with_capacity(num_rows + 1);
760 let mut row_indices = Vec::new();
761
762 for row in 0..num_rows {
763 row_offsets.push(row_indices.len() as u32);
764 let base = row * m_words;
765 for word in 0..m_words {
766 let mut bits = flat_data[base + word];
767 while bits != 0 {
768 let bit = bits.trailing_zeros() as usize;
769 row_indices.push((word * 64 + bit) as u32);
770 bits &= bits - 1;
771 }
772 }
773 }
774 row_offsets.push(row_indices.len() as u32);
775
776 (row_offsets, row_indices)
777}
778
779#[cfg(feature = "gpu")]
780struct GpuNoiseCache {
781 context: std::sync::Arc<crate::gpu::GpuContext>,
782 x_row_offsets_dev: crate::gpu::GpuBuffer<u32>,
783 x_row_indices_dev: crate::gpu::GpuBuffer<u32>,
784 z_row_offsets_dev: crate::gpu::GpuBuffer<u32>,
785 z_row_indices_dev: crate::gpu::GpuBuffer<u32>,
786 x_masks_dev: crate::gpu::GpuBuffer<u64>,
787 z_masks_dev: crate::gpu::GpuBuffer<u64>,
788 x_masks_host: Vec<u64>,
789 z_masks_host: Vec<u64>,
790 event_thresholds_dev: crate::gpu::GpuBuffer<u64>,
795 event_thresholds_valid: bool,
799 row_event_offsets_dev: crate::gpu::GpuBuffer<u32>,
805 row_event_entries_dev: crate::gpu::GpuBuffer<u32>,
806}
807
808#[cfg(feature = "gpu")]
809impl GpuNoiseCache {
810 fn new(
811 context: std::sync::Arc<crate::gpu::GpuContext>,
812 events: &FlatNoiseSensitivity,
813 ) -> Result<Self> {
814 let device = context.device();
815 let num_events = events.len();
816 let (x_row_offsets, x_row_indices) =
817 flatten_event_rows_to_csr(&events.x_data, num_events, events.m_words);
818 let (z_row_offsets, z_row_indices) =
819 flatten_event_rows_to_csr(&events.z_data, num_events, events.m_words);
820
821 let mut x_row_offsets_dev =
822 crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, x_row_offsets.len().max(1))?;
823 let mut x_row_indices_dev =
824 crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, x_row_indices.len().max(1))?;
825 let mut z_row_offsets_dev =
826 crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, z_row_offsets.len().max(1))?;
827 let mut z_row_indices_dev =
828 crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, z_row_indices.len().max(1))?;
829
830 if !x_row_offsets.is_empty() {
831 x_row_offsets_dev.copy_from_host(device, &x_row_offsets)?;
832 }
833 if !x_row_indices.is_empty() {
834 x_row_indices_dev.copy_from_host(device, &x_row_indices)?;
835 }
836 if !z_row_offsets.is_empty() {
837 z_row_offsets_dev.copy_from_host(device, &z_row_offsets)?;
838 }
839 if !z_row_indices.is_empty() {
840 z_row_indices_dev.copy_from_host(device, &z_row_indices)?;
841 }
842
843 let (thresholds_host, thresholds_valid) = flatten_event_thresholds_u64(&events.probs);
844 let mut event_thresholds_dev =
845 crate::gpu::GpuBuffer::<u64>::alloc_zeros(device, thresholds_host.len().max(1))?;
846 if !thresholds_host.is_empty() {
847 event_thresholds_dev.copy_from_host(device, &thresholds_host)?;
848 }
849
850 let (row_event_offsets_host, row_event_entries_host) =
853 transpose_events_to_row_major(events, events.m_words * 64);
854 let mut row_event_offsets_dev =
855 crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, row_event_offsets_host.len().max(1))?;
856 let mut row_event_entries_dev =
857 crate::gpu::GpuBuffer::<u32>::alloc_zeros(device, row_event_entries_host.len().max(1))?;
858 if !row_event_offsets_host.is_empty() {
859 row_event_offsets_dev.copy_from_host(device, &row_event_offsets_host)?;
860 }
861 if !row_event_entries_host.is_empty() {
862 row_event_entries_dev.copy_from_host(device, &row_event_entries_host)?;
863 }
864
865 Ok(Self {
866 context: context.clone(),
867 x_row_offsets_dev,
868 x_row_indices_dev,
869 z_row_offsets_dev,
870 z_row_indices_dev,
871 x_masks_dev: crate::gpu::GpuBuffer::<u64>::alloc_zeros(device, 1)?,
872 z_masks_dev: crate::gpu::GpuBuffer::<u64>::alloc_zeros(device, 1)?,
873 x_masks_host: Vec::new(),
874 z_masks_host: Vec::new(),
875 event_thresholds_dev,
876 event_thresholds_valid: thresholds_valid,
877 row_event_offsets_dev,
878 row_event_entries_dev,
879 })
880 }
881
882 fn ensure_mask_capacity(&mut self, len: usize) -> Result<()> {
883 let needed = len.max(1);
884 let device = self.context.device();
885 if self.x_masks_dev.len() < needed {
886 self.x_masks_dev = crate::gpu::GpuBuffer::<u64>::alloc_zeros(device, needed)?;
887 }
888 if self.z_masks_dev.len() < needed {
889 self.z_masks_dev = crate::gpu::GpuBuffer::<u64>::alloc_zeros(device, needed)?;
890 }
891 if self.x_masks_host.len() < needed {
892 self.x_masks_host.resize(needed, 0);
893 }
894 if self.z_masks_host.len() < needed {
895 self.z_masks_host.resize(needed, 0);
896 }
897 Ok(())
898 }
899}
900
901pub struct NoisyCompiledSampler {
912 noiseless: crate::sim::compiled::CompiledSampler,
913 events: FlatNoiseSensitivity,
914 num_measurements: usize,
915 rng: ChaCha8Rng,
916 z_lut: Option<NoiseFlipLut>,
917 x_lut: Option<NoiseFlipLut>,
918 #[cfg(feature = "gpu")]
919 gpu_noise_cache: Option<GpuNoiseCache>,
920}
921
922impl NoisyCompiledSampler {
923 #[cfg(feature = "gpu")]
931 pub fn with_gpu(mut self, context: std::sync::Arc<crate::gpu::GpuContext>) -> Self {
932 self.noiseless = self.noiseless.with_gpu(context);
933 self.gpu_noise_cache = None;
934 self
935 }
936
937 fn sample_bulk_words_shot_major_cpu(&mut self, num_shots: usize) -> (Vec<u64>, usize) {
938 let (mut accum, m_words) = self.noiseless.sample_bulk_words_shot_major(num_shots);
939 self.apply_noise_bulk(&mut accum, num_shots, m_words);
940
941 let ref_bits_packed = self.noiseless.ref_bits_packed();
942
943 #[cfg(feature = "parallel")]
944 if num_shots >= 256 {
945 use rayon::prelude::*;
946 accum
947 .par_chunks_mut(m_words)
948 .for_each(|shot| xor_words(shot, ref_bits_packed));
949 } else {
950 for s in 0..num_shots {
951 let shot_base = s * m_words;
952 xor_words(&mut accum[shot_base..shot_base + m_words], ref_bits_packed);
953 }
954 }
955
956 #[cfg(not(feature = "parallel"))]
957 for s in 0..num_shots {
958 let shot_base = s * m_words;
959 xor_words(&mut accum[shot_base..shot_base + m_words], ref_bits_packed);
960 }
961
962 (accum, m_words)
963 }
964
965 #[cfg(feature = "gpu")]
966 fn try_sample_bulk_packed_with_gpu_context(&mut self, num_shots: usize) -> Result<PackedShots> {
967 let m_words = self.num_measurements.div_ceil(64);
968 if num_shots == 0 || self.num_measurements == 0 {
969 return Ok(PackedShots::from_shot_major(
970 vec![0u64; num_shots * m_words],
971 num_shots,
972 self.num_measurements,
973 ));
974 }
975
976 let mut accum = match self.noiseless.try_sample_bulk_shot_major_gpu(num_shots) {
982 Some(Ok(data)) => data,
983 Some(Err(e)) => return Err(e),
984 None => {
985 let (accum, _m_words) = self.sample_bulk_words_shot_major_cpu(num_shots);
986 return Ok(PackedShots::from_shot_major(
987 accum,
988 num_shots,
989 self.num_measurements,
990 ));
991 }
992 };
993 self.apply_noise_bulk(&mut accum, num_shots, m_words);
994 Ok(PackedShots::from_shot_major(
995 accum,
996 num_shots,
997 self.num_measurements,
998 ))
999 }
1000
1001 #[cfg(feature = "gpu")]
1002 fn take_gpu_noise_cache(&mut self) -> Result<GpuNoiseCache> {
1003 if let Some(cache) = self.gpu_noise_cache.take() {
1004 return Ok(cache);
1005 }
1006 let context = self
1007 .noiseless
1008 .gpu_context()
1009 .expect("gpu noise cache requested without gpu_context");
1010 GpuNoiseCache::new(context, &self.events)
1011 }
1012
1013 #[cfg(feature = "gpu")]
1014 fn fill_noise_masks_event_major(
1015 events: &FlatNoiseSensitivity,
1016 rng: &mut ChaCha8Rng,
1017 x_masks: &mut [u64],
1018 z_masks: &mut [u64],
1019 chunk_shots: usize,
1020 chunk_s_words: usize,
1021 ) {
1022 x_masks.fill(0);
1023 z_masks.fill(0);
1024
1025 #[cfg(feature = "parallel")]
1031 {
1032 use rand::RngCore;
1033 use rayon::prelude::*;
1034 const MIN_PAR_EVENTS: usize = 64;
1039 const MIN_PAR_SLOTS: usize = 32_768;
1040 let work_slots = events.len().saturating_mul(chunk_s_words);
1041 if events.len() >= MIN_PAR_EVENTS
1042 && work_slots >= MIN_PAR_SLOTS
1043 && x_masks.len() == events.len() * chunk_s_words
1044 && z_masks.len() == events.len() * chunk_s_words
1045 {
1046 let thread_seeds: Vec<[u64; 4]> = (0..events.len())
1047 .map(|_| {
1048 [
1049 rng.next_u64(),
1050 rng.next_u64(),
1051 rng.next_u64(),
1052 rng.next_u64(),
1053 ]
1054 })
1055 .collect();
1056
1057 x_masks
1058 .par_chunks_mut(chunk_s_words)
1059 .zip(z_masks.par_chunks_mut(chunk_s_words))
1060 .zip(thread_seeds.par_iter())
1061 .enumerate()
1062 .for_each(|(i, ((x_slot, z_slot), seed))| {
1063 let mut trng =
1064 crate::sim::compiled::rng::Xoshiro256PlusPlus::from_seeds(*seed);
1065 fill_one_event(x_slot, z_slot, events.probs[i], chunk_shots, &mut trng);
1066 });
1067 return;
1068 }
1069 }
1070
1071 for (i, &probs) in events.probs.iter().enumerate() {
1072 let base = i * chunk_s_words;
1073 let end = base + chunk_s_words;
1074 fill_one_event(
1075 &mut x_masks[base..end],
1076 &mut z_masks[base..end],
1077 probs,
1078 chunk_shots,
1079 rng,
1080 );
1081 }
1082 }
1083
1084 #[cfg(feature = "gpu")]
1085 fn apply_noise_device_meas_major(
1086 &mut self,
1087 packed: &mut crate::sim::compiled::DevicePackedShots,
1088 ) -> Result<()> {
1089 if self.events.is_empty() || packed.num_shots() == 0 || self.num_measurements == 0 {
1090 return Ok(());
1091 }
1092
1093 let num_events = self.events.len();
1094 let mut cache = self.take_gpu_noise_cache()?;
1095 let total_s_words = packed.s_words();
1096 let context = packed.context().clone();
1097 let num_shots = packed.num_shots();
1098
1099 let use_fused = cache.event_thresholds_valid;
1106 let master_seed: u64 = if use_fused {
1107 rand::RngCore::next_u64(&mut self.rng)
1108 } else {
1109 0
1110 };
1111
1112 let mut shot_start = 0;
1113 while shot_start < num_shots {
1114 let chunk_shots = (shot_start + NOISE_LUT_TILE).min(num_shots) - shot_start;
1115 let chunk_s_words = chunk_shots.div_ceil(64);
1116 let word_offset = shot_start / 64;
1117
1118 if use_fused {
1119 crate::gpu::kernels::bts::generate_and_apply_noise_masks_meas_major_by_row(
1120 &context,
1121 crate::gpu::kernels::bts::NoiseDeviceGenApplyByRow {
1122 meas_major: packed.data_mut(),
1123 num_meas: self.num_measurements,
1124 s_words: total_s_words,
1125 word_offset,
1126 chunk_s_words,
1127 row_event_offsets: &cache.row_event_offsets_dev,
1128 row_event_entries: &cache.row_event_entries_dev,
1129 event_thresholds: &cache.event_thresholds_dev,
1130 master_seed,
1131 batch_offset: word_offset as u64,
1132 },
1133 )?;
1134 } else {
1135 let mask_len = num_events * chunk_s_words;
1136 cache.ensure_mask_capacity(mask_len)?;
1137 Self::fill_noise_masks_event_major(
1138 &self.events,
1139 &mut self.rng,
1140 &mut cache.x_masks_host[..mask_len],
1141 &mut cache.z_masks_host[..mask_len],
1142 chunk_shots,
1143 chunk_s_words,
1144 );
1145 if mask_len > 0 {
1146 cache
1147 .x_masks_dev
1148 .copy_from_host(cache.context.device(), &cache.x_masks_host[..mask_len])?;
1149 cache
1150 .z_masks_dev
1151 .copy_from_host(cache.context.device(), &cache.z_masks_host[..mask_len])?;
1152 let base = crate::gpu::kernels::bts::NoiseApplyBase {
1153 meas_major: packed.data_mut(),
1154 num_meas: self.num_measurements,
1155 s_words: total_s_words,
1156 word_offset,
1157 chunk_s_words,
1158 num_events,
1159 x_row_offsets: &cache.x_row_offsets_dev,
1160 x_row_indices: &cache.x_row_indices_dev,
1161 z_row_offsets: &cache.z_row_offsets_dev,
1162 z_row_indices: &cache.z_row_indices_dev,
1163 };
1164 crate::gpu::kernels::bts::apply_noise_masks_meas_major(
1165 &context,
1166 crate::gpu::kernels::bts::NoiseMaskApply {
1167 base,
1168 x_masks: &cache.x_masks_dev,
1169 z_masks: &cache.z_masks_dev,
1170 },
1171 )?;
1172 }
1173 }
1174
1175 shot_start += chunk_shots;
1176 }
1177
1178 self.gpu_noise_cache = Some(cache);
1179 Ok(())
1180 }
1181
1182 #[cfg(feature = "gpu")]
1183 fn try_sample_marginals_gpu(&mut self, total_shots: usize) -> Option<Result<Vec<f64>>> {
1184 if !self.noiseless.should_use_gpu_bts(total_shots) {
1185 return None;
1186 }
1187
1188 let mut packed = match self.noiseless.sample_bulk_packed_device(total_shots) {
1189 Ok(packed) => packed,
1190 Err(e) => return Some(Err(e)),
1191 };
1192 if let Err(e) = self.apply_noise_device_meas_major(&mut packed) {
1193 return Some(Err(e));
1194 }
1195 Some(packed.marginals())
1196 }
1197
1198 #[cfg(feature = "gpu")]
1199 fn try_sample_counts_gpu(
1200 &mut self,
1201 total_shots: usize,
1202 ) -> Option<Result<std::collections::HashMap<Vec<u64>, u64>>> {
1203 if !self.noiseless.should_use_gpu_bts(total_shots) {
1204 return None;
1205 }
1206
1207 let mut packed = match self.noiseless.sample_bulk_packed_device(total_shots) {
1208 Ok(packed) => packed,
1209 Err(e) => return Some(Err(e)),
1210 };
1211 if let Err(e) = self.apply_noise_device_meas_major(&mut packed) {
1212 return Some(Err(e));
1213 }
1214 Some(packed.counts_with_rank_hint(self.num_measurements))
1215 }
1216
1217 pub fn sample_bulk_packed(&mut self, num_shots: usize) -> PackedShots {
1224 match self.try_sample_bulk_packed(num_shots) {
1225 Ok(packed) => packed,
1226 Err(_) => self.sample_bulk_packed_cpu(num_shots),
1227 }
1228 }
1229
1230 pub fn try_sample_bulk_packed(&mut self, num_shots: usize) -> Result<PackedShots> {
1231 #[cfg(feature = "gpu")]
1232 if self.noiseless.has_gpu_context() {
1233 return self.try_sample_bulk_packed_with_gpu_context(num_shots);
1234 }
1235
1236 Ok(self.sample_bulk_packed_cpu(num_shots))
1237 }
1238
1239 fn sample_bulk_packed_cpu(&mut self, num_shots: usize) -> PackedShots {
1240 let m_words = self.num_measurements.div_ceil(64);
1241 if num_shots == 0 || self.num_measurements == 0 {
1242 return PackedShots::from_shot_major(
1243 vec![0u64; num_shots * m_words],
1244 num_shots,
1245 self.num_measurements,
1246 );
1247 }
1248
1249 let (accum, _m_words) = self.sample_bulk_words_shot_major_cpu(num_shots);
1250 PackedShots::from_shot_major(accum, num_shots, self.num_measurements)
1251 }
1252
1253 pub fn sample_chunked<A: crate::sim::compiled::ShotAccumulator>(
1258 &mut self,
1259 total_shots: usize,
1260 acc: &mut A,
1261 ) {
1262 let chunk_size = crate::sim::compiled::default_chunk_size(self.num_measurements);
1263 self.sample_chunked_with_size(total_shots, chunk_size, acc);
1264 }
1265
1266 pub fn sample_chunked_with_size<A: crate::sim::compiled::ShotAccumulator>(
1271 &mut self,
1272 total_shots: usize,
1273 chunk_size: usize,
1274 acc: &mut A,
1275 ) {
1276 #[cfg(feature = "gpu")]
1277 if self.noiseless.has_gpu_context()
1278 && self
1279 .noiseless
1280 .should_use_gpu_bts(total_shots.min(chunk_size.max(1)))
1281 {
1282 let mut remaining = total_shots;
1283 while remaining > 0 {
1284 let this_batch = remaining.min(chunk_size);
1285 let packed = match self.try_sample_bulk_packed_with_gpu_context(this_batch) {
1286 Ok(packed) => packed,
1287 Err(_) => self.sample_bulk_packed_cpu(this_batch),
1288 };
1289 acc.accumulate(&packed);
1290 remaining -= this_batch;
1291 }
1292 return;
1293 }
1294
1295 let m_words = self.num_measurements.div_ceil(64);
1296 let mut accum_buf: Vec<u64> = Vec::new();
1297 let mut rand_buf: Vec<u8> = Vec::new();
1298 let ref_bits_packed = self.noiseless.ref_bits_packed().to_vec();
1299 let mut remaining = total_shots;
1300 while remaining > 0 {
1301 let this_batch = remaining.min(chunk_size);
1302 self.noiseless.sample_bulk_words_shot_major_reuse(
1303 &mut accum_buf,
1304 &mut rand_buf,
1305 this_batch,
1306 );
1307
1308 self.apply_noise_bulk(&mut accum_buf, this_batch, m_words);
1309
1310 #[cfg(feature = "parallel")]
1311 if this_batch >= 256 {
1312 use rayon::prelude::*;
1313 accum_buf[..this_batch * m_words]
1314 .par_chunks_mut(m_words)
1315 .for_each(|shot| xor_words(shot, &ref_bits_packed));
1316 } else {
1317 for s in 0..this_batch {
1318 let shot_base = s * m_words;
1319 xor_words(
1320 &mut accum_buf[shot_base..shot_base + m_words],
1321 &ref_bits_packed,
1322 );
1323 }
1324 }
1325
1326 #[cfg(not(feature = "parallel"))]
1327 for s in 0..this_batch {
1328 let shot_base = s * m_words;
1329 xor_words(
1330 &mut accum_buf[shot_base..shot_base + m_words],
1331 &ref_bits_packed,
1332 );
1333 }
1334
1335 let data = std::mem::take(&mut accum_buf);
1336 let packed = PackedShots::from_shot_major(data, this_batch, self.num_measurements);
1337 acc.accumulate(&packed);
1338 accum_buf = packed.into_data();
1339 remaining -= this_batch;
1340 }
1341 }
1342
1343 pub fn sample_counts(
1345 &mut self,
1346 total_shots: usize,
1347 ) -> std::collections::HashMap<Vec<u64>, u64> {
1348 match self.try_sample_counts(total_shots) {
1349 Ok(counts) => counts,
1350 Err(_) => self.sample_counts_cpu(total_shots),
1351 }
1352 }
1353
1354 pub fn try_sample_counts(
1355 &mut self,
1356 total_shots: usize,
1357 ) -> Result<std::collections::HashMap<Vec<u64>, u64>> {
1358 #[cfg(feature = "gpu")]
1359 if let Some(counts) = self.try_sample_counts_gpu(total_shots) {
1360 return counts;
1361 }
1362
1363 Ok(self.sample_counts_cpu(total_shots))
1364 }
1365
1366 fn sample_counts_cpu(
1367 &mut self,
1368 total_shots: usize,
1369 ) -> std::collections::HashMap<Vec<u64>, u64> {
1370 let mut acc = crate::sim::compiled::HistogramAccumulator::new();
1371 self.sample_chunked(total_shots, &mut acc);
1372 acc.into_counts()
1373 }
1374
1375 pub fn sample_marginals(&mut self, total_shots: usize) -> Vec<f64> {
1377 match self.try_sample_marginals(total_shots) {
1378 Ok(marginals) => marginals,
1379 Err(_) => self.sample_marginals_cpu(total_shots),
1380 }
1381 }
1382
1383 pub fn try_sample_marginals(&mut self, total_shots: usize) -> Result<Vec<f64>> {
1384 #[cfg(feature = "gpu")]
1385 if let Some(marginals) = self.try_sample_marginals_gpu(total_shots) {
1386 return marginals;
1387 }
1388
1389 Ok(self.sample_marginals_cpu(total_shots))
1390 }
1391
1392 fn sample_marginals_cpu(&mut self, total_shots: usize) -> Vec<f64> {
1393 let mut acc = crate::sim::compiled::MarginalsAccumulator::new(self.num_measurements);
1394 self.sample_chunked(total_shots, &mut acc);
1395 acc.marginals()
1396 }
1397
1398 #[cfg(test)]
1399 fn sample_bulk(&mut self, num_shots: usize) -> Vec<Vec<bool>> {
1400 self.sample_bulk_packed(num_shots).to_shots()
1401 }
1402
1403 fn apply_noise_bulk(&mut self, accum: &mut [u64], num_shots: usize, m_words: usize) {
1404 if self.events.is_empty() {
1405 return;
1406 }
1407
1408 if self.z_lut.is_some() {
1409 self.apply_noise_bulk_grouped(accum, num_shots, m_words);
1410 } else {
1411 self.apply_noise_bulk_scalar(accum, num_shots, m_words);
1412 }
1413 }
1414
1415 fn apply_noise_bulk_scalar(&mut self, accum: &mut [u64], num_shots: usize, m_words: usize) {
1416 #[cfg(feature = "parallel")]
1417 if num_shots >= 4096 {
1418 self.apply_noise_bulk_scalar_par(accum, num_shots, m_words);
1419 return;
1420 }
1421
1422 self.apply_noise_bulk_scalar_seq(accum, num_shots, m_words);
1423 }
1424
1425 fn apply_noise_bulk_scalar_seq(&mut self, accum: &mut [u64], num_shots: usize, m_words: usize) {
1426 Self::apply_noise_range(&self.events, accum, 0, num_shots, m_words, &mut self.rng);
1427 }
1428
1429 #[cfg(feature = "parallel")]
1430 fn apply_noise_bulk_scalar_par(&mut self, accum: &mut [u64], num_shots: usize, m_words: usize) {
1431 use rayon::prelude::*;
1432
1433 let num_threads = rayon::current_num_threads().max(1);
1434 let shots_per_thread = (num_shots.div_ceil(num_threads) + 63) & !63;
1435
1436 let seeds: Vec<u64> = (0..num_threads)
1437 .map(|_| rand::Rng::random(&mut self.rng))
1438 .collect();
1439
1440 let events = &self.events;
1441
1442 accum
1443 .par_chunks_mut(shots_per_thread * m_words)
1444 .enumerate()
1445 .for_each(|(tid, chunk)| {
1446 let chunk_shots = chunk.len() / m_words;
1447 if chunk_shots == 0 {
1448 return;
1449 }
1450 let mut rng = ChaCha8Rng::seed_from_u64(seeds[tid]);
1451 Self::apply_noise_range(events, chunk, 0, chunk_shots, m_words, &mut rng);
1452 });
1453 }
1454
1455 fn apply_noise_range(
1456 events: &FlatNoiseSensitivity,
1457 accum: &mut [u64],
1458 start: usize,
1459 end: usize,
1460 m_words: usize,
1461 rng: &mut ChaCha8Rng,
1462 ) {
1463 let ne = events.len();
1464 let num_shots = end - start;
1465
1466 for i in 0..ne {
1467 let [px, py, pz] = events.probs[i];
1468 let p_event = px + py + pz;
1469 if p_event == 0.0 {
1470 continue;
1471 }
1472
1473 if p_event >= 0.5 || num_shots < 32 {
1474 for s in start..end {
1475 let r: f64 = rand::Rng::random(rng);
1476 if r < px {
1477 let b = s * m_words;
1478 xor_words(&mut accum[b..b + m_words], events.z_flip(i));
1479 } else if r < px + py {
1480 let b = s * m_words;
1481 xor_words(&mut accum[b..b + m_words], events.x_flip(i));
1482 xor_words(&mut accum[b..b + m_words], events.z_flip(i));
1483 } else if r < p_event {
1484 let b = s * m_words;
1485 xor_words(&mut accum[b..b + m_words], events.x_flip(i));
1486 }
1487 }
1488 } else {
1489 let ln_1mp = (1.0 - p_event).ln();
1490 let px_frac = px / p_event;
1491 let pxy_frac = (px + py) / p_event;
1492
1493 let mut pos = start + geometric_sample(rng, ln_1mp);
1494 while pos < end {
1495 let r: f64 = rand::Rng::random(rng);
1496 let b = pos * m_words;
1497 if r < px_frac {
1498 xor_words(&mut accum[b..b + m_words], events.z_flip(i));
1499 } else if r < pxy_frac {
1500 xor_words(&mut accum[b..b + m_words], events.x_flip(i));
1501 xor_words(&mut accum[b..b + m_words], events.z_flip(i));
1502 } else {
1503 xor_words(&mut accum[b..b + m_words], events.x_flip(i));
1504 }
1505 pos += 1 + geometric_sample(rng, ln_1mp);
1506 }
1507 }
1508 }
1509 }
1510
1511 fn apply_noise_bulk_grouped(&mut self, accum: &mut [u64], num_shots: usize, m_words: usize) {
1512 let num_events = self.events.len();
1513 let e_words = num_events.div_ceil(64);
1514
1515 for tile_start in (0..num_shots).step_by(NOISE_LUT_TILE) {
1516 let tile_end = (tile_start + NOISE_LUT_TILE).min(num_shots);
1517 let tile_n = tile_end - tile_start;
1518
1519 let mut z_mask = vec![0u64; tile_n * e_words];
1520 let mut x_mask = vec![0u64; tile_n * e_words];
1521
1522 for i in 0..num_events {
1523 let [px, py, pz] = self.events.probs[i];
1524 let p_event = px + py + pz;
1525 if p_event == 0.0 {
1526 continue;
1527 }
1528
1529 let ew = i / 64;
1530 let eb = 1u64 << (i % 64);
1531
1532 if p_event >= 0.5 || tile_n < 32 {
1533 for s in 0..tile_n {
1534 let r: f64 = rand::Rng::random(&mut self.rng);
1535 if r < px {
1536 z_mask[s * e_words + ew] |= eb;
1537 } else if r < px + py {
1538 z_mask[s * e_words + ew] |= eb;
1539 x_mask[s * e_words + ew] |= eb;
1540 } else if r < p_event {
1541 x_mask[s * e_words + ew] |= eb;
1542 }
1543 }
1544 } else {
1545 let ln_1mp = (1.0 - p_event).ln();
1546 let px_frac = px / p_event;
1547 let pxy_frac = (px + py) / p_event;
1548
1549 let mut pos = geometric_sample(&mut self.rng, ln_1mp);
1550 while pos < tile_n {
1551 let r: f64 = rand::Rng::random(&mut self.rng);
1552 if r < px_frac {
1553 z_mask[pos * e_words + ew] |= eb;
1554 } else if r < pxy_frac {
1555 z_mask[pos * e_words + ew] |= eb;
1556 x_mask[pos * e_words + ew] |= eb;
1557 } else {
1558 x_mask[pos * e_words + ew] |= eb;
1559 }
1560 pos += 1 + geometric_sample(&mut self.rng, ln_1mp);
1561 }
1562 }
1563 }
1564
1565 let z_lut = self.z_lut.as_ref().unwrap();
1566 let x_lut = self.x_lut.as_ref().unwrap();
1567
1568 for s in 0..tile_n {
1569 let shot_base = (tile_start + s) * m_words;
1570 let mask_base = s * e_words;
1571 let shot_accum = &mut accum[shot_base..shot_base + m_words];
1572
1573 z_lut.apply_masked(shot_accum, &z_mask[mask_base..mask_base + e_words]);
1574 x_lut.apply_masked(shot_accum, &x_mask[mask_base..mask_base + e_words]);
1575 }
1576 }
1577 }
1578}
1579
1580pub fn compile_noisy(
1585 circuit: &Circuit,
1586 noise: &NoiseModel,
1587 seed: u64,
1588) -> Result<NoisyCompiledSampler> {
1589 noise.ensure_pauli_only()?;
1590 if circuit.num_qubits >= 4 {
1591 let blocks = circuit.independent_subsystems();
1592 if blocks.len() > 1 {
1593 let max_block = blocks.iter().map(|b| b.len()).max().unwrap_or(0);
1594 if max_block < circuit.num_qubits {
1595 return compile_noisy_filtered(circuit, noise, &blocks, seed);
1596 }
1597 }
1598 }
1599
1600 compile_noisy_monolithic(circuit, noise, seed)
1601}
1602
1603fn compile_noisy_filtered(
1604 circuit: &Circuit,
1605 noise: &NoiseModel,
1606 blocks: &[Vec<usize>],
1607 seed: u64,
1608) -> Result<NoisyCompiledSampler> {
1609 let noiseless = compile_measurements(circuit, seed)?;
1610
1611 let measurement_qubits: Vec<usize> = circuit
1612 .instructions
1613 .iter()
1614 .filter_map(|inst| match inst {
1615 Instruction::Measure { qubit, .. } => Some(*qubit),
1616 _ => None,
1617 })
1618 .collect();
1619 let num_measurements = measurement_qubits.len();
1620 let m_words = num_measurements.div_ceil(64);
1621
1622 if num_measurements == 0 {
1623 return Ok(NoisyCompiledSampler {
1624 noiseless,
1625 events: FlatNoiseSensitivity::new(1, 0),
1626
1627 num_measurements: 0,
1628 rng: ChaCha8Rng::seed_from_u64(seed.wrapping_add(0xA01CE)),
1629 z_lut: None,
1630 x_lut: None,
1631 #[cfg(feature = "gpu")]
1632 gpu_noise_cache: None,
1633 });
1634 }
1635
1636 let mut qubit_to_block = vec![0usize; circuit.num_qubits];
1637 let mut qubit_to_local = vec![0usize; circuit.num_qubits];
1638 for (bi, block) in blocks.iter().enumerate() {
1639 for (li, &q) in block.iter().enumerate() {
1640 qubit_to_block[q] = bi;
1641 qubit_to_local[q] = li;
1642 }
1643 }
1644
1645 let mut block_meas: Vec<Vec<usize>> = vec![Vec::new(); blocks.len()];
1646 for (mi, &q) in measurement_qubits.iter().enumerate() {
1647 block_meas[qubit_to_block[q]].push(mi);
1648 }
1649
1650 let total_noise_events: usize = noise.after_gate.iter().map(|ops| ops.len()).sum();
1651 let mut events = FlatNoiseSensitivity::new(m_words, total_noise_events);
1652 let mut global_x_buf = vec![0u64; m_words];
1653 let mut global_z_buf = vec![0u64; m_words];
1654
1655 for (bi, block) in blocks.iter().enumerate() {
1656 let bm_list = &block_meas[bi];
1657 if bm_list.is_empty() {
1658 continue;
1659 }
1660
1661 let bn = block.len();
1662 let bm = bm_list.len();
1663 let bm_words = bm.div_ceil(64);
1664
1665 let mut x_packed: Vec<Vec<u64>> = vec![vec![0u64; bm_words]; bn];
1666 let mut z_packed: Vec<Vec<u64>> = vec![vec![0u64; bm_words]; bn];
1667 let mut sign_packed: Vec<u64> = vec![0u64; bm_words];
1668
1669 for (local_mi, &global_mi) in bm_list.iter().enumerate() {
1670 let q = measurement_qubits[global_mi];
1671 let local_q = qubit_to_local[q];
1672 z_packed[local_q][local_mi / 64] |= 1u64 << (local_mi % 64);
1673 }
1674
1675 let block_gates: Vec<(usize, &Gate, SmallVec<[usize; 4]>)> = circuit
1676 .instructions
1677 .iter()
1678 .enumerate()
1679 .filter_map(|(idx, inst)| match inst {
1680 Instruction::Gate { gate, targets } => {
1681 if targets.iter().all(|&t| qubit_to_block[t] == bi) {
1682 let local_targets: SmallVec<[usize; 4]> =
1683 targets.iter().map(|&t| qubit_to_local[t]).collect();
1684 Some((idx, gate, local_targets))
1685 } else {
1686 None
1687 }
1688 }
1689 Instruction::Conditional { gate, targets, .. } => {
1690 if targets.iter().all(|&t| qubit_to_block[t] == bi) {
1691 let local_targets: SmallVec<[usize; 4]> =
1692 targets.iter().map(|&t| qubit_to_local[t]).collect();
1693 Some((idx, gate, local_targets))
1694 } else {
1695 None
1696 }
1697 }
1698 _ => None,
1699 })
1700 .collect();
1701
1702 for (instr_idx, gate, local_targets) in block_gates.iter().rev() {
1703 let noise_ops = &noise.after_gate[*instr_idx];
1704 if !noise_ops.is_empty() {
1705 for event in noise_ops {
1706 let (px, py, pz) = event.pauli_probs();
1707 let local_q = qubit_to_local[event.qubit()];
1708
1709 let has_any = x_packed[local_q].iter().any(|&w| w != 0)
1710 || z_packed[local_q].iter().any(|&w| w != 0);
1711 if has_any {
1712 global_x_buf.fill(0);
1713 global_z_buf.fill(0);
1714 for (local_mi, &global_mi) in bm_list.iter().enumerate() {
1715 if (x_packed[local_q][local_mi / 64] >> (local_mi % 64)) & 1 != 0 {
1716 global_x_buf[global_mi / 64] |= 1u64 << (global_mi % 64);
1717 }
1718 if (z_packed[local_q][local_mi / 64] >> (local_mi % 64)) & 1 != 0 {
1719 global_z_buf[global_mi / 64] |= 1u64 << (global_mi % 64);
1720 }
1721 }
1722 events.push(&global_x_buf, &global_z_buf, px, py, pz);
1723 }
1724 }
1725 }
1726
1727 batch_propagate_backward(
1728 &mut x_packed,
1729 &mut z_packed,
1730 &mut sign_packed,
1731 gate,
1732 local_targets.as_slice(),
1733 bm_words,
1734 );
1735 }
1736 }
1737
1738 let (z_lut, x_lut) = build_noise_luts(&events);
1739
1740 Ok(NoisyCompiledSampler {
1741 noiseless,
1742 events,
1743
1744 num_measurements,
1745 rng: ChaCha8Rng::seed_from_u64(seed.wrapping_add(0xCAFE_BABE)),
1746 z_lut,
1747 x_lut,
1748 #[cfg(feature = "gpu")]
1749 gpu_noise_cache: None,
1750 })
1751}
1752
1753fn compile_noisy_monolithic(
1754 circuit: &Circuit,
1755 noise: &NoiseModel,
1756 seed: u64,
1757) -> Result<NoisyCompiledSampler> {
1758 let noiseless = compile_measurements(circuit, seed)?;
1759
1760 let n = circuit.num_qubits;
1761
1762 let gate_indices: Vec<(usize, &Gate, &[usize])> = circuit
1763 .instructions
1764 .iter()
1765 .enumerate()
1766 .filter_map(|(idx, inst)| match inst {
1767 Instruction::Gate { gate, targets } => Some((idx, gate, targets.as_slice())),
1768 Instruction::Conditional { gate, targets, .. } => Some((idx, gate, targets.as_slice())),
1769 _ => None,
1770 })
1771 .collect();
1772
1773 let measurement_qubits: Vec<usize> = circuit
1774 .instructions
1775 .iter()
1776 .filter_map(|inst| match inst {
1777 Instruction::Measure { qubit, .. } => Some(*qubit),
1778 _ => None,
1779 })
1780 .collect();
1781 let num_measurements = measurement_qubits.len();
1782 let m_words = num_measurements.div_ceil(64);
1783
1784 if num_measurements == 0 {
1785 return Ok(NoisyCompiledSampler {
1786 noiseless,
1787 events: FlatNoiseSensitivity::new(m_words, 0),
1788
1789 num_measurements: 0,
1790 rng: ChaCha8Rng::seed_from_u64(seed.wrapping_add(0xA01CE)),
1791 z_lut: None,
1792 x_lut: None,
1793 #[cfg(feature = "gpu")]
1794 gpu_noise_cache: None,
1795 });
1796 }
1797
1798 let mut x_packed: Vec<Vec<u64>> = vec![vec![0u64; m_words]; n];
1799 let mut z_packed: Vec<Vec<u64>> = vec![vec![0u64; m_words]; n];
1800 let mut sign_packed: Vec<u64> = vec![0u64; m_words];
1801
1802 for (mi, &q) in measurement_qubits.iter().enumerate() {
1803 z_packed[q][mi / 64] |= 1u64 << (mi % 64);
1804 }
1805
1806 let total_noise_events: usize = noise.after_gate.iter().map(|ops| ops.len()).sum();
1807 let mut events = FlatNoiseSensitivity::new(m_words, total_noise_events);
1808
1809 for &(instr_idx, gate, targets) in gate_indices.iter().rev() {
1810 let noise_ops = &noise.after_gate[instr_idx];
1811 if !noise_ops.is_empty() {
1812 for event in noise_ops {
1813 let (px, py, pz) = event.pauli_probs();
1814 let q = event.qubit();
1815
1816 let has_any =
1817 x_packed[q].iter().any(|&w| w != 0) || z_packed[q].iter().any(|&w| w != 0);
1818 if has_any {
1819 events.push(&x_packed[q], &z_packed[q], px, py, pz);
1820 }
1821 }
1822 }
1823
1824 batch_propagate_backward(
1825 &mut x_packed,
1826 &mut z_packed,
1827 &mut sign_packed,
1828 gate,
1829 targets,
1830 m_words,
1831 );
1832 }
1833
1834 let (z_lut, x_lut) = build_noise_luts(&events);
1835
1836 Ok(NoisyCompiledSampler {
1837 noiseless,
1838 events,
1839
1840 num_measurements,
1841 rng: ChaCha8Rng::seed_from_u64(seed.wrapping_add(0xCAFE_BABE)),
1842 z_lut,
1843 x_lut,
1844 #[cfg(feature = "gpu")]
1845 gpu_noise_cache: None,
1846 })
1847}
1848
1849const FRAME_BATCH_SIZE: usize = 256;
1850
1851struct ReferenceInfo {
1852 outcomes: Vec<bool>,
1853 is_random: Vec<bool>,
1854 random_x_support: Vec<Vec<usize>>,
1855}
1856
1857fn reference_simulation(circuit: &Circuit, seed: u64) -> Result<ReferenceInfo> {
1858 let mut stab = StabilizerBackend::new(seed);
1859 stab.init(circuit.num_qubits, circuit.num_classical_bits)?;
1860
1861 let meas_info: Vec<(usize, usize, usize)> = circuit
1862 .instructions
1863 .iter()
1864 .enumerate()
1865 .filter_map(|(i, inst)| match inst {
1866 Instruction::Measure {
1867 qubit,
1868 classical_bit,
1869 } => Some((i, *qubit, *classical_bit)),
1870 _ => None,
1871 })
1872 .collect();
1873
1874 let num_meas = meas_info.len();
1875 if num_meas == 0 {
1876 return Ok(ReferenceInfo {
1877 outcomes: Vec::new(),
1878 is_random: Vec::new(),
1879 random_x_support: Vec::new(),
1880 });
1881 }
1882
1883 let first_meas_idx = meas_info[0].0;
1884 let all_at_end = meas_info
1885 .iter()
1886 .enumerate()
1887 .all(|(i, &(inst_idx, _, _))| inst_idx == first_meas_idx + i);
1888
1889 if all_at_end {
1890 stab.apply_gates_only(&circuit.instructions[..first_meas_idx])?;
1891
1892 let measurements: Vec<(usize, usize)> = meas_info
1893 .iter()
1894 .map(|&(_, qubit, classical_bit)| (qubit, classical_bit))
1895 .collect();
1896 let (is_random, random_x_support, outcomes) = stab.batch_measure_ref_info(&measurements);
1897
1898 return Ok(ReferenceInfo {
1899 outcomes,
1900 is_random,
1901 random_x_support,
1902 });
1903 }
1904
1905 let mut is_random = Vec::with_capacity(num_meas);
1906 let mut random_x_support: Vec<Vec<usize>> = Vec::with_capacity(num_meas);
1907
1908 let mut seg_start = 0usize;
1909 for &(meas_inst_idx, qubit, classical_bit) in &meas_info {
1910 if seg_start < meas_inst_idx {
1911 stab.apply_gates_only(&circuit.instructions[seg_start..meas_inst_idx])?;
1912 }
1913
1914 let (meas_random, support) = stab.apply_measure_with_info(qubit, classical_bit);
1915 is_random.push(meas_random);
1916 random_x_support.push(support);
1917 seg_start = meas_inst_idx + 1;
1918 }
1919
1920 if seg_start < circuit.instructions.len() {
1921 stab.apply_gates_only(&circuit.instructions[seg_start..])?;
1922 }
1923
1924 let outcomes: Vec<bool> = meas_info
1925 .iter()
1926 .map(|&(_, _, cbit)| stab.classical_results()[cbit])
1927 .collect();
1928
1929 Ok(ReferenceInfo {
1930 outcomes,
1931 is_random,
1932 random_x_support,
1933 })
1934}
1935
1936#[inline(always)]
1937fn apply_gate_to_frame(
1938 gate: &Gate,
1939 targets: &[usize],
1940 x_frame: &mut [Vec<u64>],
1941 z_frame: &mut [Vec<u64>],
1942 bw: usize,
1943) {
1944 match gate {
1945 Gate::H => {
1946 let q = targets[0];
1947 std::mem::swap(&mut x_frame[q], &mut z_frame[q]);
1948 }
1949 Gate::S | Gate::Sdg => {
1950 let q = targets[0];
1951 for w in 0..bw {
1952 z_frame[q][w] ^= x_frame[q][w];
1953 }
1954 }
1955 Gate::SX | Gate::SXdg => {
1956 let q = targets[0];
1957 for w in 0..bw {
1958 x_frame[q][w] ^= z_frame[q][w];
1959 }
1960 }
1961 Gate::X | Gate::Y | Gate::Z | Gate::Id => {}
1962 Gate::Cx => {
1963 let ctrl = targets[0];
1964 let tgt = targets[1];
1965 for w in 0..bw {
1966 x_frame[tgt][w] ^= x_frame[ctrl][w];
1967 z_frame[ctrl][w] ^= z_frame[tgt][w];
1968 }
1969 }
1970 Gate::Cz => {
1971 let q0 = targets[0];
1972 let q1 = targets[1];
1973 for w in 0..bw {
1974 z_frame[q0][w] ^= x_frame[q1][w];
1975 z_frame[q1][w] ^= x_frame[q0][w];
1976 }
1977 }
1978 Gate::Swap => {
1979 let q0 = targets[0];
1980 let q1 = targets[1];
1981 x_frame.swap(q0, q1);
1982 z_frame.swap(q0, q1);
1983 }
1984 _ => {
1985 debug_assert!(
1986 false,
1987 "apply_gate_to_frame: unhandled Clifford gate {:?}",
1988 gate
1989 );
1990 }
1991 }
1992}
1993
1994fn run_shots_noisy_frame(
1995 circuit: &Circuit,
1996 noise: &NoiseModel,
1997 num_shots: usize,
1998 seed: u64,
1999) -> Result<ShotsResult> {
2000 let n = circuit.num_qubits;
2001 let num_classical = circuit.num_classical_bits;
2002
2003 let ref_info = reference_simulation(circuit, seed)?;
2004
2005 let classical_bit_order: Vec<usize> = circuit
2006 .instructions
2007 .iter()
2008 .filter_map(|inst| match inst {
2009 Instruction::Measure { classical_bit, .. } => Some(*classical_bit),
2010 _ => None,
2011 })
2012 .collect();
2013 let num_measurements = classical_bit_order.len();
2014 let m_words = num_measurements.div_ceil(64);
2015
2016 let mut all_packed = vec![0u64; num_shots * m_words];
2017 let mut rng = ChaCha8Rng::seed_from_u64(seed.wrapping_add(0xFAAB_E001));
2018
2019 for batch_start in (0..num_shots).step_by(FRAME_BATCH_SIZE) {
2020 let batch_end = (batch_start + FRAME_BATCH_SIZE).min(num_shots);
2021 let batch_n = batch_end - batch_start;
2022 let bw = batch_n.div_ceil(64);
2023
2024 let mut x_frame: Vec<Vec<u64>> = vec![vec![0u64; bw]; n];
2025 let mut z_frame: Vec<Vec<u64>> = vec![vec![0u64; bw]; n];
2026
2027 let mut meas_idx = 0usize;
2028
2029 for (idx, inst) in circuit.instructions.iter().enumerate() {
2030 match inst {
2031 Instruction::Gate { gate, targets }
2032 | Instruction::Conditional { gate, targets, .. } => {
2033 apply_gate_to_frame(gate, targets.as_slice(), &mut x_frame, &mut z_frame, bw);
2034
2035 for event in &noise.after_gate[idx] {
2036 let (px, py, pz) = event.pauli_probs();
2037 let q = event.qubit();
2038 let p_event = px + py + pz;
2039 if p_event == 0.0 {
2040 continue;
2041 }
2042
2043 let px_frac = px / p_event;
2044 let pxy_frac = (px + py) / p_event;
2045
2046 if p_event < 0.5 && batch_n >= 32 {
2047 let ln_1mp = (1.0 - p_event).ln();
2048 let mut pos = geometric_sample(&mut rng, ln_1mp);
2049 while pos < batch_n {
2050 let r: f64 = rand::Rng::random(&mut rng);
2051 let bit = 1u64 << (pos % 64);
2052 let w = pos / 64;
2053 if r < px_frac {
2054 x_frame[q][w] ^= bit;
2055 } else if r < pxy_frac {
2056 x_frame[q][w] ^= bit;
2057 z_frame[q][w] ^= bit;
2058 } else {
2059 z_frame[q][w] ^= bit;
2060 }
2061 pos += 1 + geometric_sample(&mut rng, ln_1mp);
2062 }
2063 } else {
2064 for s in 0..batch_n {
2065 let r: f64 = rand::Rng::random(&mut rng);
2066 if r < px {
2067 x_frame[q][s / 64] ^= 1u64 << (s % 64);
2068 } else if r < px + py {
2069 x_frame[q][s / 64] ^= 1u64 << (s % 64);
2070 z_frame[q][s / 64] ^= 1u64 << (s % 64);
2071 } else if r < p_event {
2072 z_frame[q][s / 64] ^= 1u64 << (s % 64);
2073 }
2074 }
2075 }
2076 }
2077 }
2078 Instruction::Measure {
2079 qubit,
2080 classical_bit: _,
2081 } => {
2082 if ref_info.is_random[meas_idx] {
2083 let support = &ref_info.random_x_support[meas_idx];
2084 #[allow(clippy::needless_range_loop)]
2085 for w in 0..bw {
2086 let random_word: u64 = rand::Rng::random(&mut rng);
2087 let mask = if w == bw - 1 && batch_n % 64 != 0 {
2088 random_word & ((1u64 << (batch_n % 64)) - 1)
2089 } else {
2090 random_word
2091 };
2092 if mask != 0 {
2093 for &q in support {
2094 x_frame[q][w] ^= mask;
2095 }
2096 }
2097 }
2098 }
2099
2100 let ref_bit = ref_info.outcomes[meas_idx];
2101 let mi_word = meas_idx / 64;
2102 let mi_bit = meas_idx % 64;
2103 #[allow(clippy::needless_range_loop)]
2104 for w in 0..bw {
2105 let frame_word = x_frame[*qubit][w];
2106 let effective = if ref_bit { !frame_word } else { frame_word };
2107 let num_bits = if w == bw - 1 && batch_n % 64 != 0 {
2108 batch_n % 64
2109 } else {
2110 64
2111 };
2112 let mask = if num_bits == 64 {
2113 effective
2114 } else {
2115 effective & ((1u64 << num_bits) - 1)
2116 };
2117 let mut bits = mask;
2118 while bits != 0 {
2119 let s = bits.trailing_zeros() as usize;
2120 let gs = batch_start + w * 64 + s;
2121 all_packed[gs * m_words + mi_word] |= 1u64 << mi_bit;
2122 bits &= bits - 1;
2123 }
2124 }
2125
2126 meas_idx += 1;
2127 }
2128 _ => {}
2129 }
2130 }
2131 }
2132
2133 let shots = unpack_and_remap(
2134 &all_packed,
2135 m_words,
2136 num_shots,
2137 &classical_bit_order,
2138 num_classical,
2139 );
2140
2141 Ok(ShotsResult {
2142 shots,
2143 num_classical_bits: circuit.num_classical_bits,
2144 })
2145}
2146
2147pub fn run_shots_noisy(
2153 circuit: &Circuit,
2154 noise: &NoiseModel,
2155 num_shots: usize,
2156 seed: u64,
2157) -> Result<ShotsResult> {
2158 noise.ensure_pauli_only()?;
2159 if !circuit.is_clifford_only() {
2160 return run_shots_noisy_brute_with(
2161 |s| Box::new(StabilizerBackend::new(s)),
2162 circuit,
2163 noise,
2164 num_shots,
2165 seed,
2166 );
2167 }
2168
2169 if !super::supports_compiled_measurement_sampling(circuit) {
2170 return run_shots_noisy_brute_with(
2171 |s| Box::new(StabilizerBackend::new(s)),
2172 circuit,
2173 noise,
2174 num_shots,
2175 seed,
2176 );
2177 }
2178
2179 if num_shots >= 1000 {
2182 if let Ok(sampler) = super::homological::HomologicalSampler::compile(circuit, noise, seed) {
2183 return super::homological::run_shots_homological_inner(sampler, circuit, num_shots);
2184 }
2185 }
2186
2187 let n = circuit.num_qubits;
2188 let gate_count = circuit
2189 .instructions
2190 .iter()
2191 .filter(|i| {
2192 matches!(
2193 i,
2194 Instruction::Gate { .. } | Instruction::Conditional { .. }
2195 )
2196 })
2197 .count();
2198
2199 let depth_ratio = gate_count as f64 / n.max(1) as f64;
2200 let use_frame = depth_ratio < 3.0 || (n >= 200 && depth_ratio < 5.0);
2201
2202 if use_frame {
2203 run_shots_noisy_frame(circuit, noise, num_shots, seed)
2204 } else {
2205 run_shots_noisy_compiled(circuit, noise, num_shots, seed)
2206 }
2207}
2208
2209fn finish_noisy_compiled_run(
2210 mut sampler: NoisyCompiledSampler,
2211 circuit: &Circuit,
2212 num_shots: usize,
2213) -> Result<ShotsResult> {
2214 let classical_bit_order: Vec<usize> = circuit
2215 .instructions
2216 .iter()
2217 .filter_map(|inst| match inst {
2218 Instruction::Measure { classical_bit, .. } => Some(*classical_bit),
2219 _ => None,
2220 })
2221 .collect();
2222 let num_classical = circuit.num_classical_bits;
2223
2224 let packed = sampler.try_sample_bulk_packed(num_shots)?;
2225 let shots = unpack_and_remap_packed(&packed, num_shots, &classical_bit_order, num_classical);
2226
2227 Ok(ShotsResult {
2228 shots,
2229 num_classical_bits: circuit.num_classical_bits,
2230 })
2231}
2232
2233fn run_shots_noisy_compiled(
2234 circuit: &Circuit,
2235 noise: &NoiseModel,
2236 num_shots: usize,
2237 seed: u64,
2238) -> Result<ShotsResult> {
2239 let sampler = compile_noisy(circuit, noise, seed)?;
2240 finish_noisy_compiled_run(sampler, circuit, num_shots)
2241}
2242
2243#[cfg(feature = "gpu")]
2244pub(crate) fn run_shots_noisy_with_gpu(
2245 circuit: &Circuit,
2246 noise: &NoiseModel,
2247 num_shots: usize,
2248 seed: u64,
2249 context: std::sync::Arc<crate::gpu::GpuContext>,
2250) -> Result<ShotsResult> {
2251 noise.ensure_pauli_only()?;
2252 if !circuit.is_clifford_only() {
2253 return run_shots_noisy_brute_with(
2254 |s| Box::new(StabilizerBackend::new(s)),
2255 circuit,
2256 noise,
2257 num_shots,
2258 seed,
2259 );
2260 }
2261
2262 if !super::supports_compiled_measurement_sampling(circuit) {
2263 return run_shots_noisy_brute_with(
2264 |s| Box::new(StabilizerBackend::new(s)),
2265 circuit,
2266 noise,
2267 num_shots,
2268 seed,
2269 );
2270 }
2271
2272 if num_shots >= 1000 {
2273 if let Ok(sampler) = super::homological::HomologicalSampler::compile(circuit, noise, seed) {
2274 return super::homological::run_shots_homological_inner(sampler, circuit, num_shots);
2275 }
2276 }
2277
2278 let n = circuit.num_qubits;
2279 let gate_count = circuit
2280 .instructions
2281 .iter()
2282 .filter(|i| {
2283 matches!(
2284 i,
2285 Instruction::Gate { .. } | Instruction::Conditional { .. }
2286 )
2287 })
2288 .count();
2289
2290 let depth_ratio = gate_count as f64 / n.max(1) as f64;
2291 let use_frame = depth_ratio < 3.0 || (n >= 200 && depth_ratio < 5.0);
2292 if use_frame {
2293 return run_shots_noisy_frame(circuit, noise, num_shots, seed);
2294 }
2295
2296 let sampler = compile_noisy(circuit, noise, seed)?.with_gpu(context);
2297 finish_noisy_compiled_run(sampler, circuit, num_shots)
2298}
2299
2300pub(crate) fn run_shots_noisy_brute_with(
2301 backend_factory: impl Fn(u64) -> Box<dyn Backend>,
2302 circuit: &Circuit,
2303 noise: &NoiseModel,
2304 num_shots: usize,
2305 seed: u64,
2306) -> Result<ShotsResult> {
2307 let mut shots = Vec::with_capacity(num_shots);
2308
2309 for i in 0..num_shots {
2310 let shot_seed = seed.wrapping_add(i as u64);
2311 let mut rng = ChaCha8Rng::seed_from_u64(shot_seed);
2312 let mut backend = backend_factory(shot_seed);
2313 backend.init(circuit.num_qubits, circuit.num_classical_bits)?;
2314
2315 for (idx, instr) in circuit.instructions.iter().enumerate() {
2316 backend.apply(instr)?;
2317
2318 let noise_events = &noise.after_gate[idx];
2319 for event in noise_events {
2320 let (px, py, pz) = event.pauli_probs();
2321 let q = event.qubit();
2322 let r: f64 = rand::Rng::random(&mut rng);
2323 if r < px {
2324 backend.apply(&Instruction::Gate {
2325 gate: Gate::X,
2326 targets: SmallVec::from_elem(q, 1),
2327 })?;
2328 } else if r < px + py {
2329 backend.apply(&Instruction::Gate {
2330 gate: Gate::Y,
2331 targets: SmallVec::from_elem(q, 1),
2332 })?;
2333 } else if r < px + py + pz {
2334 backend.apply(&Instruction::Gate {
2335 gate: Gate::Z,
2336 targets: SmallVec::from_elem(q, 1),
2337 })?;
2338 }
2339 }
2340 }
2341
2342 shots.push(backend.classical_results().to_vec());
2343 }
2344
2345 Ok(ShotsResult {
2346 shots,
2347 num_classical_bits: circuit.num_classical_bits,
2348 })
2349}
2350
2351#[cfg(test)]
2352mod tests {
2353 use super::*;
2354 use crate::circuits;
2355
2356 #[test]
2357 fn noisy_ghz_produces_varied_outcomes() {
2358 let n = 10;
2359 let mut circuit = circuits::ghz_circuit(n);
2360 circuit.num_classical_bits = n;
2361 for i in 0..n {
2362 circuit.add_measure(i, i);
2363 }
2364
2365 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
2366 let result = run_shots_noisy(&circuit, &noise, 1000, 42).unwrap();
2367
2368 assert_eq!(result.shots.len(), 1000);
2369 assert_eq!(result.shots[0].len(), n);
2370
2371 let all_zero: Vec<bool> = vec![false; n];
2372 let all_one: Vec<bool> = vec![true; n];
2373 let num_00 = result.shots.iter().filter(|s| **s == all_zero).count();
2374 let num_11 = result.shots.iter().filter(|s| **s == all_one).count();
2375 let num_other = 1000 - num_00 - num_11;
2376
2377 assert!(num_other > 0, "noise should produce non-GHZ outcomes");
2378 assert!(num_00 > 100, "should still have many |00...0> outcomes");
2379 assert!(num_11 > 100, "should still have many |11...1> outcomes");
2380 }
2381
2382 #[test]
2383 fn zero_noise_matches_noiseless() {
2384 let n = 5;
2385 let mut circuit = circuits::ghz_circuit(n);
2386 circuit.num_classical_bits = n;
2387 for i in 0..n {
2388 circuit.add_measure(i, i);
2389 }
2390
2391 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.0);
2392 let result = run_shots_noisy(&circuit, &noise, 100, 42).unwrap();
2393
2394 for shot in &result.shots {
2395 let all_same = shot.iter().all(|&b| b == shot[0]);
2396 assert!(all_same, "GHZ with zero noise must be all-0 or all-1");
2397 }
2398 }
2399
2400 #[test]
2401 fn noise_model_length_matches_circuit() {
2402 let n = 10;
2403 let mut circuit = circuits::ghz_circuit(n);
2404 circuit.num_classical_bits = n;
2405 for i in 0..n {
2406 circuit.add_measure(i, i);
2407 }
2408
2409 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.001);
2410 assert_eq!(noise.after_gate.len(), circuit.instructions.len());
2411 }
2412
2413 #[test]
2414 fn compiled_noisy_stats_match_brute_force() {
2415 let n = 10;
2416 let mut circuit = circuits::ghz_circuit(n);
2417 circuit.num_classical_bits = n;
2418 for i in 0..n {
2419 circuit.add_measure(i, i);
2420 }
2421
2422 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
2423 let num_shots = 10000;
2424
2425 let brute = run_shots_noisy_brute_with(
2426 |s| Box::new(StabilizerBackend::new(s)),
2427 &circuit,
2428 &noise,
2429 num_shots,
2430 42,
2431 )
2432 .unwrap();
2433 let compiled = run_shots_noisy_compiled(&circuit, &noise, num_shots, 42).unwrap();
2434
2435 let count_all_same = |shots: &[Vec<bool>]| -> usize {
2436 shots
2437 .iter()
2438 .filter(|s| s.iter().all(|&b| b == s[0]))
2439 .count()
2440 };
2441
2442 let brute_coherent = count_all_same(&brute.shots);
2443 let compiled_coherent = count_all_same(&compiled.shots);
2444
2445 let brute_frac = brute_coherent as f64 / num_shots as f64;
2446 let compiled_frac = compiled_coherent as f64 / num_shots as f64;
2447
2448 assert!(
2449 (brute_frac - compiled_frac).abs() < 0.05,
2450 "coherent fraction should be similar: brute={brute_frac:.3}, compiled={compiled_frac:.3}"
2451 );
2452
2453 let count_errors = |shots: &[Vec<bool>]| -> usize {
2454 shots
2455 .iter()
2456 .filter(|s| !s.iter().all(|&b| b == s[0]))
2457 .count()
2458 };
2459
2460 let brute_errors = count_errors(&brute.shots);
2461 let compiled_errors = count_errors(&compiled.shots);
2462
2463 assert!(
2464 brute_errors > 0 && compiled_errors > 0,
2465 "both should produce errors"
2466 );
2467 }
2468
2469 #[test]
2470 fn compiled_noisy_clifford_produces_noise() {
2471 let n = 20;
2472 let circuit_base = circuits::clifford_heavy_circuit(n, 10, 42);
2473 let mut circuit = circuit_base;
2474 circuit.num_classical_bits = n;
2475 for i in 0..n {
2476 circuit.add_measure(i, i);
2477 }
2478
2479 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
2480 let result = run_shots_noisy(&circuit, &noise, 100, 42).unwrap();
2481
2482 assert_eq!(result.shots.len(), 100);
2483 assert_eq!(result.shots[0].len(), n);
2484
2485 let unique: std::collections::HashSet<Vec<bool>> = result.shots.iter().cloned().collect();
2486 assert!(unique.len() > 1, "noise should produce varied outcomes");
2487 }
2488
2489 #[test]
2490 fn compile_noisy_rejects_reset_circuits() {
2491 let mut circuit = Circuit::new(1, 1);
2492 circuit.add_reset(0);
2493 circuit.add_measure(0, 0);
2494 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
2495 assert!(compile_noisy(&circuit, &noise, 42).is_err());
2496 }
2497
2498 #[test]
2499 fn compile_noisy_rejects_conditionals() {
2500 let mut circuit = Circuit::new(2, 2);
2501 circuit.add_gate(Gate::H, &[0]);
2502 circuit.add_measure(0, 0);
2503 circuit.instructions.push(Instruction::Conditional {
2504 condition: crate::circuit::ClassicalCondition::BitIsOne(0),
2505 gate: Gate::X,
2506 targets: crate::circuit::smallvec![1],
2507 });
2508 circuit.add_measure(1, 1);
2509 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
2510 assert!(compile_noisy(&circuit, &noise, 42).is_err());
2511 }
2512
2513 #[test]
2514 fn run_shots_noisy_handles_reset_circuits() {
2515 let mut circuit = Circuit::new(1, 1);
2516 circuit.add_gate(Gate::X, &[0]);
2517 circuit.add_reset(0);
2518 circuit.add_measure(0, 0);
2519 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.0);
2520 let result = run_shots_noisy(&circuit, &noise, 32, 42).unwrap();
2521 assert!(result.shots.iter().all(|shot| !shot[0]));
2522 }
2523
2524 #[test]
2525 fn run_shots_noisy_handles_conditionals() {
2526 let mut circuit = Circuit::new(2, 2);
2527 circuit.add_gate(Gate::H, &[0]);
2528 circuit.add_measure(0, 0);
2529 circuit.instructions.push(Instruction::Conditional {
2530 condition: crate::circuit::ClassicalCondition::BitIsOne(0),
2531 gate: Gate::X,
2532 targets: crate::circuit::smallvec![1],
2533 });
2534 circuit.add_measure(1, 1);
2535 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.0);
2536 let result = run_shots_noisy(&circuit, &noise, 256, 42).unwrap();
2537 assert!(result.shots.iter().all(|shot| shot[0] == shot[1]));
2538 }
2539
2540 #[test]
2541 fn frame_ghz_100q_produces_varied_outcomes() {
2542 let n = 100;
2543 let mut circuit = circuits::ghz_circuit(n);
2544 circuit.num_classical_bits = n;
2545 for i in 0..n {
2546 circuit.add_measure(i, i);
2547 }
2548
2549 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
2550 let result = run_shots_noisy_frame(&circuit, &noise, 1000, 42).unwrap();
2551
2552 assert_eq!(result.shots.len(), 1000);
2553 assert_eq!(result.shots[0].len(), n);
2554
2555 let all_zero: Vec<bool> = vec![false; n];
2556 let all_one: Vec<bool> = vec![true; n];
2557 let num_00 = result.shots.iter().filter(|s| **s == all_zero).count();
2558 let num_11 = result.shots.iter().filter(|s| **s == all_one).count();
2559 let num_other = 1000 - num_00 - num_11;
2560
2561 assert!(num_other > 0, "noise should produce non-GHZ outcomes");
2562 assert!(num_00 > 50, "should still have many |00...0> outcomes");
2563 assert!(num_11 > 50, "should still have many |11...1> outcomes");
2564 }
2565
2566 #[test]
2567 fn frame_zero_noise_matches_noiseless_100q() {
2568 let n = 100;
2569 let mut circuit = circuits::ghz_circuit(n);
2570 circuit.num_classical_bits = n;
2571 for i in 0..n {
2572 circuit.add_measure(i, i);
2573 }
2574
2575 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.0);
2576 let result = run_shots_noisy_frame(&circuit, &noise, 100, 42).unwrap();
2577
2578 for shot in &result.shots {
2579 let all_same = shot.iter().all(|&b| b == shot[0]);
2580 assert!(all_same, "GHZ with zero noise must be all-0 or all-1");
2581 }
2582 }
2583
2584 #[test]
2585 fn frame_stats_match_compiled_ghz() {
2586 let n = 100;
2587 let mut circuit = circuits::ghz_circuit(n);
2588 circuit.num_classical_bits = n;
2589 for i in 0..n {
2590 circuit.add_measure(i, i);
2591 }
2592
2593 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
2594 let num_shots = 5000;
2595
2596 let frame = run_shots_noisy_frame(&circuit, &noise, num_shots, 42).unwrap();
2597 let compiled = run_shots_noisy_compiled(&circuit, &noise, num_shots, 42).unwrap();
2598
2599 let count_coherent = |shots: &[Vec<bool>]| -> usize {
2600 shots
2601 .iter()
2602 .filter(|s| s.iter().all(|&b| b == s[0]))
2603 .count()
2604 };
2605
2606 let frame_coh = count_coherent(&frame.shots) as f64 / num_shots as f64;
2607 let compiled_coh = count_coherent(&compiled.shots) as f64 / num_shots as f64;
2608
2609 assert!(
2610 (frame_coh - compiled_coh).abs() < 0.05,
2611 "coherent fraction should be similar: frame={frame_coh:.3}, compiled={compiled_coh:.3}"
2612 );
2613 }
2614
2615 #[test]
2616 fn frame_clifford_100q_produces_noise() {
2617 let n = 100;
2618 let circuit_base = circuits::clifford_heavy_circuit(n, 10, 42);
2619 let mut circuit = circuit_base;
2620 circuit.num_classical_bits = n;
2621 for i in 0..n {
2622 circuit.add_measure(i, i);
2623 }
2624
2625 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
2626 let result = run_shots_noisy_frame(&circuit, &noise, 100, 42).unwrap();
2627
2628 assert_eq!(result.shots.len(), 100);
2629 assert_eq!(result.shots[0].len(), n);
2630
2631 let unique: std::collections::HashSet<Vec<bool>> = result.shots.iter().cloned().collect();
2632 assert!(unique.len() > 1, "noise should produce varied outcomes");
2633 }
2634
2635 #[test]
2636 fn filtered_noisy_bell_pairs_matches_monolithic() {
2637 let n_pairs = 50;
2638 let n = n_pairs * 2;
2639 let mut circuit = circuits::independent_bell_pairs(n_pairs);
2640 circuit.num_classical_bits = n;
2641 for i in 0..n {
2642 circuit.add_measure(i, i);
2643 }
2644
2645 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
2646 let seed = 42u64;
2647
2648 let filtered =
2649 compile_noisy_filtered(&circuit, &noise, &circuit.independent_subsystems(), seed)
2650 .unwrap();
2651 let monolithic = compile_noisy_monolithic(&circuit, &noise, seed).unwrap();
2652
2653 assert_eq!(filtered.num_measurements, monolithic.num_measurements);
2654 assert_eq!(filtered.events.len(), monolithic.events.len());
2655
2656 let mut filtered = filtered;
2657 let mut monolithic = monolithic;
2658 let num_shots = 10_000;
2659 let shots_f = filtered.sample_bulk(num_shots);
2660 let shots_m = monolithic.sample_bulk(num_shots);
2661
2662 assert_eq!(shots_f.len(), num_shots);
2663 assert_eq!(shots_m.len(), num_shots);
2664
2665 let mut agree_f = 0usize;
2666 let mut agree_m = 0usize;
2667 for shot in &shots_f {
2668 for pair in shot.chunks(2) {
2669 if pair[0] == pair[1] {
2670 agree_f += 1;
2671 }
2672 }
2673 }
2674 for shot in &shots_m {
2675 for pair in shot.chunks(2) {
2676 if pair[0] == pair[1] {
2677 agree_m += 1;
2678 }
2679 }
2680 }
2681
2682 let total_pairs = num_shots * n_pairs;
2683 let agree_rate_f = agree_f as f64 / total_pairs as f64;
2684 let agree_rate_m = agree_m as f64 / total_pairs as f64;
2685 assert!(
2686 agree_rate_f > 0.95,
2687 "filtered agreement rate {agree_rate_f:.4} should be >0.95 with low noise"
2688 );
2689 assert!(
2690 agree_rate_m > 0.95,
2691 "monolithic agreement rate {agree_rate_m:.4} should be >0.95 with low noise"
2692 );
2693 assert!(
2694 (agree_rate_f - agree_rate_m).abs() < 0.02,
2695 "filtered ({agree_rate_f:.4}) and monolithic ({agree_rate_m:.4}) should have similar agreement rates"
2696 );
2697 }
2698
2699 #[cfg(feature = "gpu")]
2700 #[test]
2701 fn compiled_noisy_with_stub_gpu_matches_cpu_below_threshold() {
2702 let n = 12;
2703 let mut circuit = circuits::ghz_circuit(n);
2704 circuit.num_classical_bits = n;
2705 for i in 0..n {
2706 circuit.add_measure(i, i);
2707 }
2708
2709 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
2710 let shots = 20_000;
2711
2712 let mut cpu = compile_noisy(&circuit, &noise, 42).unwrap();
2713 let cpu_marginals = cpu.sample_marginals(shots);
2714
2715 let mut gpu = compile_noisy(&circuit, &noise, 42)
2716 .unwrap()
2717 .with_gpu(crate::gpu::GpuContext::stub_for_tests());
2718 let gpu_marginals = gpu.sample_marginals(shots);
2719
2720 for (idx, (cpu_p1, gpu_p1)) in cpu_marginals.iter().zip(gpu_marginals.iter()).enumerate() {
2721 assert!(
2722 (cpu_p1 - gpu_p1).abs() < 0.03,
2723 "marginal[{idx}] diverged too much: cpu={cpu_p1}, gpu={gpu_p1}"
2724 );
2725 }
2726 }
2727
2728 #[cfg(feature = "gpu")]
2729 #[test]
2730 fn compiled_noisy_with_stub_gpu_low_rank_above_threshold_uses_cpu_fallback() {
2731 let shots = crate::gpu::bts_min_shots().max(1);
2732 let n = 12;
2733 let mut circuit = circuits::ghz_circuit(n);
2734 circuit.num_classical_bits = n;
2735 for i in 0..n {
2736 circuit.add_measure(i, i);
2737 }
2738
2739 let noise = NoiseModel::uniform_depolarizing(&circuit, 0.01);
2740 let mut gpu = compile_noisy(&circuit, &noise, 42)
2741 .unwrap()
2742 .with_gpu(crate::gpu::GpuContext::stub_for_tests());
2743
2744 assert!(!gpu.noiseless.should_use_gpu_bts(shots));
2745 let counts = gpu.sample_counts(shots);
2746 let total: u64 = counts.values().sum();
2747 assert_eq!(total, shots as u64);
2748 }
2749}