Skip to main content

quantrs2_circuit/
solovay_kitaev.rs

1//! Solovay-Kitaev Algorithm for single-qubit gate approximation.
2//!
3//! Implements the Solovay-Kitaev theorem (Dawson & Nielsen, quant-ph/0505030):
4//! any single-qubit SU(2) gate can be approximated to precision ε using
5//! O(log^c(1/ε)) gates from the universal set {H, T, T†, S, S†}.
6//!
7//! S = T² and S† = T†² are algebraically expressible in terms of T and T†, but
8//! enumerating them as separate BFS atoms shortens many useful sequences (e.g.
9//! Y-axis-rotation conjugates such as S·H·RZ(θ)·H·S†) at the cost of a denser
10//! basic table.
11//!
12//! # Design
13//!
14//! Rather than storing SU(2) in the `[[a,-b*],[b,a*]]` parameterization (which
15//! forces H into a non-standard embedding), we store the full 2×2 unitary matrix.
16//! Gate multiplication, adjoint and distance are computed directly.  This avoids
17//! the representation issues with H (det = −1) while keeping the struct name SU2.
18//!
19//! Frobenius distance is computed **up to global phase**:
20//!   d(U,V) = sqrt(2 − |Tr(U†V)|)
21//! which is the correct metric for SK's approximation quality.
22
23use quantrs2_core::error::{QuantRS2Error, QuantRS2Result};
24use scirs2_core::Complex64;
25use std::collections::HashMap;
26use std::f64::consts::{PI, SQRT_2};
27
28/// 2×2 single-qubit unitary matrix stored in row-major order:
29///   [[m00, m01], [m10, m11]]
30///
31/// No assumption is made about the specific SU(2) parameterization;
32/// the matrix may represent any U(2) unitary (global phase is tracked
33/// but ignored when measuring approximation quality).
34#[derive(Debug, Clone, Copy)]
35pub struct SU2 {
36    pub m00: Complex64,
37    pub m01: Complex64,
38    pub m10: Complex64,
39    pub m11: Complex64,
40}
41
42impl SU2 {
43    /// Create a new SU2 matrix from four complex entries.
44    pub fn new(m00: Complex64, m01: Complex64, m10: Complex64, m11: Complex64) -> Self {
45        Self { m00, m01, m10, m11 }
46    }
47
48    /// Identity gate.
49    pub fn identity() -> Self {
50        Self {
51            m00: Complex64::new(1.0, 0.0),
52            m01: Complex64::new(0.0, 0.0),
53            m10: Complex64::new(0.0, 0.0),
54            m11: Complex64::new(1.0, 0.0),
55        }
56    }
57
58    /// Construct an SU2 matrix from a named gate in {H, T, Tdg, S, Sdg, X, Y, Z}.
59    ///
60    /// Returns `None` for unrecognised gate names.
61    pub fn from_gate_name(name: &str) -> Option<Self> {
62        let inv_sqrt2 = 1.0 / SQRT_2;
63        let t_phase = Complex64::new(inv_sqrt2, inv_sqrt2); // e^{iπ/4}
64        let tdg_phase = Complex64::new(inv_sqrt2, -inv_sqrt2); // e^{-iπ/4}
65
66        match name {
67            "H" => Some(Self {
68                m00: Complex64::new(inv_sqrt2, 0.0),
69                m01: Complex64::new(inv_sqrt2, 0.0),
70                m10: Complex64::new(inv_sqrt2, 0.0),
71                m11: Complex64::new(-inv_sqrt2, 0.0),
72            }),
73            "T" => Some(Self {
74                m00: Complex64::new(1.0, 0.0),
75                m01: Complex64::new(0.0, 0.0),
76                m10: Complex64::new(0.0, 0.0),
77                m11: t_phase,
78            }),
79            "Tdg" => Some(Self {
80                m00: Complex64::new(1.0, 0.0),
81                m01: Complex64::new(0.0, 0.0),
82                m10: Complex64::new(0.0, 0.0),
83                m11: tdg_phase,
84            }),
85            "S" => Some(Self {
86                m00: Complex64::new(1.0, 0.0),
87                m01: Complex64::new(0.0, 0.0),
88                m10: Complex64::new(0.0, 0.0),
89                m11: Complex64::new(0.0, 1.0),
90            }),
91            "Sdg" => Some(Self {
92                m00: Complex64::new(1.0, 0.0),
93                m01: Complex64::new(0.0, 0.0),
94                m10: Complex64::new(0.0, 0.0),
95                m11: Complex64::new(0.0, -1.0),
96            }),
97            "X" => Some(Self {
98                m00: Complex64::new(0.0, 0.0),
99                m01: Complex64::new(1.0, 0.0),
100                m10: Complex64::new(1.0, 0.0),
101                m11: Complex64::new(0.0, 0.0),
102            }),
103            "Y" => Some(Self {
104                m00: Complex64::new(0.0, 0.0),
105                m01: Complex64::new(0.0, -1.0),
106                m10: Complex64::new(0.0, 1.0),
107                m11: Complex64::new(0.0, 0.0),
108            }),
109            "Z" => Some(Self {
110                m00: Complex64::new(1.0, 0.0),
111                m01: Complex64::new(0.0, 0.0),
112                m10: Complex64::new(0.0, 0.0),
113                m11: Complex64::new(-1.0, 0.0),
114            }),
115            _ => None,
116        }
117    }
118
119    /// Matrix multiplication: self * other.
120    pub fn mul(&self, other: &SU2) -> SU2 {
121        SU2 {
122            m00: self.m00 * other.m00 + self.m01 * other.m10,
123            m01: self.m00 * other.m01 + self.m01 * other.m11,
124            m10: self.m10 * other.m00 + self.m11 * other.m10,
125            m11: self.m10 * other.m01 + self.m11 * other.m11,
126        }
127    }
128
129    /// Conjugate transpose (adjoint / Hermitian conjugate).
130    pub fn adjoint(&self) -> SU2 {
131        SU2 {
132            m00: self.m00.conj(),
133            m01: self.m10.conj(),
134            m10: self.m01.conj(),
135            m11: self.m11.conj(),
136        }
137    }
138
139    /// Trace of the matrix.
140    pub fn trace(&self) -> Complex64 {
141        self.m00 + self.m11
142    }
143
144    /// Frobenius distance from identity, up to global phase:
145    ///   d(U, I) = sqrt(2 − |Tr(U)|)
146    ///
147    /// This equals zero only when U is a global phase times identity.
148    /// Used for table lookups (comparing approximations to targets).
149    pub fn distance_from_identity(&self) -> f64 {
150        let tr_abs = self.trace().norm();
151        let val = (2.0 - tr_abs).max(0.0);
152        val.sqrt()
153    }
154
155    /// Frobenius distance between two matrices, up to global phase:
156    ///   d(U, V) = sqrt(2 − |Tr(U†V)|)
157    ///
158    /// Used for evaluating approximation quality (the output metric).
159    pub fn frobenius_distance(&self, other: &SU2) -> f64 {
160        let udagger_v = self.adjoint().mul(other);
161        udagger_v.distance_from_identity()
162    }
163
164    /// Frobenius distance from identity WITHOUT global phase correction:
165    ///   d(U, I) = sqrt(2 − Re[Tr(U)])
166    ///
167    /// This is the metric used internally by the Solovay-Kitaev recursion.
168    /// It reflects the actual matrix distance (not up to phase), so that
169    /// the residual delta = U * u_prev† has the correct geometric meaning.
170    pub fn distance_from_identity_exact(&self) -> f64 {
171        let re_tr = self.trace().re;
172        let val = (2.0 - re_tr).max(0.0);
173        val.sqrt()
174    }
175
176    /// Frobenius distance between two matrices WITHOUT global phase correction:
177    ///   d(U, V) = sqrt(2 − Re[Tr(U†V)])
178    pub fn frobenius_distance_exact(&self, other: &SU2) -> f64 {
179        let udagger_v = self.adjoint().mul(other);
180        udagger_v.distance_from_identity_exact()
181    }
182
183    /// Rotation angle θ (in radians) such that U = exp(iθ/2 n̂·σ) * global_phase.
184    ///
185    /// Computed as θ = 2 * arccos(|Tr(U)| / 2) using the absolute trace,
186    /// which factors out the global phase and gives the physical rotation angle.
187    ///
188    /// For the SU(2) representative R (with det = 1), Tr(R) = 2 cos(θ/2) is real.
189    /// For a general U(2) matrix U = e^{iφ} R, |Tr(U)| = |Tr(R)| = 2|cos(θ/2)|.
190    pub fn rotation_angle(&self) -> f64 {
191        // Factor out global phase: |Tr(U)| = |2 cos(θ/2)| gives the physical angle.
192        let tr_abs = self.trace().norm();
193        let cos_half = (tr_abs / 2.0).clamp(0.0, 1.0);
194        2.0 * cos_half.acos()
195    }
196
197    /// Normalized rotation axis [nx, ny, nz] for the SU(2) rotation.
198    ///
199    /// Returns [0, 0, 1] (z-axis) if the rotation angle is near zero.
200    pub fn rotation_axis(&self) -> [f64; 3] {
201        let theta = self.rotation_angle();
202        let s = (theta / 2.0).sin();
203
204        if s.abs() < 1e-12 {
205            return [0.0, 0.0, 1.0];
206        }
207
208        // For U = exp(iθ/2 n̂·σ) in SU(2), U = cos(θ/2)I + i sin(θ/2) n̂·σ
209        // n̂·σ = [[nz, nx-iny],[nx+iny, -nz]]
210        // Extract components from the off-diagonal imaginary / diagonal imaginary
211        // Handle global phase: normalize by global phase factor.
212        let det = self.m00 * self.m11 - self.m01 * self.m10;
213        let global_phase_half = det.arg() / 2.0;
214        let phase_inv = Complex64::new((-global_phase_half).cos(), (-global_phase_half).sin());
215
216        let a00 = self.m00 * phase_inv;
217        let a01 = self.m01 * phase_inv;
218        let a10 = self.m10 * phase_inv;
219        let a11 = self.m11 * phase_inv;
220
221        // From SU(2) standard form: a00 = cos(θ/2) + i*nz*sin(θ/2)
222        //                            a01 = i*(nx - i*ny)*... no, let's re-derive
223        // Standard SU(2): U = [[c + i*nz*s, i*nx*s + ny*s],
224        //                       [i*nx*s - ny*s, c - i*nz*s]]
225        // where c = cos(θ/2), s = sin(θ/2)
226        let nz = a00.im / s;
227        let nx = a10.im / s;
228        let ny = a10.re / s;
229
230        let _ = (a01, a11); // suppress unused warnings
231
232        let norm = (nx * nx + ny * ny + nz * nz).sqrt();
233        if norm < 1e-12 {
234            [0.0, 0.0, 1.0]
235        } else {
236            [nx / norm, ny / norm, nz / norm]
237        }
238    }
239
240    /// Construct a rotation about the X-axis: exp(+i*angle/2 * X).
241    /// Matrix: [[cos(angle/2), i*sin(angle/2)], [i*sin(angle/2), cos(angle/2)]]
242    pub fn rotation_x(angle: f64) -> SU2 {
243        let c = (angle / 2.0).cos();
244        let s = (angle / 2.0).sin();
245        SU2 {
246            m00: Complex64::new(c, 0.0),
247            m01: Complex64::new(0.0, s),
248            m10: Complex64::new(0.0, s),
249            m11: Complex64::new(c, 0.0),
250        }
251    }
252
253    /// Construct a rotation about the Y-axis: exp(+i*angle/2 * Y).
254    /// Matrix: [[cos(angle/2), sin(angle/2)], [-sin(angle/2), cos(angle/2)]]
255    pub fn rotation_y(angle: f64) -> SU2 {
256        let c = (angle / 2.0).cos();
257        let s = (angle / 2.0).sin();
258        SU2 {
259            m00: Complex64::new(c, 0.0),
260            m01: Complex64::new(s, 0.0),
261            m10: Complex64::new(-s, 0.0),
262            m11: Complex64::new(c, 0.0),
263        }
264    }
265
266    /// Construct a rotation about the Z-axis: exp(+i*angle/2 * Z).
267    /// Matrix: [[e^{i*angle/2}, 0], [0, e^{-i*angle/2}]]
268    pub fn rotation_z(angle: f64) -> SU2 {
269        let half = angle / 2.0;
270        SU2 {
271            m00: Complex64::new(half.cos(), half.sin()),
272            m01: Complex64::new(0.0, 0.0),
273            m10: Complex64::new(0.0, 0.0),
274            m11: Complex64::new(half.cos(), -half.sin()),
275        }
276    }
277}
278
279/// Compute SU(2) rotation about arbitrary unit axis [nx, ny, nz] by angle theta.
280///
281/// U = cos(θ/2) I + i sin(θ/2) (nx X + ny Y + nz Z)
282///
283/// Matrix form (standard SU(2)):
284///   [[cos(θ/2) + i*nz*sin(θ/2),  (ny + i*nx)*sin(θ/2)],
285///    [(-ny + i*nx)*sin(θ/2),       cos(θ/2) - i*nz*sin(θ/2)]]
286pub fn rotation_about_axis(nx: f64, ny: f64, nz: f64, theta: f64) -> SU2 {
287    let c = (theta / 2.0).cos();
288    let s = (theta / 2.0).sin();
289    SU2 {
290        m00: Complex64::new(c, nz * s),
291        m01: Complex64::new(ny * s, nx * s),
292        m10: Complex64::new(-ny * s, nx * s),
293        m11: Complex64::new(c, -nz * s),
294    }
295}
296
297/// Decompose U into a balanced commutator V * W * V† * W† ≈ U.
298///
299/// This works when U is close to identity (small rotation angle).
300/// Based on the Dawson-Nielsen algorithm Section IV.A.
301///
302/// Given U = exp(iθ n̂·σ/2), we find φ such that sin²(2φ) = sin(θ/2),
303/// i.e. 2φ = arcsin(√sin(θ/2)), from the leading-order BCH relation
304///   VWV†W† ≈ exp(i·4φ²·n̂·σ)  with 4φ² ≈ θ for small θ.
305///
306/// Then V = rotation about e₂ by 2φ, W = rotation about e₁ by 2φ,
307/// where {e₁, e₂, n̂} form an orthonormal triad with e₂ = n̂ × e₁.
308///
309/// The group commutator VWV†W† has rotation axis +n̂ (not −n̂), because
310/// the Lie-algebra commutator [e₂·σ, e₁·σ] = 2i(e₂×e₁)·σ = 2i(−n̂)·σ
311/// and the group-commutator first-order term picks up an additional minus,
312/// giving net axis +n̂ matching delta.
313///
314/// # Convergence note
315///
316/// The perpendicular-axis BCH commutator matches the rotation **angle** of
317/// delta to leading order in θ but has O(φ) residual **axis** drift at finite
318/// rotation angles.  In practice, the fallback guard in `sk_recurse` means
319/// recursion depth > 1 rarely gives strict improvement over depth 1: the
320/// commutator construction removes one layer of error but the axis drift
321/// floors the residual near the depth-1 result.  Depth 1 provides the
322/// practical O(ε^{3/2}) improvement; deeper depths are bounded above
323/// (never worse due to the fallback) but not guaranteed to be strictly better.
324///
325/// Returns (V, W) such that V*W*V†*W† ≈ U.
326pub fn balanced_commutator_decompose(u: &SU2) -> QuantRS2Result<(SU2, SU2)> {
327    let theta = u.rotation_angle();
328
329    // BCH formula: sin²(2φ) = sin(θ/2), i.e. 2φ = arcsin(√sin(θ/2))
330    // For small θ: 4φ² ≈ θ, matching the rotation angle of the commutator.
331    let sin_half_theta = (theta / 2.0).sin().abs();
332
333    let sin_sq_phi_arg = sin_half_theta.sqrt();
334    // Clamp due to floating point
335    let phi = if sin_sq_phi_arg >= 1.0 {
336        PI / 4.0
337    } else {
338        sin_sq_phi_arg.asin() / 2.0
339    };
340
341    let n_hat = u.rotation_axis();
342    let (nx, ny, nz) = (n_hat[0], n_hat[1], n_hat[2]);
343
344    // Find e1 perpendicular to n̂
345    // If n̂ is not parallel to ẑ, use e1 = normalize(n̂ × ẑ)
346    // Otherwise use e1 = x̂
347    let (e1x, e1y, e1z) = if (nz.abs() - 1.0).abs() > 1e-6 {
348        // n̂ × ẑ = (ny*1 - nz*0, nz*0 - nx*1, nx*0 - ny*0) = (ny, -nx, 0)
349        let cx = ny;
350        let cy = -nx;
351        let cz = 0.0_f64;
352        let norm = (cx * cx + cy * cy + cz * cz).sqrt();
353        if norm < 1e-12 {
354            (1.0_f64, 0.0_f64, 0.0_f64)
355        } else {
356            (cx / norm, cy / norm, cz / norm)
357        }
358    } else {
359        (1.0_f64, 0.0_f64, 0.0_f64)
360    };
361
362    // e2 = n̂ × e1
363    let e2x = ny * e1z - nz * e1y;
364    let e2y = nz * e1x - nx * e1z;
365    let e2z = nx * e1y - ny * e1x;
366
367    // V = rotation about e2 by 2*phi, W = rotation about e1 by 2*phi.
368    // The group commutator V·W·V†·W† ≈ exp(-2iφ²(e2×e1)·σ).
369    // For our setup: e2 = n̂×e1, so e2×e1 = (n̂×e1)×e1 = -n̂ (by BAC-CAB rule).
370    // Thus commutator ≈ exp(-2iφ²(-n̂)·σ) = exp(2iφ²n̂·σ) = rotation about n̂ by 4φ².
371    // With sin²(2φ) = sin(θ/2), for small θ: 4φ² ≈ θ, giving the approximation.
372    let v = rotation_about_axis(e2x, e2y, e2z, 2.0 * phi);
373    // W = rotation about e1 by 2*phi
374    let w = rotation_about_axis(e1x, e1y, e1z, 2.0 * phi);
375
376    Ok((v, w))
377}
378
379/// Look up table of gate sequences for the universal gate set {H, T, Tdg, S, Sdg}.
380///
381/// **Two-stage Clifford+T construction.**  The single-qubit Clifford group
382/// modulo global phase has only 24 elements, so a naïve BFS over the 5-atom
383/// alphabet quickly collapses every branch onto that 24-element coset and
384/// stalls.  To get richer SU(2) coverage we decouple the order-24 Clifford
385/// subgroup from the T-count growth:
386///
387/// 1. **Stage 1 — Clifford enumeration.**  BFS over {H, S, Sdg} until
388///    saturation produces exactly 24 distinct unitaries (up to global phase).
389///    Each carries a sequence of length ≤ 6 — the Clifford diameter for this
390///    generator set.
391///
392/// 2. **Stage 2 — T-count strata.**  For each `k = 1 … max_t_count`, build
393///    `S_k` from `S_{k-1}` by appending `T` or `Tdg`, then a Clifford from
394///    Stage 1.  Every entry in `S_k` therefore has T-count exactly `k`,
395///    sandwiched between Cliffords.  Global de-duplication across all strata
396///    via the spatial-hash grid keeps the table compact.
397///
398/// The resulting table covers the {H,T} normal form up to T-count
399/// `max_t_count`, with Cliffords absorbed at no extra T-count.  Each
400/// additional stratum genuinely expands the SU(2) reach (the Clifford+T
401/// group is dense) rather than re-deriving cosets the previous depth
402/// already covered.
403pub struct BasicApproximationTable {
404    /// (gate_sequence, SU2_matrix) pairs. Pub for test introspection.
405    pub entries: Vec<(Vec<&'static str>, SU2)>,
406}
407
408/// Convert an SU2 matrix to a canonical SU(2) representative for hashing.
409///
410/// A U(2) matrix M = e^{iφ} R where R ∈ SU(2). We factor out the global
411/// phase by computing φ = arg(det(M)) / 2 and then R = e^{-iφ} M.
412/// The canonical SU(2) element R has det(R) = 1 exactly.
413///
414/// To break the remaining ±I ambiguity (since R and -R have the same distance
415/// from identity), we ensure that the component with the largest magnitude is
416/// positive. This gives a unique canonical quaternion (q0, q1, q2, q3) for
417/// each physical gate up to global phase.
418///
419/// Returns (q0, q1, q2, q3) where:
420///   q0 = Re(R.m00), q1 = Im(R.m00), q2 = Re(R.m10), q3 = Im(R.m10)
421fn su2_to_canonical_quat(m: &SU2) -> (f64, f64, f64, f64) {
422    // Factor out global phase: det = m00*m11 - m01*m10, det = e^{2iφ}
423    let det = m.m00 * m.m11 - m.m01 * m.m10;
424    let phase_angle = det.arg() / 2.0; // φ = arg(det)/2
425    let phase_inv = Complex64::new((-phase_angle).cos(), (-phase_angle).sin()); // e^{-iφ}
426                                                                                // R = e^{-iφ} * M  → det(R) = e^{-2iφ} * e^{2iφ} = 1
427    let r00 = m.m00 * phase_inv;
428    let r10 = m.m10 * phase_inv;
429
430    let (q0, q1, q2, q3) = (r00.re, r00.im, r10.re, r10.im);
431
432    // SU(2) double cover: R and -R represent the same physical gate.
433    // Canonicalize by ensuring the largest-magnitude component is positive.
434    let max_abs = q0.abs().max(q1.abs()).max(q2.abs()).max(q3.abs());
435    let sign = if (q0.abs() - max_abs).abs() < 1e-10 {
436        if q0 >= 0.0 {
437            1.0_f64
438        } else {
439            -1.0_f64
440        }
441    } else if (q1.abs() - max_abs).abs() < 1e-10 {
442        if q1 >= 0.0 {
443            1.0_f64
444        } else {
445            -1.0_f64
446        }
447    } else if (q2.abs() - max_abs).abs() < 1e-10 {
448        if q2 >= 0.0 {
449            1.0_f64
450        } else {
451            -1.0_f64
452        }
453    } else {
454        if q3 >= 0.0 {
455            1.0_f64
456        } else {
457            -1.0_f64
458        }
459    };
460
461    (sign * q0, sign * q1, sign * q2, sign * q3)
462}
463
464/// Grid cell key for the spatial hash (deduplication grid).
465fn quat_to_cell(q: (f64, f64, f64, f64), cell_size: f64) -> (i64, i64, i64, i64) {
466    let cell = |x: f64| (x / cell_size).floor() as i64;
467    (cell(q.0), cell(q.1), cell(q.2), cell(q.3))
468}
469
470/// Dedup tolerance: two SU(2) matrices are considered identical if their
471/// up-to-global-phase Frobenius distance is below this threshold.
472///
473/// Direct construction (e.g. `SU2::rotation_z(π/2)` for S) versus accumulated
474/// multiplication of, say, two T-gates can differ by ~1.5e-8 due to argument
475/// reduction differences inside the trigonometric primitives. `1e-6` absorbs
476/// that margin while remaining 4 orders of magnitude smaller than the typical
477/// inter-sequence spacing on SU(2). A value below ~1e-7 causes the BFS to
478/// bloat with near-duplicates (e.g. T·T failing to collapse onto S).
479const DEDUP_TOL: f64 = 1e-6;
480
481/// Grid cell size for the spatial hash.  Must be larger than `DEDUP_TOL` (in
482/// quaternion coordinates) so near-duplicates land in adjacent cells and are
483/// caught by the 16-cell neighbour sweep performed by `DedupTable::contains`.
484const CELL_SIZE: f64 = 0.002;
485
486/// Hash-grid–backed deduplication table for SU(2) elements.
487///
488/// Wraps a `Vec<(seq, matrix)>` plus a 4-D quaternion-grid hash so that
489/// `is_new` runs in O(neighbours) rather than O(N).  Used by both Stage 1
490/// (Clifford enumeration) and Stage 2 (T-count strata) so dedup is global.
491struct DedupTable {
492    entries: Vec<(Vec<&'static str>, SU2)>,
493    grid: HashMap<(i64, i64, i64, i64), Vec<usize>>,
494}
495
496impl DedupTable {
497    fn new() -> Self {
498        Self {
499            entries: Vec::new(),
500            grid: HashMap::new(),
501        }
502    }
503
504    /// Returns `true` if `mat` is already in the table (within `DEDUP_TOL`).
505    fn contains(&self, mat: &SU2) -> bool {
506        let q = su2_to_canonical_quat(mat);
507        let (cx, cy, cz, cw) = quat_to_cell(q, CELL_SIZE);
508        // Check the 3^4 = 81 neighbouring cells (adjacent in each of 4 dims).
509        for dx in -1_i64..=1 {
510            for dy in -1_i64..=1 {
511                for dz in -1_i64..=1 {
512                    for dw in -1_i64..=1 {
513                        let cell = (cx + dx, cy + dy, cz + dz, cw + dw);
514                        if let Some(indices) = self.grid.get(&cell) {
515                            for &idx in indices {
516                                let (_, ref existing_mat) = self.entries[idx];
517                                if existing_mat.frobenius_distance(mat) < DEDUP_TOL {
518                                    return true;
519                                }
520                            }
521                        }
522                    }
523                }
524            }
525        }
526        false
527    }
528
529    /// Insert (`seq`, `mat`) and register it in the spatial hash. Caller must
530    /// have already established that `mat` is not a duplicate.
531    fn insert(&mut self, seq: Vec<&'static str>, mat: SU2) {
532        let idx = self.entries.len();
533        let q = su2_to_canonical_quat(&mat);
534        let cell = quat_to_cell(q, CELL_SIZE);
535        self.grid.entry(cell).or_default().push(idx);
536        self.entries.push((seq, mat));
537    }
538}
539
540/// Enumerate the order-24 single-qubit Clifford group as `(matrix, sequence)`
541/// pairs over the generator set {H, S, Sdg}.
542///
543/// The Clifford group modulo global phase is finite with order 24, generated
544/// by H and S. BFS until saturation (queue empties) reaches every element in
545/// at most 6 generator-applications.
546///
547/// Returns exactly 24 entries; the caller may assert this invariant.
548fn enumerate_cliffords() -> Vec<(Vec<&'static str>, SU2)> {
549    let h_gate = SU2::from_gate_name("H").unwrap_or_else(SU2::identity);
550    let s_gate = SU2::from_gate_name("S").unwrap_or_else(SU2::identity);
551    let sdg_gate = SU2::from_gate_name("Sdg").unwrap_or_else(SU2::identity);
552    let generators: &[(&'static str, &SU2)] = &[("H", &h_gate), ("S", &s_gate), ("Sdg", &sdg_gate)];
553
554    let mut table = DedupTable::new();
555    table.insert(Vec::new(), SU2::identity());
556
557    let mut queue: std::collections::VecDeque<usize> = std::collections::VecDeque::new();
558    queue.push_back(0);
559
560    while let Some(idx) = queue.pop_front() {
561        let (seq, mat) = {
562            let (s, m) = &table.entries[idx];
563            (s.clone(), *m)
564        };
565        for (gen_name, gen_mat) in generators {
566            let candidate_mat = mat.mul(gen_mat);
567            if !table.contains(&candidate_mat) {
568                let mut candidate_seq = Vec::with_capacity(seq.len() + 1);
569                candidate_seq.extend_from_slice(&seq);
570                candidate_seq.push(gen_name);
571                let new_idx = table.entries.len();
572                table.insert(candidate_seq, candidate_mat);
573                queue.push_back(new_idx);
574            }
575        }
576    }
577
578    table.entries
579}
580
581impl BasicApproximationTable {
582    /// Build the basic approximation table via the two-stage Clifford+T BFS.
583    ///
584    /// Stage 1 enumerates the 24-element single-qubit Clifford group from
585    /// {H, S, Sdg}.  Stage 2 builds T-count strata `S_1, …, S_K` where each
586    /// `S_k` extends `S_{k-1}` by `T` or `Tdg` followed by an arbitrary
587    /// Clifford. Global dedup keeps the table compact while every stratum
588    /// genuinely extends SU(2) coverage (the Clifford+T group is dense).
589    ///
590    /// `table_depth` is **re-interpreted as the maximum T-count** for the
591    /// new algorithm (it was the BFS depth previously). Internally capped at
592    /// `MAX_T_COUNT_LIMIT` to avoid combinatorial explosion at large values
593    /// — a `table_depth` of 10 collapses to the same internal cap.
594    pub fn build(table_depth: usize) -> Self {
595        // Internal cap on the maximum T-count.  Stratum size grows roughly
596        // 2× per level once the Clifford-T mod-phase quotient saturates.
597        // We pick the smallest level that gives strict RY(0.3) coverage
598        // (i.e. some entry beats the identity bound on small Y rotations).
599        const MAX_T_COUNT_LIMIT: usize = 10;
600        let max_t_count = std::cmp::min(table_depth, MAX_T_COUNT_LIMIT);
601
602        let t_gate = SU2::from_gate_name("T").unwrap_or_else(SU2::identity);
603        let tdg_gate = SU2::from_gate_name("Tdg").unwrap_or_else(SU2::identity);
604        let t_atoms: &[(&'static str, &SU2)] = &[("T", &t_gate), ("Tdg", &tdg_gate)];
605
606        // ---- Stage 1: enumerate the 24-element Clifford group ----
607        let cliffords = enumerate_cliffords();
608        debug_assert_eq!(
609            cliffords.len(),
610            24,
611            "single-qubit Clifford group should have 24 elements modulo global phase"
612        );
613
614        // ---- Stage 2: T-count strata, all hashed into a global dedup table ----
615        let mut table = DedupTable::new();
616        for (seq, mat) in &cliffords {
617            table.insert(seq.clone(), *mat);
618        }
619        // `prev_stratum` carries indices into `table.entries` for the previous
620        // stratum so we extend only those (without re-extending earlier strata).
621        let mut prev_stratum: Vec<usize> = (0..cliffords.len()).collect();
622
623        for _k in 1..=max_t_count {
624            let mut next_stratum: Vec<usize> = Vec::new();
625            for &prev_idx in &prev_stratum {
626                // Snapshot the previous-stratum entry so subsequent inserts
627                // (which may grow `table.entries`) don't invalidate the borrow.
628                let (prev_seq, prev_mat) = {
629                    let (s, m) = &table.entries[prev_idx];
630                    (s.clone(), *m)
631                };
632                for (t_name, t_mat) in t_atoms {
633                    // Hoist e_mat * t_atom out of the inner Clifford loop.
634                    let after_t = prev_mat.mul(t_mat);
635                    for (c_seq, c_mat) in &cliffords {
636                        let candidate_mat = after_t.mul(c_mat);
637                        if !table.contains(&candidate_mat) {
638                            // Build the sequence only once we know it's new
639                            // — saves up to 48× allocation per kept entry.
640                            let mut candidate_seq =
641                                Vec::with_capacity(prev_seq.len() + 1 + c_seq.len());
642                            candidate_seq.extend_from_slice(&prev_seq);
643                            candidate_seq.push(t_name);
644                            candidate_seq.extend_from_slice(c_seq);
645                            let new_idx = table.entries.len();
646                            table.insert(candidate_seq, candidate_mat);
647                            next_stratum.push(new_idx);
648                        }
649                    }
650                }
651            }
652            if next_stratum.is_empty() {
653                break;
654            }
655            prev_stratum = next_stratum;
656        }
657
658        Self {
659            entries: table.entries,
660        }
661    }
662
663    /// Find the sequence in the table closest to `target` (up-to-global-phase distance).
664    ///
665    /// Returns the slice of gate names and the distance achieved.
666    /// O(N) linear scan over the entire table.
667    pub fn find_closest<'a>(&'a self, target: &SU2) -> (&'a [&'static str], f64) {
668        let mut best_dist = f64::INFINITY;
669        let mut best_seq: &[&'static str] = &[];
670
671        for (seq, mat) in &self.entries {
672            let dist = mat.frobenius_distance(target);
673            if dist < best_dist {
674                best_dist = dist;
675                best_seq = seq.as_slice();
676            }
677        }
678
679        (best_seq, best_dist)
680    }
681
682    /// Number of entries in the table.
683    pub fn len(&self) -> usize {
684        self.entries.len()
685    }
686}
687
688/// Convert a named adjoint gate.
689fn adjoint_gate(gate: &'static str) -> &'static str {
690    match gate {
691        "T" => "Tdg",
692        "Tdg" => "T",
693        "S" => "Sdg",
694        "Sdg" => "S",
695        "H" => "H",
696        "X" => "X",
697        "Y" => "Y",
698        "Z" => "Z",
699        other => other,
700    }
701}
702
703/// Compute the SU2 matrix product of a gate sequence.
704///
705/// Sequences are applied left-to-right: the first gate in the slice is applied first.
706pub fn sequence_to_matrix(seq: &[&'static str]) -> SU2 {
707    seq.iter().fold(SU2::identity(), |acc, g| {
708        let gate = SU2::from_gate_name(g).unwrap_or(SU2::identity());
709        acc.mul(&gate)
710    })
711}
712
713/// Solovay-Kitaev decomposer.
714///
715/// Approximates any single-qubit SU(2) gate to precision ε using sequences
716/// from {H, T, Tdg, S, Sdg}.
717///
718/// # Convergence behaviour
719///
720/// The internal `balanced_commutator_decompose` uses a BCH-derived formula
721/// that matches the rotation *angle* of the residual delta to leading order in
722/// the rotation angle θ, but has an O(φ) axis-drift at finite θ.  As a result:
723///
724/// - **Depth 0**: pure table lookup — precision is limited by table density.
725/// - **Depth 1**: provides meaningful O(ε^{3/2}) improvement over depth 0 for
726///   targets where the table is not already near-optimal.
727/// - **Depth ≥ 2**: the fallback guard (which never returns a worse result)
728///   means deeper recursion is bounded above by the depth-1 result but is
729///   rarely strictly better.  The axis drift of the commutator construction
730///   constitutes a floor that prevents further BCH-based reduction.
731///
732/// In practice, use `recursion_depth = 1` for the best trade-off between
733/// sequence length and approximation quality.  Depths 2–4 are safe (they never
734/// regress) but provide negligible additional improvement with this gate set.
735///
736/// # Example
737///
738/// ```rust
739/// use quantrs2_circuit::solovay_kitaev::{SU2, SOKDecomposer, sequence_to_matrix};
740///
741/// let target = SU2::rotation_z(0.5);
742/// let decomposer = SOKDecomposer::new(10, 1);
743/// let seq = decomposer.decompose(&target).expect("decomposition failed");
744/// let approx = sequence_to_matrix(&seq);
745/// let dist = approx.frobenius_distance(&target);
746/// assert!(dist < 0.15, "SK approximation distance: {dist}");
747/// ```
748pub struct SOKDecomposer {
749    table: BasicApproximationTable,
750    /// Number of SK recursion levels (0 = table lookup only; 1 = best practical depth).
751    ///
752    /// Depth 1 provides meaningful improvement over depth 0.  Depths ≥ 2 are
753    /// bounded above by the depth-1 result and rarely strictly better due to the
754    /// axis-drift floor in the BCH commutator construction (see struct doc).
755    pub recursion_depth: usize,
756}
757
758impl SOKDecomposer {
759    /// Build the decomposer with a precomputed basic approximation table.
760    ///
761    /// `table_depth`: maximum T-count for the basic table (re-interpreted from
762    ///   the previous BFS-depth meaning).  See [`BasicApproximationTable::build`]
763    ///   for the two-stage Clifford+T construction.  The table internally caps
764    ///   the value to keep build time bounded; passing `10` is fine — it will
765    ///   collapse to the internal cap.  Smaller values (0–3) give sparser
766    ///   coverage and may not be dense enough for the BCH commutator
767    ///   construction to improve the result at recursion depth 1.
768    ///
769    /// `recursion_depth`: number of SK recursion levels (0 = table only).
770    ///   Use 1 for the best improvement/gate-count trade-off.  Depth 1 gives
771    ///   O(ε^{3/2}) improvement at a cost of ~4× more gates.  Depths ≥ 2 are
772    ///   safe (never worse due to the fallback guard) but rarely strictly better
773    ///   with this commutator construction — see struct-level doc for details.
774    pub fn new(table_depth: usize, recursion_depth: usize) -> Self {
775        Self {
776            table: BasicApproximationTable::build(table_depth),
777            recursion_depth,
778        }
779    }
780
781    /// Decompose `u` into a sequence of gate names from {H, T, Tdg, S, Sdg}.
782    pub fn decompose(&self, u: &SU2) -> QuantRS2Result<Vec<&'static str>> {
783        self.sk_recurse(u, self.recursion_depth)
784    }
785
786    /// Internal recursive Solovay-Kitaev routine.
787    fn sk_recurse(&self, u: &SU2, n: usize) -> QuantRS2Result<Vec<&'static str>> {
788        if n == 0 {
789            let (seq, _dist) = self.table.find_closest(u);
790            return Ok(seq.to_vec());
791        }
792
793        // Obtain U_{n-1}: best approximation using n-1 recursion levels
794        let u_prev_seq = self.sk_recurse(u, n - 1)?;
795        let u_prev = sequence_to_matrix(&u_prev_seq);
796        // Use up-to-phase distance for quality comparison (the reported metric)
797        let prev_dist = u_prev.frobenius_distance(u);
798
799        // Compute delta = U * u_prev† in a phase-aligned way.
800        //
801        // Since u_prev is found using the up-to-phase metric, the matrix u_prev
802        // may differ from U by a global phase. We align phases before computing
803        // the residual so that delta has Re[Tr] maximized (closest to identity
804        // in the Dawson-Nielsen metric), enabling accurate commutator construction.
805        //
806        // Optimal alignment: multiply u_prev by e^{iφ} where φ = -arg(Tr(U * u_prev†))/2
807        // This makes Tr(delta) real and positive.
808        let raw_delta = u.mul(&u_prev.adjoint());
809        let tr_delta = raw_delta.trace();
810        let phase_angle = -tr_delta.arg() / 2.0; // Negative half the trace argument
811        let phase = Complex64::new(phase_angle.cos(), phase_angle.sin());
812        // Apply phase to u_prev: u_prev_phased = e^{iφ} * u_prev
813        let u_prev_phased = SU2 {
814            m00: u_prev.m00 * phase,
815            m01: u_prev.m01 * phase,
816            m10: u_prev.m10 * phase,
817            m11: u_prev.m11 * phase,
818        };
819        // delta_phased = U * u_prev_phased† = U * e^{-iφ} * u_prev†
820        // This has Re[Tr(delta_phased)] maximized = |Tr(raw_delta)|
821        let delta = u.mul(&u_prev_phased.adjoint());
822
823        // Decompose delta as a balanced commutator: delta ≈ V * W * V† * W†
824        let (v, w) = balanced_commutator_decompose(&delta)?;
825
826        // Recursively approximate V and W to depth n-1
827        let v_seq = self.sk_recurse(&v, n - 1)?;
828        let w_seq = self.sk_recurse(&w, n - 1)?;
829
830        // V† sequence: reverse and conjugate each gate
831        let v_adj_seq: Vec<&'static str> = v_seq.iter().rev().map(|g| adjoint_gate(g)).collect();
832        let w_adj_seq: Vec<&'static str> = w_seq.iter().rev().map(|g| adjoint_gate(g)).collect();
833
834        // Final sequence: V_n W_n V_n† W_n† U_{n-1}
835        let mut result = v_seq;
836        result.extend_from_slice(&w_seq);
837        result.extend(v_adj_seq);
838        result.extend(w_adj_seq);
839        result.extend_from_slice(&u_prev_seq);
840
841        // Guard: if the new result is worse than the previous approximation,
842        // fall back to u_prev_seq. The commutator decomposition may introduce
843        // more error than it corrects when the basic table is not dense enough.
844        let new_mat = sequence_to_matrix(&result);
845        let new_dist = new_mat.frobenius_distance(u);
846        if new_dist >= prev_dist {
847            return Ok(u_prev_seq);
848        }
849
850        Ok(result)
851    }
852}
853
854#[cfg(test)]
855mod tests {
856    use super::*;
857
858    /// Tolerance for floating-point comparisons in basic unitary tests.
859    /// Using 1e-6 to accommodate accumulated floating-point rounding.
860    const EPS: f64 = 1e-6;
861
862    #[test]
863    fn test_su2_identity_distance() {
864        let i = SU2::identity();
865        assert!(
866            i.distance_from_identity() < EPS,
867            "identity distance should be ~0, got {}",
868            i.distance_from_identity()
869        );
870    }
871
872    #[test]
873    fn test_su2_multiplication_hh_is_identity() {
874        let h = SU2::from_gate_name("H").expect("H gate must exist");
875        let hh = h.mul(&h);
876        assert!(
877            hh.frobenius_distance(&SU2::identity()) < EPS,
878            "H*H should equal Identity (up to global phase), got {}",
879            hh.frobenius_distance(&SU2::identity())
880        );
881    }
882
883    #[test]
884    fn test_su2_tt_is_s() {
885        let t = SU2::from_gate_name("T").expect("T gate must exist");
886        let s = SU2::from_gate_name("S").expect("S gate must exist");
887        let tt = t.mul(&t);
888        assert!(
889            tt.frobenius_distance(&s) < EPS,
890            "T*T should equal S, got {}",
891            tt.frobenius_distance(&s)
892        );
893    }
894
895    #[test]
896    fn test_adjoint_t_tdg() {
897        let t = SU2::from_gate_name("T").expect("T gate must exist");
898        let tdg = SU2::from_gate_name("Tdg").expect("Tdg gate must exist");
899        assert!(
900            t.adjoint().frobenius_distance(&tdg) < EPS,
901            "T† should equal Tdg, dist = {}",
902            t.adjoint().frobenius_distance(&tdg)
903        );
904    }
905
906    #[test]
907    fn test_rotation_z_composition() {
908        let rz1 = SU2::rotation_z(0.3);
909        let rz2 = SU2::rotation_z(0.7);
910        let combined = rz1.mul(&rz2);
911        let expected = SU2::rotation_z(1.0);
912        assert!(
913            combined.frobenius_distance(&expected) < EPS,
914            "RZ(0.3)*RZ(0.7) should equal RZ(1.0)"
915        );
916    }
917
918    #[test]
919    fn test_basic_approximation_table_size() {
920        let table = BasicApproximationTable::build(5);
921        assert!(
922            table.entries.len() > 10,
923            "Table should have many entries, got {}",
924            table.entries.len()
925        );
926    }
927
928    #[test]
929    fn test_basic_approximation_table_finds_identity() {
930        let table = BasicApproximationTable::build(5);
931        let identity = SU2::identity();
932        let (_seq, dist) = table.find_closest(&identity);
933        assert!(
934            dist < 0.1,
935            "Should find approximation close to identity, dist = {}",
936            dist
937        );
938    }
939
940    #[test]
941    fn test_balanced_commutator_small_rotation() {
942        let small_u = SU2::rotation_z(0.1);
943        let (v, w) =
944            balanced_commutator_decompose(&small_u).expect("commutator decompose should succeed");
945        let commutator = v.mul(&w).mul(&v.adjoint()).mul(&w.adjoint());
946        let dist = commutator.frobenius_distance(&small_u);
947        assert!(
948            dist < 0.2,
949            "Balanced commutator should approximate small rotation U, got dist={}",
950            dist
951        );
952    }
953
954    #[test]
955    fn test_balanced_commutator_identity() {
956        // Identity has zero rotation → commutator of identity matrices = identity
957        let u = SU2::identity();
958        let (v, w) =
959            balanced_commutator_decompose(&u).expect("commutator decompose should succeed");
960        let commutator = v.mul(&w).mul(&v.adjoint()).mul(&w.adjoint());
961        let dist = commutator.frobenius_distance(&u);
962        assert!(
963            dist < 0.1,
964            "Commutator decompose of identity should be identity, got dist={}",
965            dist
966        );
967    }
968
969    #[test]
970    fn test_solovay_kitaev_rz_depth0() {
971        // Depth=0 is pure table lookup — should find something within 0.5 of target.
972        let target = SU2::rotation_z(0.5);
973        let decomposer = SOKDecomposer::new(10, 0);
974        let seq = decomposer.decompose(&target).expect("decompose failed");
975        let approx = sequence_to_matrix(&seq);
976        let dist = approx.frobenius_distance(&target);
977        // Table lookup: with depth=10, expect dist < 0.2 for most targets
978        assert!(
979            dist < 0.5,
980            "Depth-0 should be within 0.5 of target, got {dist}"
981        );
982    }
983
984    #[test]
985    fn test_solovay_kitaev_rz_depth1() {
986        // Approximate RZ(0.5) using SK at depth=1 with the two-stage Clifford+T table
987        // (table_depth=10 → max_t_count=10, ~73k entries).
988        // SK at depth 1 gives O(ε^{3/2}) improvement over depth=0.
989        // Empirically: depth-0 dist ≈ 0.044, depth-1 dist ≈ 0.037.
990        let target = SU2::rotation_z(0.5);
991        let decomposer = SOKDecomposer::new(10, 1);
992        let seq = decomposer.decompose(&target).expect("decompose failed");
993
994        let approx = sequence_to_matrix(&seq);
995        let dist = approx.frobenius_distance(&target);
996        let depth0_dist = {
997            let d0 = SOKDecomposer::new(10, 0);
998            let s0 = d0.decompose(&target).expect("depth0 failed");
999            sequence_to_matrix(&s0).frobenius_distance(&target)
1000        };
1001        // SK depth=1 must strictly improve over depth=0
1002        assert!(
1003            dist < depth0_dist,
1004            "SK depth=1 ({dist:.6}) should improve over depth=0 ({depth0_dist:.6})"
1005        );
1006        // Absolute bound: with the dense Clifford+T table, depth-1 reaches ~0.037
1007        assert!(
1008            dist < 0.05,
1009            "SK depth=1 should achieve dist < 0.05, got {dist:.6}"
1010        );
1011        assert!(!seq.is_empty(), "Sequence should not be empty");
1012    }
1013
1014    #[test]
1015    fn test_solovay_kitaev_rz_depth2() {
1016        // At depth 2, SK falls back to the depth-1 result due to the axis-drift
1017        // floor in the BCH commutator construction (see SOKDecomposer doc).
1018        // The fallback guard guarantees depth-2 is never worse than depth-1.
1019        let target = SU2::rotation_z(0.5);
1020        let decomposer2 = SOKDecomposer::new(10, 2);
1021        let seq2 = decomposer2.decompose(&target).expect("decompose failed");
1022        let dist2 = sequence_to_matrix(&seq2).frobenius_distance(&target);
1023
1024        let decomposer1 = SOKDecomposer::new(10, 1);
1025        let seq1 = decomposer1.decompose(&target).expect("depth1 failed");
1026        let dist1 = sequence_to_matrix(&seq1).frobenius_distance(&target);
1027
1028        // Depth=2 must be at least as good as depth=1 (fallback guard ensures this).
1029        // Strict improvement is not guaranteed with BCH commutator at this gate set density.
1030        assert!(
1031            dist2 <= dist1 + 1e-9,
1032            "SK depth=2 ({dist2:.6}) should not be worse than depth=1 ({dist1:.6})"
1033        );
1034    }
1035
1036    #[test]
1037    fn test_solovay_kitaev_convergence_debug() {
1038        // Check convergence at each depth to understand algorithm behavior.
1039        // Uses table_depth=10 (~73k entries via the two-stage Clifford+T BFS) for
1040        // dense coverage.
1041        let target = SU2::rotation_z(0.5);
1042        let table = BasicApproximationTable::build(10);
1043
1044        eprintln!("Table size: {}", table.entries.len());
1045
1046        // Depth 0: table lookup — should find a good starting approximation
1047        let (seq0, dist0) = table.find_closest(&target);
1048        eprintln!("Depth 0: dist={:.6}, seq_len={}", dist0, seq0.len());
1049
1050        // Print the top 5 closest entries in the table to RZ(0.5)
1051        let mut dists: Vec<(f64, usize)> = table
1052            .entries
1053            .iter()
1054            .enumerate()
1055            .map(|(i, (_, mat))| (mat.frobenius_distance(&target), i))
1056            .collect();
1057        dists.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
1058        eprintln!("Top-5 closest table entries to RZ(0.5):");
1059        for (dist, idx) in dists.iter().take(5) {
1060            eprintln!("  dist={:.6}, seq={:?}", dist, table.entries[*idx].0);
1061        }
1062
1063        // Debug the commutator construction with phase-alignment (as in sk_recurse)
1064        let u_prev = sequence_to_matrix(seq0);
1065        let raw_delta = target.mul(&u_prev.adjoint());
1066        let tr_delta = raw_delta.trace();
1067        let phase_angle = -tr_delta.arg() / 2.0;
1068        let phase = Complex64::new(phase_angle.cos(), phase_angle.sin());
1069        let u_prev_phased = SU2 {
1070            m00: u_prev.m00 * phase,
1071            m01: u_prev.m01 * phase,
1072            m10: u_prev.m10 * phase,
1073            m11: u_prev.m11 * phase,
1074        };
1075        let delta = target.mul(&u_prev_phased.adjoint());
1076        let delta_dist = delta.distance_from_identity();
1077        eprintln!("Phase-aligned delta dist from identity: {:.6}", delta_dist);
1078        eprintln!(
1079            "Phase-aligned delta rotation angle: {:.6}",
1080            delta.rotation_angle()
1081        );
1082
1083        let (v, w) = balanced_commutator_decompose(&delta).expect("commutator failed");
1084        let (v_seq, v_dist) = table.find_closest(&v);
1085        let (w_seq, w_dist) = table.find_closest(&w);
1086        eprintln!("V approx dist: {:.6}, W approx dist: {:.6}", v_dist, w_dist);
1087
1088        // Compute the commutator using exact v and w
1089        let exact_comm = v.mul(&w).mul(&v.adjoint()).mul(&w.adjoint());
1090        let exact_comm_dist = exact_comm.frobenius_distance(&delta);
1091        eprintln!("Exact commutator dist from delta: {:.6}", exact_comm_dist);
1092
1093        // Compute the commutator using approximated v and w
1094        let v_mat = sequence_to_matrix(v_seq);
1095        let w_mat = sequence_to_matrix(w_seq);
1096        let approx_comm = v_mat
1097            .mul(&w_mat)
1098            .mul(&v_mat.adjoint())
1099            .mul(&w_mat.adjoint());
1100        let approx_comm_to_delta = approx_comm.frobenius_distance(&delta);
1101        let approx_comm_to_i = approx_comm.distance_from_identity();
1102        eprintln!(
1103            "Approx commutator dist from delta: {:.6}",
1104            approx_comm_to_delta
1105        );
1106        eprintln!("Approx commutator dist from I: {:.6}", approx_comm_to_i);
1107
1108        // Full depth=1 result: V_approx * W_approx * V_approx† * W_approx† * u_prev
1109        let v_adj_seq: Vec<&'static str> = v_seq.iter().rev().map(|g| adjoint_gate(g)).collect();
1110        let w_adj_seq: Vec<&'static str> = w_seq.iter().rev().map(|g| adjoint_gate(g)).collect();
1111        let mut full_seq = v_seq.to_vec();
1112        full_seq.extend_from_slice(w_seq);
1113        full_seq.extend(v_adj_seq);
1114        full_seq.extend(w_adj_seq);
1115        full_seq.extend_from_slice(seq0);
1116        let full_mat = sequence_to_matrix(&full_seq);
1117        let full_dist = full_mat.frobenius_distance(&target);
1118        eprintln!("Full depth=1 result dist from target: {:.6}", full_dist);
1119
1120        // Using the full decomposer at depth=1
1121        let decomposer = SOKDecomposer::new(10, 1);
1122        let sk_seq = decomposer.decompose(&target).expect("SK decompose failed");
1123        let sk_mat = sequence_to_matrix(&sk_seq);
1124        let sk_dist = sk_mat.frobenius_distance(&target);
1125        eprintln!("SK depth=1 dist: {:.6}, seq_len={}", sk_dist, sk_seq.len());
1126
1127        assert!(dist0 < 0.5, "depth-0 should find something reasonable");
1128    }
1129
1130    #[test]
1131    fn test_solovay_kitaev_rz_depth3() {
1132        // At depth 3, SK is monotone (fallback guard ensures never-worse).
1133        // The BCH commutator axis-drift floors the result near the depth-1 value;
1134        // strict improvement over depth-1 is not guaranteed with this gate set.
1135        // With table_depth=10 (~73k entries), depth-1 achieves dist ≈ 0.037.
1136        let target = SU2::rotation_z(0.5);
1137        let d1 = SOKDecomposer::new(10, 1)
1138            .decompose(&target)
1139            .expect("depth1 failed");
1140        let dist1 = sequence_to_matrix(&d1).frobenius_distance(&target);
1141
1142        let d3 = SOKDecomposer::new(10, 3)
1143            .decompose(&target)
1144            .expect("depth3 failed");
1145        let dist3 = sequence_to_matrix(&d3).frobenius_distance(&target);
1146
1147        // Depth=3 must be at least as good as depth=1 (SK is monotone by construction).
1148        // Strict improvement is not guaranteed — depth-3 typically equals depth-1.
1149        assert!(
1150            dist3 <= dist1 + 1e-9,
1151            "SK depth=3 ({dist3:.6}) should not be worse than depth=1 ({dist1:.6})"
1152        );
1153        assert!(!d3.is_empty(), "Sequence should not be empty");
1154    }
1155
1156    #[test]
1157    fn test_solovay_kitaev_sequence_gates_valid() {
1158        // All gates in sequence should be from {H, T, Tdg, S, Sdg}
1159        let target = SU2::rotation_z(0.5);
1160        let decomposer = SOKDecomposer::new(10, 2);
1161        let seq = decomposer.decompose(&target).expect("decompose failed");
1162
1163        for gate in &seq {
1164            assert!(
1165                ["H", "T", "Tdg", "S", "Sdg"].contains(gate),
1166                "Gate '{}' not in universal set {{H, T, Tdg, S, Sdg}}",
1167                gate
1168            );
1169        }
1170    }
1171
1172    #[test]
1173    fn test_sequence_to_matrix_empty() {
1174        let m = sequence_to_matrix(&[]);
1175        assert!(
1176            m.frobenius_distance(&SU2::identity()) < EPS,
1177            "Empty sequence should give identity"
1178        );
1179    }
1180
1181    #[test]
1182    fn test_sequence_to_matrix_h() {
1183        let m = sequence_to_matrix(&["H"]);
1184        let h = SU2::from_gate_name("H").expect("H must exist");
1185        assert!(m.frobenius_distance(&h) < EPS);
1186    }
1187
1188    #[test]
1189    fn test_adjoint_gate_roundtrip() {
1190        assert_eq!(adjoint_gate("T"), "Tdg");
1191        assert_eq!(adjoint_gate("Tdg"), "T");
1192        assert_eq!(adjoint_gate("H"), "H");
1193        assert_eq!(adjoint_gate("S"), "Sdg");
1194        assert_eq!(adjoint_gate("Sdg"), "S");
1195    }
1196
1197    #[test]
1198    fn test_rotation_about_axis_z() {
1199        // rotation_about_axis(0, 0, 1, theta) should equal rotation_z(theta)
1200        let theta = 0.7;
1201        let r1 = rotation_about_axis(0.0, 0.0, 1.0, theta);
1202        let r2 = SU2::rotation_z(theta);
1203        assert!(
1204            r1.frobenius_distance(&r2) < EPS,
1205            "rotation_about_axis(z) should match rotation_z"
1206        );
1207    }
1208
1209    #[test]
1210    fn test_rotation_about_axis_x() {
1211        let theta = 1.2;
1212        let r1 = rotation_about_axis(1.0, 0.0, 0.0, theta);
1213        let r2 = SU2::rotation_x(theta);
1214        assert!(
1215            r1.frobenius_distance(&r2) < EPS,
1216            "rotation_about_axis(x) should match rotation_x"
1217        );
1218    }
1219
1220    #[test]
1221    fn test_clifford_enumeration_size() {
1222        // Sanity check: the single-qubit Clifford group modulo global phase
1223        // has exactly 24 elements, generated by {H, S, Sdg}.
1224        let cliffords = enumerate_cliffords();
1225        assert_eq!(
1226            cliffords.len(),
1227            24,
1228            "Clifford group enumeration should yield exactly 24 elements"
1229        );
1230    }
1231
1232    #[test]
1233    fn test_two_stage_table_covers_small_y_rotation() {
1234        // Coverage regression test: with the previous single-stage BFS at depth 10,
1235        // the table contained no element closer to RY(0.3) than the identity
1236        // (depth-0 dist = 0.149, seq_len = 0). The two-stage Clifford+T BFS
1237        // expands SU(2) coverage so RY(0.3) gets a non-trivial approximation.
1238        let table = BasicApproximationTable::build(10);
1239        let target = SU2::rotation_y(0.3);
1240        let (seq, dist) = table.find_closest(&target);
1241        assert!(
1242            !seq.is_empty(),
1243            "Table should cover RY(0.3) with a non-identity sequence (got seq_len=0, dist={dist:.6})"
1244        );
1245        // Distance should be meaningfully better than the identity bound (~0.150).
1246        assert!(
1247            dist < 0.10,
1248            "RY(0.3) approximation should beat identity bound 0.15, got {dist:.6}"
1249        );
1250    }
1251}