dreamwell_intelligence/
density_matrix.rs

1// DensityMatrixN — arbitrary-dimension density matrix for quantum transformers.
2//
3// Hermitian, trace-1, positive semidefinite. Heap-allocated for any dimension.
4// Clean Compute: contiguous Vec<Complex>, pre-allocated scratch buffers,
5// uses dreamwell-math linalg/eigen/matrix_exp for all heavy lifting.
6
7use crate::complex::Complex;
8
9/// Golden ratio — used for temperature scaling.
10const PHI: f32 = 1.618033988;
11
12/// Deterministic pseudo-random from seed (splitmix64-derived, f32 output).
13fn splitmix64_f32(seed: u64) -> f32 {
14    let mut z = seed.wrapping_add(0x9e3779b97f4a7c15);
15    z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
16    z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
17    z = z ^ (z >> 31);
18    (z as f32) / (u64::MAX as f32)
19}
20
21/// Arbitrary-dimension density matrix (NxN, row-major).
22/// Pre-allocated scratch buffers for zero-allocation hot-path operations.
23#[derive(Clone)]
24pub struct DensityMatrixN {
25    pub dim: usize,
26    pub entries: Vec<Complex>,
27    /// Scratch buffers for evolve() and entropy — avoids per-call allocation.
28    pub(crate) scratch_a: Vec<Complex>,
29    #[allow(dead_code)]
30    pub(crate) scratch_b: Vec<Complex>,
31}
32
33impl DensityMatrixN {
34    fn with_scratch(dim: usize, entries: Vec<Complex>) -> Self {
35        let n2 = dim * dim;
36        Self {
37            dim,
38            entries,
39            scratch_a: vec![Complex::ZERO; n2],
40            scratch_b: vec![Complex::ZERO; n2],
41        }
42    }
43
44    /// Create the maximally mixed state I/N.
45    pub fn maximally_mixed(dim: usize) -> Self {
46        let mut entries = vec![Complex::ZERO; dim * dim];
47        let p = 1.0 / dim as f32;
48        for k in 0..dim {
49            entries[k * dim + k] = Complex::new(p, 0.0);
50        }
51        Self::with_scratch(dim, entries)
52    }
53
54    /// Create a pure state |k⟩⟨k|.
55    pub fn pure_state(k: usize, dim: usize) -> Self {
56        let mut entries = vec![Complex::ZERO; dim * dim];
57        entries[k * dim + k] = Complex::ONE;
58        Self::with_scratch(dim, entries)
59    }
60
61    /// Create an equal superposition: |ψ⟩ = (1/√N) Σ|k⟩ → ρ = |ψ⟩⟨ψ|.
62    pub fn equal_superposition(dim: usize) -> Self {
63        let amp = 1.0 / (dim as f32).sqrt();
64        let val = amp * amp;
65        let mut entries = vec![Complex::ZERO; dim * dim];
66        for i in 0..dim {
67            for j in 0..dim {
68                entries[i * dim + j] = Complex::new(val, 0.0);
69            }
70        }
71        Self::with_scratch(dim, entries)
72    }
73
74    /// Diagonal populations (probabilities).
75    pub fn populations(&self) -> Vec<f32> {
76        (0..self.dim).map(|k| self.entries[k * self.dim + k].re).collect()
77    }
78
79    /// Sum of off-diagonal magnitudes (coherence).
80    pub fn coherence_magnitude(&self) -> f32 {
81        let mut sum = 0.0f32;
82        for i in 0..self.dim {
83            for j in (i + 1)..self.dim {
84                sum += self.entries[i * self.dim + j].norm();
85            }
86        }
87        sum
88    }
89
90    /// Tr(ρ²) — purity. 1.0 = pure, 1/N = maximally mixed.
91    pub fn purity(&self) -> f32 {
92        let mut sum = 0.0f32;
93        for e in &self.entries {
94            sum += e.norm_sq();
95        }
96        sum
97    }
98
99    /// Von Neumann entropy via eigenvalue decomposition.
100    /// S = -Σ λ_i ln(λ_i) where λ_i are eigenvalues of ρ.
101    /// Uses Jacobi rotation solver from dreamwell-math.
102    pub fn von_neumann_entropy(&self) -> f32 {
103        let d = self.dim;
104        // Copy entries to scratch for eigenvalue computation (destructive)
105        let mut work = self.entries.clone(); // unavoidable copy for eigenvalue
106        let mut eigenvalues = vec![0.0f32; d];
107        dreamwell_math::eigen::eigenvalues_hermitian(&mut work, &mut eigenvalues, d, 50, 1e-8);
108        dreamwell_math::eigen::von_neumann_entropy(&eigenvalues)
109    }
110
111    /// Helmholtz free energy: F = ⟨H⟩ - T·S.
112    /// H is given as diagonal energies.
113    /// Temperature T = 1/(1 + φ·coherence_magnitude) — φ-scaled thermal partition.
114    /// Higher coherence → lower temperature → free energy dominated by energy (ordered).
115    /// Lower coherence → higher temperature → free energy dominated by entropy (disordered).
116    pub fn free_energy(&self, energies: &[f32]) -> f32 {
117        let pops = self.populations();
118        let expected_h: f32 = pops.iter().zip(energies.iter()).map(|(p, e)| p * e).sum();
119        let temperature = 1.0 / (1.0 + PHI * self.coherence_magnitude());
120        let entropy = self.von_neumann_entropy();
121        expected_h - temperature * entropy
122    }
123
124    /// Apply partial dephasing: ρ → (1-ε)ρ + εΔ(ρ).
125    pub fn dephase(&mut self, epsilon: f32) {
126        let retain = (1.0 - epsilon).max(0.0);
127        for i in 0..self.dim {
128            for j in 0..self.dim {
129                if i != j {
130                    self.entries[i * self.dim + j] = self.entries[i * self.dim + j].scale(retain);
131                }
132            }
133        }
134    }
135
136    /// Apply shared dephasing with another density matrix (Model 4 coupling).
137    pub fn couple_dephase(&mut self, other: &DensityMatrixN, strength: f32) {
138        let other_coh = other.coherence_magnitude();
139        let retain = (1.0 - strength * (1.0 - other_coh.min(1.0))).max(0.0);
140        for i in 0..self.dim {
141            for j in 0..self.dim {
142                if i != j {
143                    self.entries[i * self.dim + j] = self.entries[i * self.dim + j].scale(retain);
144                }
145            }
146        }
147    }
148
149    /// Unitary evolution: ρ → UρU†.
150    /// Adaptive dispatch (Clean Compute: explicit hardware separation):
151    ///   dim < 32:  serial cgemm (L1 cache, compiler vectorizes)
152    ///   dim ≥ 32:  CPU parallel rows via rayon (no contention, scales with cores)
153    ///
154    /// GPU compute (BA-60) is available via GpuTrainingContext::batched_evolve()
155    /// for bulk operations where all matrices can be dispatched in a single
156    /// submission. Per-call GPU dispatch is slower than CPU parallel due to
157    /// Mutex serialization and buffer mapping overhead (~5ms per round-trip
158    /// vs ~0.3ms for CPU cgemm_par at dim=86).
159    pub fn evolve(&mut self, unitary: &[Complex]) {
160        let d = self.dim;
161        // Serial tiled cgemm for all dimensions. At dim=86, the tiled path
162        // processes 86×86 in ~0.3ms per multiply — fast enough that the outer
163        // rayon par_iter on windows provides all needed parallelism.
164        // Using cgemm_par here causes nested rayon contention (outer par_iter
165        // on windows + inner par_chunks on rows = thread pool thrashing).
166        dreamwell_math::linalg::cgemm(unitary, &self.entries, &mut self.scratch_a, d, d, d);
167        dreamwell_math::linalg::cgemm_conj_transpose_b(&self.scratch_a, unitary, &mut self.entries, d, d, d);
168    }
169
170    /// Build unitary U = exp(-iHdt) from a real symmetric Hamiltonian.
171    /// Uses scaling-and-squaring with Padé approximant (numerically stable at any dim).
172    pub fn hamiltonian_unitary(h: &[f32], dim: usize, dt: f32) -> Vec<Complex> {
173        let n2 = dim * dim;
174        let mut u = vec![Complex::ZERO; n2];
175        let mut sa = vec![Complex::ZERO; n2];
176        let mut sb = vec![Complex::ZERO; n2];
177        dreamwell_math::matrix_exp::expm_skew_hermitian(h, dt, &mut u, &mut sa, &mut sb, dim);
178        u
179    }
180
181    /// Born rule measurement: sample a mode index from diagonal populations.
182    pub fn born_sample(&self, seed: u64) -> usize {
183        let r = splitmix64_f32(seed);
184        let mut cumulative = 0.0f32;
185        for k in 0..self.dim {
186            cumulative += self.entries[k * self.dim + k].re.max(0.0);
187            if r < cumulative {
188                return k;
189            }
190        }
191        self.dim - 1
192    }
193
194    /// Trace (should always be ~1.0 for valid density matrix).
195    pub fn trace(&self) -> f32 {
196        (0..self.dim).map(|k| self.entries[k * self.dim + k].re).sum()
197    }
198}
199
200/// Mutex-protected GPU compute context for large-dimension matrix operations.
201/// Created once on first access. The Mutex serializes concurrent rayon threads
202/// so only one thread dispatches to the GPU staging buffer at a time.
203/// Returns None if no GPU adapter is available (graceful fallback to CPU).
204fn gpu_linalg_lock() -> Option<std::sync::MutexGuard<'static, Option<dreamwell_math::gpu_linalg::GpuLinalgContext>>> {
205    use std::sync::{Mutex, OnceLock};
206    static GPU_CTX: OnceLock<Mutex<Option<dreamwell_math::gpu_linalg::GpuLinalgContext>>> = OnceLock::new();
207    let mtx = GPU_CTX.get_or_init(|| {
208        // Max dim 256 — covers all practical transformer dimensions.
209        // Pre-allocates 256² × 8B × 5 buffers = ~2.6 MB GPU memory.
210        Mutex::new(dreamwell_math::gpu_linalg::GpuLinalgContext::new(256))
211    });
212    mtx.lock().ok()
213}
214
215#[cfg(test)]
216mod tests {
217    use super::*;
218
219    #[test]
220    fn pure_state_valid() {
221        let rho = DensityMatrixN::pure_state(0, 5);
222        assert!((rho.trace() - 1.0).abs() < 1e-6);
223        assert!((rho.purity() - 1.0).abs() < 1e-6);
224        assert_eq!(rho.populations()[0], 1.0);
225    }
226
227    #[test]
228    fn equal_superposition_valid() {
229        let rho = DensityMatrixN::equal_superposition(5);
230        assert!((rho.trace() - 1.0).abs() < 1e-6);
231        let pops = rho.populations();
232        for &p in &pops {
233            assert!((p - 0.2).abs() < 1e-6);
234        }
235    }
236
237    #[test]
238    fn dephasing_reduces_coherence() {
239        let mut rho = DensityMatrixN::equal_superposition(5);
240        let coh_before = rho.coherence_magnitude();
241        rho.dephase(0.5);
242        let coh_after = rho.coherence_magnitude();
243        assert!(coh_after < coh_before, "Dephasing should reduce coherence");
244    }
245
246    #[test]
247    fn maximally_mixed_min_purity() {
248        let rho = DensityMatrixN::maximally_mixed(5);
249        assert!((rho.purity() - 0.2).abs() < 1e-6);
250    }
251
252    #[test]
253    fn free_energy_computable() {
254        let rho = DensityMatrixN::equal_superposition(5);
255        let energies = vec![0.2, 0.3, 0.1, 0.15, 0.25];
256        let f = rho.free_energy(&energies);
257        assert!(f.is_finite());
258    }
259
260    #[test]
261    fn born_sample_deterministic() {
262        let rho = DensityMatrixN::pure_state(2, 5);
263        assert_eq!(rho.born_sample(42), 2);
264        assert_eq!(rho.born_sample(42), 2);
265    }
266
267    #[test]
268    fn evolve_preserves_trace() {
269        let mut rho = DensityMatrixN::equal_superposition(4);
270        let h = vec![0.1; 16];
271        let u = DensityMatrixN::hamiltonian_unitary(&h, 4, 0.01);
272        rho.evolve(&u);
273        assert!((rho.trace() - 1.0).abs() < 0.1, "trace={}", rho.trace());
274    }
275
276    #[test]
277    fn pure_state_entropy_is_zero() {
278        let rho = DensityMatrixN::pure_state(0, 5);
279        let s = rho.von_neumann_entropy();
280        assert!(s.abs() < 0.1, "pure state entropy should be ~0, got {s}");
281    }
282
283    #[test]
284    fn maximally_mixed_entropy_is_maximal() {
285        let rho = DensityMatrixN::maximally_mixed(5);
286        let s = rho.von_neumann_entropy();
287        let expected = (5.0f32).ln(); // ln(5) ≈ 1.609
288        assert!(
289            (s - expected).abs() < 0.2,
290            "maximally mixed entropy should be ~{expected}, got {s}"
291        );
292    }
293
294    #[test]
295    fn evolve_at_dim16_preserves_trace() {
296        let dim = 16;
297        let mut rho = DensityMatrixN::equal_superposition(dim);
298        let mut h = vec![0.0f32; dim * dim];
299        for i in 0..dim {
300            h[i * dim + i] = (i as f32) * 0.1;
301        }
302        h[1] = 0.2;
303        h[dim] = 0.2; // coupling
304        let u = DensityMatrixN::hamiltonian_unitary(&h, dim, 0.1);
305        rho.evolve(&u);
306        assert!(
307            (rho.trace() - 1.0).abs() < 0.15,
308            "dim=16 trace should be ~1.0, got {}",
309            rho.trace()
310        );
311    }
312}
dreamwell_intelligence/density_matrix.rs

dreamwell_intelligence/
density_matrix.rs