dreamwell-intelligence 1.0.0

QuantumGPT (The Loom) — Quantum Information Pretrained Transformer. Density matrix attention with intrinsic thermodynamic loss, φ-scaled causal dephasing, and parameter shift gradient.
Documentation
// Spectral Annealing — the final quantum accelerator.
//
// The density matrix's eigenvalue spectrum encodes the optimal learning rate:
//   ∂S/∂λ_i = -(1 + ln(λ_i))
//
// λ→0 (irrelevant mode): gradient→∞ (learn aggressively)
// λ→1 (dominant mode):   gradient→-1 (learn conservatively)
// λ=1/φ²≈0.382:          gradient≈0 (−0.038; the exact zero is at λ=1/e≈0.368, close to the golden-ratio point)
//
// The spectral entropy gradient IS the natural per-parameter learning rate.
// We already compute eigenvalues for von Neumann entropy. This reads what's there.
//
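// Clean Compute: the annealing factor needs no allocation of its own — it reads
// eigenvalues the existing entropy call already produced. (The `eigenvalues`
// convenience wrapper in this file does allocate a work copy.)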

use crate::density_matrix::DensityMatrixN;

/// Reciprocal of the golden ratio: 1/φ = (√5 − 1)/2 ≈ 0.618.
const PHI_INV: f32 = 0.618_033_988;

/// Computes the learning-rate scaling factor implied by an eigenvalue spectrum.
///
/// The factor is the spectral entropy gradient `-(1 + ln(λ_min))`, where
/// `λ_min` is the smallest eigenvalue greater than `1e-12`, clamped to
/// `[1/φ⁵, φ³] ≈ [0.090, 4.236]`.
///
/// * Small `λ_min` gives a large factor (up to the φ³ cap).
/// * For `λ_min ≥ 1/e ≈ 0.368` the raw gradient is ≤ 0, so the result sits at
///   the lower clamp 1/φ⁵. This covers the golden-ratio point
///   `λ_min = 1/φ² ≈ 0.382` (raw value ≈ −0.038) and pure states
///   (`λ_min = 1`, raw value −1).
///
/// Returns the neutral factor `1.0` when the slice is empty or holds no
/// eigenvalue strictly between `1e-12` and `f32::MAX` (NaN entries are skipped).
pub fn spectral_annealing_factor(eigenvalues: &[f32]) -> f32 {
    // Smallest usable eigenvalue, or `None` when nothing qualifies. An empty
    // slice naturally falls into the `None` case.
    let smallest = eigenvalues
        .iter()
        .copied()
        .filter(|&ev| ev > 1e-12 && ev < f32::MAX)
        .reduce(f32::min);

    match smallest {
        None => 1.0,
        Some(lambda) => {
            let lower = PHI_INV.powi(5); // 1/φ⁵ ≈ 0.090
            let upper = PHI_INV.powi(-3); // φ³ ≈ 4.236
            // Spectral entropy gradient: -(1 + ln(λ))
            (-(1.0 + lambda.ln())).clamp(lower, upper)
        }
    }
}

/// Free energy momentum — φ-weighted ring buffer of recent loss values.
///
/// Despite the name, the values stored are losses (one per `push`). The
/// buffer holds the eight most recent values, and the momentum is a
/// golden-ratio-weighted sum of successive differences, so recent changes
/// matter more:
///
///   dF/dt ≈ Σ_i φ^(-i) × (F[t-i] - F[t-i-1])
///
/// This is the quantum analog of Adam's momentum.
///
/// `Default` yields the same empty tracker as `new()`.
#[derive(Debug, Clone, Default)]
pub struct FreeEnergyMomentum {
    /// Ring buffer of the eight most recently pushed values.
    history: [f32; 8],
    /// Slot that the next push will write to.
    idx: usize,
    /// Total number of values pushed so far (not capped at the buffer size).
    count: usize,
    /// Initial loss at epoch 0 — convergence requires improvement from this baseline.
    initial_loss: Option<f32>,
    /// Most recent loss value for convergence check.
    latest_loss: f32,
}

impl FreeEnergyMomentum {
    pub fn new() -> Self {
        Self {
            history: [0.0; 8],
            idx: 0,
            count: 0,
            initial_loss: None,
            latest_loss: 0.0,
        }
    }

    /// Record a new loss value (used for convergence and momentum).
    pub fn push(&mut self, loss: f32) {
        if self.initial_loss.is_none() {
            self.initial_loss = Some(loss);
        }
        self.latest_loss = loss;
        self.history[self.idx] = loss;
        self.idx = (self.idx + 1) % 8;
        self.count += 1;
    }

    /// Compute φ-weighted momentum: exponentially decaying dF/dt.
    /// Positive momentum = F increasing (bad). Negative = F decreasing (good).
    pub fn momentum(&self) -> f32 {
        if self.count < 2 {
            return 0.0;
        }
        let mut df = 0.0f32;
        let mut weight = 1.0f32;
        let n = (self.count - 1).min(7);
        for i in 0..n {
            // Most recent pair first (i=0), then older pairs
            let newer = (self.idx + 8 - 1 - i) % 8;
            let older = (self.idx + 8 - 2 - i) % 8;
            df += weight * (self.history[newer] - self.history[older]);
            weight *= PHI_INV;
        }
        df
    }

    /// Should we reduce the learning rate?
    /// Returns true if loss INCREASED significantly (overshooting → LR too high).
    /// Now tracks loss, not free energy.
    pub fn should_anneal(&self) -> bool {
        if self.count < 8 {
            return false;
        }
        let curr = self.history[(self.idx + 7) % 8];
        let prev = self.history[(self.idx + 6) % 8];
        // Loss increased by more than 1/φ³ = 0.236 (absolute)
        // Scale by the loss magnitude for relative comparison
        let threshold = prev.abs() * PHI_INV * PHI_INV * PHI_INV;
        curr > prev + threshold.max(PHI_INV.powi(8)) // floor at 1/φ⁸ ≈ 0.013
    }

    /// Thermodynamic convergence: has the model BOTH improved AND stabilized?
    /// Two conditions must hold simultaneously:
    ///   1. Loss has decreased by at least 1/φ (~38%) from initial (model learned something)
    ///   2. Relative |dLoss/dt| is below 1/φ⁸ (~1.3%) for 40+ epochs (model has stabilized)
    /// A model at random-chance loss has NOT converged — it hasn't started.
    pub fn converged(&self) -> bool {
        // Need F(5) × F(6) = 5 × 8 = 40 data points (5 full buffer cycles)
        if self.count < 40 {
            return false;
        }

        // Condition 1: loss must have improved by at least 1/φ from initial.
        // At random chance (loss = ln(vocab)), no improvement = no convergence.
        if let Some(initial) = self.initial_loss {
            let improvement_ratio = 1.0 - (self.latest_loss / initial);
            if improvement_ratio < PHI_INV {
                // must improve by >38.2%
                return false;
            }
        } else {
            return false;
        }

        // Condition 2: relative momentum below 1/φ⁸ ≈ 0.013
        let mom = self.momentum().abs();
        let avg = self.history.iter().map(|v| v.abs()).sum::<f32>() / 8.0;
        if avg < 1e-6 {
            return true;
        }
        let threshold = PHI_INV.powi(8);
        mom / avg < threshold
    }
}

/// Computes the spectral learning rate.
///
///   base_lr × spectral_factor × (1 + φ⁻¹·min(|momentum|, 2))
///
/// The three ingredients are:
/// 1. `base_lr` — the user-specified base learning rate.
/// 2. The spectral factor from `spectral_annealing_factor(eigenvalues)`.
/// 3. A momentum boost that grows with |momentum| regardless of sign and is
///    capped once |momentum| reaches 2, so the boost lies in [1, 1 + 2/φ].
pub fn spectral_lr(base_lr: f32, eigenvalues: &[f32], momentum: f32) -> f32 {
    let capped_magnitude = momentum.abs().min(2.0);
    let boost = 1.0 + PHI_INV * capped_magnitude;
    let spectrally_scaled = base_lr * spectral_annealing_factor(eigenvalues);
    spectrally_scaled * boost
}

/// Convenience wrapper that returns the eigenvalues of a density matrix.
///
/// Allocates twice: a scratch clone of `rho.entries` (the solver takes its
/// input mutably, so the caller's matrix is left untouched) and the output
/// vector of `rho.dim` values.
pub fn eigenvalues(rho: &DensityMatrixN) -> Vec<f32> {
    let dim = rho.dim;
    let mut spectrum = vec![0.0f32; dim];
    let mut scratch = rho.entries.clone();
    // NOTE(review): `50` and `1e-8` are presumably the iteration cap and the
    // convergence tolerance of the solver — confirm against dreamwell_math.
    dreamwell_math::eigen::eigenvalues_hermitian(&mut scratch, &mut spectrum, dim, 50, 1e-8);
    spectrum
}

// Unit tests for the spectral factor, the loss-momentum tracker, the combined
// learning rate, and two dephasing "proof" checks on `DensityMatrixN`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn spectral_factor_pure_state() {
        // Pure state: λ = [1, 0, 0, 0, 0]. λ_min (nonzero) = 1.0.
        // factor = -(1 + ln(1)) = -(1+0) = -1 → clamped to the lower bound 1/φ⁵ ≈ 0.090
        // Pure states have LOW spectral factor (conservative learning — already converged).
        let evals = vec![1.0, 0.0, 0.0, 0.0, 0.0];
        let f = spectral_annealing_factor(&evals);
        assert!(f <= 0.2, "pure state should have low factor (converged), got {f}");
    }

    #[test]
    fn spectral_factor_mixed() {
        // Maximally mixed: λ = [0.2, 0.2, 0.2, 0.2, 0.2]. ln(0.2) ≈ -1.609
        // factor = -(1 + (-1.609)) = 0.609 ≈ 1/φ
        let evals = vec![0.2, 0.2, 0.2, 0.2, 0.2];
        let f = spectral_annealing_factor(&evals);
        assert!(
            (f - PHI_INV).abs() < 0.1,
            "mixed state factor should be ~1/φ={PHI_INV}, got {f}"
        );
    }

    #[test]
    fn spectral_factor_near_convergence() {
        // Near-pure: λ_min = 0.01. ln(0.01) ≈ -4.605. factor = -(1 + (-4.605)) = 3.605
        // (below the φ³ ≈ 4.236 upper clamp, so the raw value is returned).
        let evals = vec![0.95, 0.01, 0.01, 0.01, 0.02];
        let f = spectral_annealing_factor(&evals);
        assert!(f > 2.0, "near-convergence should have aggressive factor, got {f}");
    }

    #[test]
    fn spectral_factor_at_golden_equilibrium() {
        // λ_min = 1/φ² ≈ 0.382. ln(0.382) ≈ -0.962.
        // factor = -(1 + (-0.962)) = -0.038 → clamped to the lower bound 1/φ⁵ ≈ 0.090
        let evals = vec![0.618, 0.382, 0.0, 0.0, 0.0]; // 0.382 = 1/φ²
        let f = spectral_annealing_factor(&evals);
        assert!(f <= 0.2, "golden equilibrium should be near-zero (clamped), got {f}");
    }

    #[test]
    fn momentum_ring_buffer() {
        let mut m = FreeEnergyMomentum::new();
        m.push(10.0);
        m.push(9.0);
        m.push(8.0);
        // Momentum should be negative (F decreasing = good)
        let mom = m.momentum();
        assert!(mom < 0.0, "decreasing F should give negative momentum, got {mom}");
    }

    #[test]
    fn momentum_phi_decay() {
        let mut m = FreeEnergyMomentum::new();
        // Constant decrease of 1.0 per step
        for i in 0..8 {
            m.push(100.0 - i as f32);
        }
        let mom = m.momentum();
        // Should be negative (F decreasing)
        assert!(mom < 0.0, "momentum should be negative for decreasing F");
        // Seven unit steps weighted by φ^(-i) sum to about -2.53, so the loose
        // -10 bound only guards against a runaway sum.
        assert!(mom > -10.0, "momentum should be bounded, got {mom}");
    }

    // NOTE: despite the name, this test exercises `should_anneal`, not `converged`.
    #[test]
    fn convergence_gate_fires() {
        let mut m = FreeEnergyMomentum::new();
        // Loss catastrophically increasing → should trigger annealing
        // Relative increase of >23.6% triggers the gate
        for i in 0..7 {
            m.push(5.0 + i as f32 * 0.1); // gentle rise
        }
        m.push(8.0); // sudden spike: 5.6 → 8.0 = +42% relative increase
        assert!(m.should_anneal(), "catastrophic loss spike should trigger annealing");
    }

    // NOTE: despite the name, this test exercises `should_anneal`, not `converged`.
    #[test]
    fn convergence_gate_stable() {
        let mut m = FreeEnergyMomentum::new();
        // F stable → should not trigger
        for _ in 0..8 {
            m.push(5.0);
        }
        assert!(!m.should_anneal(), "stable F should not trigger annealing");
    }

    #[test]
    fn spectral_lr_adapts() {
        let mixed = vec![0.2, 0.2, 0.2, 0.2, 0.2];
        let near_pure = vec![0.95, 0.01, 0.01, 0.01, 0.02];
        // Zero momentum, so only the spectral factor differs between the two.
        let lr_mixed = spectral_lr(0.01, &mixed, 0.0);
        let lr_near = spectral_lr(0.01, &near_pure, 0.0);
        // Near-pure should have higher effective lr (more aggressive on small modes)
        assert!(
            lr_near > lr_mixed,
            "near-pure lr ({lr_near}) should be > mixed lr ({lr_mixed})"
        );
    }

    #[test]
    fn eigenvalues_from_density_matrix() {
        let rho = DensityMatrixN::pure_state(0, 5);
        let evals = eigenvalues(&rho);
        assert_eq!(evals.len(), 5);
        // Pure state: one eigenvalue = 1, rest ≈ 0
        // NOTE(review): assumes the dominant eigenvalue is returned at index 0 —
        // confirm the solver's ordering contract.
        assert!(evals[0] > 0.9, "dominant eigenvalue should be ~1.0, got {}", evals[0]);
    }

    // === REQUIRED PROOF TESTS ===
    // Only Proof 1 and Proof 3 are defined in this module; Proof 2 is not here.

    #[test]
    fn proof_dephasing_composition_monotonicity() {
        // Proof 1: Two sequential dephasings must not increase F
        // (each step is allowed a 0.01 numerical tolerance).
        let mut rho = DensityMatrixN::equal_superposition(5);
        let energies = vec![0.2, 0.3, 0.1, 0.15, 0.25];
        let f0 = rho.free_energy(&energies);
        rho.dephase(0.1);
        let f1 = rho.free_energy(&energies);
        rho.dephase(0.1);
        let f2 = rho.free_energy(&energies);
        assert!(f1 <= f0 + 0.01, "First dephasing: F must not increase ({f1} > {f0})");
        assert!(f2 <= f1 + 0.01, "Second dephasing: F must not increase ({f2} > {f1})");
    }

    #[test]
    fn proof_meta_density_matrix_second_law() {
        // Proof 3: Parameter-space density matrix must respect the Second Law
        // (F may not rise by more than the 0.01 tolerance on any of 100 steps).
        let dim = 10;
        let mut meta_rho = DensityMatrixN::equal_superposition(dim);
        let energies: Vec<f32> = (0..dim).map(|i| i as f32 * 0.1).collect();
        let mut prev_f = meta_rho.free_energy(&energies);
        for _ in 0..100 {
            meta_rho.dephase(0.05);
            let f = meta_rho.free_energy(&energies);
            assert!(
                f <= prev_f + 0.01,
                "Meta-F must not increase under dephasing: {} > {}",
                f,
                prev_f
            );
            prev_f = f;
        }
    }
}