dreamwell-intelligence 1.0.0

// Learned Hamiltonian — parameterized coupling matrix for quantum attention.
//
// The Hamiltonian encodes token-token interactions. Diagonal = per-mode energy.
// Off-diagonal = coupling strength between modes. The couplings are the
// learnable parameters — they play the role of Q·K^T in classical attention.

/// Golden ratio φ = (1 + √5) / 2.
const PHI: f32 = 1.618033988;
/// 1/φ (equivalently φ − 1).
const PHI_INV: f32 = 0.618033988;
/// 1/φ² (equivalently 1 − 1/φ).
const PHI_INV_SQ: f32 = 0.381966011;
/// 1/φ⁵ — adiabatic evolution step.
const PHI_INV_5: f32 = 0.090169944;
/// 1/φ⁸ — convergence stability floor.
///
/// 1/φ⁸ = (47 − 21√5) / 2 ≈ 0.021286236. The literal previously stored here
/// (0.013155617) was 1/φ⁹, which disagreed with the constant's name and with
/// every place that documents the floor as 1/φ⁸.
const PHI_INV_8: f32 = 0.021286236;

/// Learnable Hamiltonian parameters for one QCT block.
///
/// The methods on this type expect `bias.len() == dim` and
/// `couplings.len() == dim * (dim - 1) / 2`. `couplings` stores the strict
/// upper triangle in row-major order: (0,1), (0,2), …, (1,2), …
/// — the order in which `build_matrix` consumes it.
///
/// `Debug` and `PartialEq` are derived so instances can be logged and compared
/// directly (e.g. in tests and checkpoint round-trips).
#[derive(Clone, Debug, PartialEq)]
pub struct LearnedHamiltonian {
    /// Model dimension (density matrix size).
    pub dim: usize,
    /// Diagonal bias energies, one per mode (`dim` entries). Learnable.
    pub bias: Vec<f32>,
    /// Off-diagonal coupling strengths (`dim * (dim - 1) / 2` entries). Learnable.
    pub couplings: Vec<f32>,
    /// Base dephasing rate. Learnable.
    pub dephasing_rate: f32,
    /// Temperature for free energy. Learnable.
    pub temperature: f32,
}

impl LearnedHamiltonian {
    /// Initialize with φ-derived pseudo-random values from `seed`.
    ///
    /// The values are fully deterministic in `(dim, seed)`: each entry is a
    /// multiplicative hash of `seed + index`, reduced modulo 1000 to a fraction
    /// in `[0, 0.999]`, and mapped linearly into the target range.
    ///
    /// All initialization scales are derived from the golden ratio:
    ///   Bias range:     [1/φ⁵, 1/φ⁴) ≈ [0.090, 0.146)
    ///   Coupling range: [1/φ², 1/φ)  ≈ [0.382, 0.618) — golden partition of unit interval
    ///   Dephasing:      1/φ⁵ ≈ 0.090 — matches evolution time step
    ///   Temperature:    1.0           — natural scale (not φ-governed)
    ///
    /// `dim == 0` is accepted and yields empty `bias` / `couplings`.
    pub fn new(dim: usize, seed: u64) -> Self {
        // `saturating_sub` keeps dim == 0 from underflowing `dim - 1`
        // (which panics in debug builds).
        let num_couplings = dim * dim.saturating_sub(1) / 2;

        // Bias ∈ [1/φ⁵, 1/φ⁴): the mode energies.
        let bias_lo = PHI_INV_5; // 1/φ⁵ ≈ 0.090
        let bias_hi = PHI_INV_5 * PHI; // 1/φ⁴ ≈ 0.146 (upper bound, not a width)
        let bias: Vec<f32> = (0..dim)
            .map(|k| {
                let s = seed.wrapping_add(k as u64).wrapping_mul(0x9e3779b97f4a7c15);
                bias_lo + (bias_hi - bias_lo) * ((s % 1000) as f32 / 1000.0)
            })
            .collect();

        // Couplings ∈ [1/φ², 1/φ): the golden partition of the unit interval.
        // The width of this range is 1/φ − 1/φ² = 1/φ³ ≈ 0.236.
        let coupling_lo = PHI_INV_SQ; // 1/φ² ≈ 0.382
        let coupling_width = PHI_INV - PHI_INV_SQ; // 1/φ³ ≈ 0.236
        let couplings: Vec<f32> = (0..num_couplings)
            .map(|k| {
                // Offset by `dim` so coupling seeds do not reuse the bias indices.
                let s = seed
                    .wrapping_add((dim + k) as u64)
                    .wrapping_mul(0xbf58476d1ce4e5b9);
                coupling_lo + coupling_width * ((s % 1000) as f32 / 1000.0)
            })
            .collect();

        Self {
            dim,
            bias,
            couplings,
            dephasing_rate: PHI_INV_5, // 1/φ⁵ — matches evolution dt
            temperature: 1.0,          // natural scale
        }
    }

    /// Build the dim×dim Hamiltonian matrix (real, symmetric) for a given position.
    ///
    /// The result is a row-major `Vec` of length `dim * dim`:
    /// - diagonal `H[k][k] = bias[k] + (1/φ⁵) · sin(k · position · 1/φ⁵)`
    /// - off-diagonal `H[i][j] = H[j][i] = couplings[idx]`, where `idx` walks
    ///   the strict upper triangle in row-major order.
    ///
    /// # Panics
    /// Panics if `bias` or `couplings` is shorter than `dim` requires.
    pub fn build_matrix(&self, position: usize) -> Vec<f32> {
        let d = self.dim;
        let mut h = vec![0.0f32; d * d];

        // Diagonal: bias + position-dependent phase drift.
        // Both the phase rate per position and the perturbation amplitude are 1/φ⁵.
        let phase = position as f32 * PHI_INV_5;
        for k in 0..d {
            h[k * d + k] = self.bias[k] + PHI_INV_5 * (k as f32 * phase).sin();
        }

        // Off-diagonal: each coupling is mirrored across the diagonal.
        let mut idx = 0;
        for i in 0..d {
            for j in (i + 1)..d {
                let c = self.couplings[idx];
                h[i * d + j] = c;
                h[j * d + i] = c;
                idx += 1;
            }
        }

        h
    }

    /// Compute causal dephasing rate for distance between positions.
    ///
    /// Uses φ-scaled exponential saturation: ε = ε_base × (1 − exp(−distance/φ)).
    /// This is 0 at distance 0 and approaches `dephasing_rate` as distance grows.
    pub fn causal_dephasing(&self, distance: usize) -> f32 {
        self.dephasing_rate * (1.0 - (-(distance as f32) / PHI).exp())
    }

    /// Total number of learnable parameters.
    pub fn num_params(&self) -> usize {
        self.bias.len() + self.couplings.len() + 2 // +2 for dephasing_rate and temperature
    }

    /// Get all parameters as a flat vector (for gradient computation).
    ///
    /// Layout: `[bias.., couplings.., dephasing_rate, temperature]` — the same
    /// layout `set_params` reads.
    pub fn params(&self) -> Vec<f32> {
        let mut p = Vec::with_capacity(self.num_params());
        p.extend_from_slice(&self.bias);
        p.extend_from_slice(&self.couplings);
        p.push(self.dephasing_rate);
        p.push(self.temperature);
        p
    }

    /// Set all parameters from a flat vector laid out as
    /// `[bias.., couplings.., dephasing_rate, temperature]`.
    ///
    /// Bias and couplings are copied verbatim; the two scalars are clamped:
    ///   dephasing:   [`PHI_INV_8`, 1.0]
    ///   temperature: [`PHI_INV_5`, φ⁵]
    /// A NaN scalar is stored as NaN, because `f32::clamp` returns NaN for NaN input.
    /// Elements beyond the expected length are ignored.
    ///
    /// # Panics
    /// Panics if `params` holds fewer than `dim + dim*(dim-1)/2 + 2` values, or
    /// if `bias` / `couplings` no longer have the lengths implied by `dim`.
    pub fn set_params(&mut self, params: &[f32]) {
        let d = self.dim;
        // Same underflow-safe count as in `new`.
        let nc = d * d.saturating_sub(1) / 2;
        let expected = d + nc + 2;
        assert!(
            params.len() >= expected,
            "set_params: expected at least {} values, got {}",
            expected,
            params.len()
        );

        self.bias.copy_from_slice(&params[..d]);
        self.couplings.copy_from_slice(&params[d..d + nc]);
        self.dephasing_rate = params[d + nc].clamp(PHI_INV_8, 1.0);
        self.temperature = params[d + nc + 1].clamp(PHI_INV_5, PHI.powi(5));
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The matrix from `build_matrix` must equal its transpose, both at
    /// position 0 (phase term vanishes) and at a non-zero position.
    #[test]
    fn hamiltonian_symmetric() {
        let dim = 5;
        let h = LearnedHamiltonian::new(dim, 42);
        for &position in &[0usize, 3] {
            let m = h.build_matrix(position);
            assert_eq!(m.len(), dim * dim);
            for i in 0..dim {
                for j in 0..dim {
                    assert!(
                        (m[i * dim + j] - m[j * dim + i]).abs() < 1e-6,
                        "H must be symmetric: H[{i},{j}]={} vs H[{j},{i}]={}",
                        m[i * dim + j],
                        m[j * dim + i]
                    );
                }
            }
        }
    }

    /// Dephasing grows monotonically with distance over the sampled points.
    #[test]
    fn causal_dephasing_increases_with_distance() {
        let h = LearnedHamiltonian::new(5, 42);
        let e1 = h.causal_dephasing(1);
        let e5 = h.causal_dephasing(5);
        let e20 = h.causal_dephasing(20);
        assert!(e1 < e5, "dephasing should increase with distance");
        assert!(e5 < e20);
    }

    /// `params` → `set_params` must reproduce every parameter, including the
    /// two trailing scalars (whose initial values lie inside the clamp bounds).
    #[test]
    fn params_roundtrip() {
        let h = LearnedHamiltonian::new(4, 0);
        let p = h.params();
        assert_eq!(p.len(), h.num_params());

        let mut h2 = LearnedHamiltonian::new(4, 99);
        h2.set_params(&p);
        assert_eq!(h.bias, h2.bias);
        assert_eq!(h.couplings, h2.couplings);
        assert_eq!(h.dephasing_rate, h2.dephasing_rate);
        assert_eq!(h.temperature, h2.temperature);
    }

    /// Out-of-range scalars are clamped to the documented bounds.
    #[test]
    fn set_params_clamps_scalars() {
        let mut h = LearnedHamiltonian::new(3, 7);
        let mut p = h.params();
        let n = p.len();

        p[n - 2] = 0.0;
        p[n - 1] = f32::MAX;
        h.set_params(&p);
        assert_eq!(h.dephasing_rate, PHI_INV_8);
        assert_eq!(h.temperature, PHI.powi(5));

        p[n - 2] = 10.0;
        p[n - 1] = 0.0;
        h.set_params(&p);
        assert_eq!(h.dephasing_rate, 1.0);
        assert_eq!(h.temperature, PHI_INV_5);
    }

    /// Freshly initialized values fall inside the φ-derived ranges.
    #[test]
    fn initial_values_within_phi_ranges() {
        let h = LearnedHamiltonian::new(6, 42);
        assert_eq!(h.bias.len(), 6);
        assert_eq!(h.couplings.len(), 15);
        for b in &h.bias {
            assert!(*b >= PHI_INV_5 - 1e-6 && *b <= PHI_INV_5 * PHI + 1e-6);
        }
        for c in &h.couplings {
            assert!(*c >= PHI_INV_SQ - 1e-6 && *c <= PHI_INV + 1e-6);
        }
    }
}