#![cfg(feature = "proptest")]
use embeddenator::{ReversibleVSAConfig, SparseVec, DIM};
use proptest::prelude::*;
use std::collections::BTreeMap;
// Slack allowed when asserting a cosine similarity is "not meaningfully negative":
// sparse ternary interference can push a true-zero similarity slightly below 0
// (used as `sim >= -SIMILARITY_TOLERANCE` in the bundle-similarity property).
const SIMILARITY_TOLERANCE: f64 = 0.05;
// Minimum cosine similarity for two vectors to count as "approximately equal"
// (threshold passed to `approx_equal`, e.g. for approximate bundle associativity).
const APPROX_TOLERANCE: f64 = 0.90;
/// Proptest strategy producing a random `SparseVec` with up to `max_nonzeros`
/// nonzero entries: each entry is an index in `0..DIM` paired with a sign of
/// +1 or -1.
fn sparse_vec_strategy(max_nonzeros: usize) -> impl Strategy<Value = SparseVec> {
    // One raw entry: (index, sign), sign drawn uniformly from {+1, -1}.
    let entry = (0usize..DIM, prop_oneof![Just(1i8), Just(-1i8)]);
    prop::collection::vec(entry, 0..max_nonzeros).prop_map(|pairs| {
        // Collecting into a BTreeMap dedupes indices (the last generated sign
        // wins) and iterates in ascending index order, so `pos` and `neg`
        // come out sorted and duplicate-free.
        let deduped: BTreeMap<usize, i8> = pairs.into_iter().collect();
        let mut vec = SparseVec::new();
        for (index, sign) in deduped {
            // Only +1 / -1 are ever generated above.
            if sign == 1 {
                vec.pos.push(index);
            } else {
                vec.neg.push(index);
            }
        }
        vec
    })
}
/// Proptest strategy producing arbitrary byte payloads of length `0..max_len`.
/// NOTE(review): not referenced by any test visible in this chunk — confirm a
/// caller exists elsewhere before removing.
fn byte_data_strategy(max_len: usize) -> impl Strategy<Value = Vec<u8>> {
    let len_range = 0..max_len;
    prop::collection::vec(any::<u8>(), len_range)
}
/// Proptest strategy producing a permutation shift amount: every legal
/// rotation from 0 through DIM - 1.
fn shift_strategy() -> impl Strategy<Value = usize> {
    0usize..DIM
}
/// Returns true when `a` and `b` are "approximately equal": cosine similarity
/// at or above `threshold`. Two empty vectors are considered equal; one empty
/// and one non-empty are never equal (cosine with an empty vector is not
/// meaningful).
fn approx_equal(a: &SparseVec, b: &SparseVec, threshold: f64) -> bool {
    let a_is_empty = a.pos.is_empty() && a.neg.is_empty();
    let b_is_empty = b.pos.is_empty() && b.neg.is_empty();
    match (a_is_empty, b_is_empty) {
        (true, true) => true,
        (true, false) | (false, true) => false,
        (false, false) => a.cosine(b) >= threshold,
    }
}
/// Number of nonzero components in `v` (positive plus negative entries).
fn nnz(v: &SparseVec) -> usize {
    let positives = v.pos.len();
    let negatives = v.neg.len();
    positives + negatives
}
// Algebraic properties of `bundle` (superposition of sparse ternary vectors).
proptest! {
    #![proptest_config(ProptestConfig {
        cases: 1000,
        max_shrink_iters: 10000,
        .. ProptestConfig::default()
    })]

    // bundle(a, b) and bundle(b, a) must produce identical index lists.
    #[test]
    fn bundle_commutativity(
        a in sparse_vec_strategy(200),
        b in sparse_vec_strategy(200)
    ) {
        let ab = a.bundle(&b);
        let ba = b.bundle(&a);
        prop_assert_eq!(ab.pos, ba.pos);
        prop_assert_eq!(ab.neg, ba.neg);
    }

    // Bundling a vector with itself must return the vector unchanged.
    #[test]
    fn bundle_identity_idempotence(a in sparse_vec_strategy(200)) {
        let aa = a.bundle(&a);
        prop_assert_eq!(aa.pos, a.pos);
        prop_assert_eq!(aa.neg, a.neg);
    }

    // Associativity holds only approximately: (a+b)+c and a+(b+c) must be
    // cosine-similar above APPROX_TOLERANCE, not bit-identical.
    #[test]
    fn bundle_associativity_approximate(
        a in sparse_vec_strategy(150),
        b in sparse_vec_strategy(150),
        c in sparse_vec_strategy(150)
    ) {
        let bc = b.bundle(&c);
        let a_bc = a.bundle(&bc);
        let ab = a.bundle(&b);
        let ab_c = ab.bundle(&c);
        prop_assert!(approx_equal(&a_bc, &ab_c, APPROX_TOLERANCE),
            "Bundle associativity failed: similarity = {}", a_bc.cosine(&ab_c));
    }

    // A bundle must not become anti-correlated with either component:
    // similarity may dip slightly below zero due to sparse interference,
    // hence the -SIMILARITY_TOLERANCE slack rather than a strict >= 0.
    #[test]
    fn bundle_preserves_component_similarity(
        a in sparse_vec_strategy(200),
        b in sparse_vec_strategy(200)
    ) {
        prop_assume!(nnz(&a) > 0 && nnz(&b) > 0);
        let ab = a.bundle(&b);
        prop_assume!(nnz(&ab) > 0);
        let sim_a = a.cosine(&ab);
        let sim_b = b.cosine(&ab);
        prop_assert!(sim_a >= -SIMILARITY_TOLERANCE,
            "Bundle should be similar to component a: sim = {}", sim_a);
        prop_assert!(sim_b >= -SIMILARITY_TOLERANCE,
            "Bundle should be similar to component b: sim = {}", sim_b);
    }

    // The bundle can never have more nonzeros than its inputs combined.
    #[test]
    fn bundle_sparsity_bounded(
        a in sparse_vec_strategy(200),
        b in sparse_vec_strategy(200)
    ) {
        let ab = a.bundle(&b);
        let nnz_a = nnz(&a);
        let nnz_b = nnz(&b);
        let nnz_ab = nnz(&ab);
        prop_assert!(nnz_ab <= nnz_a + nnz_b,
            "Bundle exceeded sparsity bound: {} > {} + {}", nnz_ab, nnz_a, nnz_b);
    }

    // The empty vector is the identity element for bundle.
    #[test]
    fn bundle_empty_is_identity(a in sparse_vec_strategy(200)) {
        let empty = SparseVec::new();
        let result = a.bundle(&empty);
        prop_assert_eq!(result.pos, a.pos);
        prop_assert_eq!(result.neg, a.neg);
    }
}
// Algebraic properties of `bind` (association/role-filler operation).
proptest! {
    #![proptest_config(ProptestConfig {
        cases: 1000,
        max_shrink_iters: 10000,
        .. ProptestConfig::default()
    })]

    // Binding the same data with two keys over disjoint index sets should
    // yield nearly orthogonal results (|cos| < 0.5). Currently #[ignore]d —
    // presumably flaky for this implementation; confirm before re-enabling.
    #[test]
    #[ignore]
    fn bind_near_orthogonality(
        data in sparse_vec_strategy(200),
        key1 in sparse_vec_strategy(100),
        key2 in sparse_vec_strategy(100)
    ) {
        // Keys must differ, all three vectors must be reasonably dense, and
        // the keys' index supports must not overlap at all.
        prop_assume!(key1.pos != key2.pos || key1.neg != key2.neg);
        prop_assume!(nnz(&data) > 20 && nnz(&key1) > 10 && nnz(&key2) > 10);
        let key1_indices: std::collections::HashSet<_> =
            key1.pos.iter().chain(key1.neg.iter()).copied().collect();
        let key2_indices: std::collections::HashSet<_> =
            key2.pos.iter().chain(key2.neg.iter()).copied().collect();
        prop_assume!(key1_indices.is_disjoint(&key2_indices));
        let bound1 = data.bind(&key1);
        let bound2 = data.bind(&key2);
        prop_assume!(nnz(&bound1) > 3 && nnz(&bound2) > 3);
        let sim = bound1.cosine(&bound2);
        prop_assert!(sim.abs() < 0.5,
            "Bind orthogonality failed: similarity = {} (expected < 0.5)", sim);
    }

    // Bind should be approximately self-inverse: binding with the same key
    // twice should recover something similar to the original data. #[ignore]d.
    #[test]
    #[ignore]
    fn bind_inverse_approximate(
        data in sparse_vec_strategy(200),
        key in sparse_vec_strategy(200)
    ) {
        prop_assume!(nnz(&data) > 10 && nnz(&key) > 20);
        let bound = data.bind(&key);
        let unbound = bound.bind(&key);
        prop_assume!(nnz(&unbound) > 0);
        let sim = data.cosine(&unbound);
        prop_assert!(sim > 0.5,
            "Bind inverse failed: similarity = {} (expected > 0.5)", sim);
    }

    // Approximate distributivity: (a + b) * k should be similar to
    // (a * k) + (b * k).
    #[test]
    fn bind_distributive_over_bundle(
        a in sparse_vec_strategy(150),
        b in sparse_vec_strategy(150),
        key in sparse_vec_strategy(200)
    ) {
        prop_assume!(nnz(&a) > 0 && nnz(&b) > 0 && nnz(&key) > 0);
        let ab = a.bundle(&b);
        let left = ab.bind(&key);
        let ak = a.bind(&key);
        let bk = b.bind(&key);
        let right = ak.bundle(&bk);
        prop_assume!(nnz(&left) > 0 && nnz(&right) > 0);
        let sim = left.cosine(&right);
        prop_assert!(sim > 0.8,
            "Bind distributivity failed: similarity = {} (expected > 0.8)", sim);
    }

    // A bound vector can never be denser than the sparser of its operands.
    #[test]
    fn bind_sparsity_preservation(
        a in sparse_vec_strategy(200),
        b in sparse_vec_strategy(200)
    ) {
        let bound = a.bind(&b);
        let nnz_bound = nnz(&bound);
        let min_nnz = nnz(&a).min(nnz(&b));
        prop_assert!(nnz_bound <= min_nnz,
            "Bind exceeded minimum sparsity: {} > {}", nnz_bound, min_nnz);
    }

    // The empty vector annihilates under bind.
    #[test]
    fn bind_empty_yields_empty(a in sparse_vec_strategy(200)) {
        let empty = SparseVec::new();
        let result = a.bind(&empty);
        prop_assert!(result.pos.is_empty() && result.neg.is_empty(),
            "Bind with empty should yield empty");
    }

    // Binding a vector with itself three times should land close to the
    // original vector (approximate identity after a*a*a).
    #[test]
    fn bind_triple_self_approximate_identity(a in sparse_vec_strategy(200)) {
        prop_assume!(nnz(&a) > 0);
        let aa = a.bind(&a);
        let aaa = aa.bind(&a);
        prop_assume!(nnz(&aaa) > 0);
        let sim = a.cosine(&aaa);
        prop_assert!(sim > 0.7,
            "Triple self-bind failed: similarity = {} (expected > 0.7)", sim);
    }
}
// Algebraic properties of `permute` (cyclic index rotation by `shift`).
proptest! {
    #![proptest_config(ProptestConfig {
        cases: 1000,
        max_shrink_iters: 10000,
        .. ProptestConfig::default()
    })]

    // The same shift applied twice to the same vector must give identical results.
    #[test]
    fn permute_is_deterministic(
        v in sparse_vec_strategy(200),
        shift in shift_strategy()
    ) {
        let p1 = v.permute(shift);
        let p2 = v.permute(shift);
        prop_assert_eq!(p1.pos, p2.pos);
        prop_assert_eq!(p1.neg, p2.neg);
    }

    // inverse_permute(shift) must exactly undo permute(shift).
    #[test]
    fn permute_reversibility(
        v in sparse_vec_strategy(200),
        shift in shift_strategy()
    ) {
        let permuted = v.permute(shift);
        let recovered = permuted.inverse_permute(shift);
        prop_assert_eq!(recovered.pos, v.pos);
        prop_assert_eq!(recovered.neg, v.neg);
    }

    // Composition law: permuting by s1 then s2 equals a single permutation
    // by (s1 + s2) mod DIM.
    #[test]
    fn permute_composition(
        v in sparse_vec_strategy(200),
        s1 in shift_strategy(),
        s2 in shift_strategy()
    ) {
        let p1_p2 = v.permute(s1).permute(s2);
        let p_sum = v.permute((s1 + s2) % DIM);
        prop_assert_eq!(p1_p2.pos, p_sum.pos);
        prop_assert_eq!(p1_p2.neg, p_sum.neg);
    }

    // Permutation is a bijection on indices, so nonzero count is preserved.
    #[test]
    fn permute_preserves_sparsity(
        v in sparse_vec_strategy(200),
        shift in shift_strategy()
    ) {
        let permuted = v.permute(shift);
        prop_assert_eq!(nnz(&permuted), nnz(&v),
            "Permute changed sparsity: {} != {}", nnz(&permuted), nnz(&v));
    }

    // Shift of zero is the identity permutation.
    #[test]
    fn permute_zero_is_identity(v in sparse_vec_strategy(200)) {
        let permuted = v.permute(0);
        prop_assert_eq!(permuted.pos, v.pos);
        prop_assert_eq!(permuted.neg, v.neg);
    }

    // A full rotation by DIM wraps around to the identity permutation.
    #[test]
    fn permute_full_cycle_is_identity(v in sparse_vec_strategy(200)) {
        let permuted = v.permute(DIM);
        prop_assert_eq!(permuted.pos, v.pos);
        prop_assert_eq!(permuted.neg, v.neg);
    }
}
// Properties of `thin` (reducing a vector to at most `target` nonzeros).
proptest! {
    #![proptest_config(ProptestConfig {
        cases: 1000,
        max_shrink_iters: 10000,
        .. ProptestConfig::default()
    })]

    // Thinning must never leave more than `target` nonzeros.
    #[test]
    fn thin_maintains_target(
        v in sparse_vec_strategy(400),
        target in 50usize..300
    ) {
        let thinned = v.thin(target);
        let nnz_thinned = nnz(&thinned);
        prop_assert!(nnz_thinned <= target,
            "Thinning exceeded target: {} > {}", nnz_thinned, target);
    }

    // Halving the nonzero count should still leave the thinned vector
    // recognizably similar to the original (cosine > 0.5).
    #[test]
    fn thin_preserves_similarity(
        v in sparse_vec_strategy(400)
    ) {
        prop_assume!(nnz(&v) > 100);
        let target = nnz(&v) / 2;
        let thinned = v.thin(target);
        prop_assume!(nnz(&thinned) > 0);
        let sim = v.cosine(&thinned);
        prop_assert!(sim > 0.5,
            "Thinning lost too much similarity: {} (expected > 0.5)", sim);
    }

    // A target above the current nonzero count must be a no-op.
    #[test]
    fn thin_below_current_is_identity(v in sparse_vec_strategy(200)) {
        let current = nnz(&v);
        let target = current + 100;
        let thinned = v.thin(target);
        prop_assert_eq!(thinned.pos, v.pos);
        prop_assert_eq!(thinned.neg, v.neg);
    }

    // Thinning to zero must produce the empty vector.
    #[test]
    fn thin_to_zero_is_empty(v in sparse_vec_strategy(200)) {
        let thinned = v.thin(0);
        prop_assert!(thinned.pos.is_empty() && thinned.neg.is_empty(),
            "Thin to zero should produce empty vector");
    }

    // Thinning is deterministic: same vector and target give the same result.
    #[test]
    fn thin_is_deterministic(
        v in sparse_vec_strategy(400),
        target in 50usize..200
    ) {
        let t1 = v.thin(target);
        let t2 = v.thin(target);
        prop_assert_eq!(t1.pos, t2.pos);
        prop_assert_eq!(t1.neg, t2.neg);
    }
}
// Non-property stress tests: large payload round-trips, deep path hierarchies,
// and many-vector bundling. The slow ones are #[ignore]d and must be run with
// `cargo test -- --ignored`.
#[cfg(test)]
mod stress_tests {
    use super::*;

    /// Round-trips 1 MB of patterned bytes through encode/decode and requires
    /// >90% byte-level fidelity. #[ignore]d because it is slow.
    #[test]
    #[ignore]
    fn large_file_roundtrip() {
        let data: Vec<u8> = (0..1_000_000).map(|i| (i % 256) as u8).collect();
        let config = ReversibleVSAConfig::large_blocks();
        let encoded = SparseVec::encode_data(&data, &config, None);
        let decoded = encoded.decode_data(&config, None, data.len());
        // Count positions where the decoded byte matches the original.
        // `zip` truncates to the shorter side, so a short decode only
        // lowers fidelity; it cannot panic.
        let matching = data.iter()
            .zip(decoded.iter())
            .filter(|(a, b)| a == b)
            .count();
        let fidelity = matching as f64 / data.len() as f64;
        assert!(fidelity > 0.9, "Large file fidelity: {}", fidelity);
    }

    /// Round-trips 10 MB and spot-checks ~10k evenly spaced bytes instead of
    /// comparing every position. #[ignore]d because it is slow.
    #[test]
    #[ignore]
    fn very_large_file_sampling() {
        let data: Vec<u8> = (0..10_000_000).map(|i| (i % 256) as u8).collect();
        let config = ReversibleVSAConfig::large_blocks();
        let encoded = SparseVec::encode_data(&data, &config, None);
        let decoded = encoded.decode_data(&config, None, data.len());
        let samples = 10000;
        // Guard against a zero step (step_by panics on 0) if `samples` ever
        // exceeds `data.len()`.
        let step = (data.len() / samples).max(1);
        let mut matching = 0usize;
        let mut total = 0usize;
        for i in (0..data.len()).step_by(step) {
            // Checked `get` on both sides: never panics even if `decoded`
            // came back shorter than `data`.
            if data.get(i) == decoded.get(i) {
                matching += 1;
            }
            total += 1;
        }
        // Divide by the number of samples actually taken, not the nominal
        // `samples`: step_by yields ceil(len / step) items, which only equals
        // `samples` when the division is exact.
        let fidelity = matching as f64 / total as f64;
        assert!(fidelity > 0.85, "Very large file sampled fidelity: {}", fidelity);
    }

    /// Encodes/decodes a small payload under a 20-level-deep path context and
    /// requires an exact round-trip. #[ignore]d because it is slow.
    #[test]
    #[ignore]
    fn deep_hierarchy_paths() {
        let data = b"test data for deep hierarchy";
        let mut path = String::from("/root");
        for i in 0..20 {
            path.push_str(&format!("/level{}", i));
        }
        let config = ReversibleVSAConfig::default();
        let encoded = SparseVec::encode_data(data, &config, Some(&path));
        let decoded = encoded.decode_data(&config, Some(&path), data.len());
        assert_eq!(decoded, data, "Deep hierarchy round-trip failed");
    }

    /// Thinning and self-bundling a hand-built, very sparse vector: thinning
    /// honors the target and self-bundle is the identity (idempotence).
    #[test]
    fn high_sparsity_operations() {
        let mut v = SparseVec::new();
        v.pos = vec![0, 10, 20, 30, 40];
        v.neg = vec![1, 11, 21, 31, 41];
        let thinned = v.thin(5);
        assert!(nnz(&thinned) <= 5, "High sparsity thinning failed");
        let bundled = v.bundle(&v);
        assert_eq!(bundled.pos, v.pos);
        assert_eq!(bundled.neg, v.neg);
    }

    /// Bundles 100 distinct data-derived vectors and checks the superposition
    /// keeps positive similarity to every component.
    #[test]
    #[allow(deprecated)] // SparseVec::from_data is deprecated but still the
    // simplest way to derive distinct test vectors here.
    fn bundle_many_vectors() {
        let vectors: Vec<SparseVec> = (0..100)
            .map(|i| {
                let data = format!("vector_{}", i);
                SparseVec::from_data(data.as_bytes())
            })
            .collect();
        // Fold all 100 vectors into one superposition.
        let mut result = vectors[0].clone();
        for v in &vectors[1..] {
            result = result.bundle(v);
        }
        for (i, v) in vectors.iter().enumerate() {
            let sim = v.cosine(&result);
            assert!(sim > 0.0, "Bundle of many lost similarity to vector {}: {}", i, sim);
        }
    }
}