#![cfg(feature = "proptest")]
use embeddenator::{ReversibleVSAConfig, SparseVec, DIM};
use proptest::prelude::*;
use std::collections::BTreeMap;
// Slack allowed when asserting a cosine similarity is "not meaningfully negative":
// sparse ternary interference can push a true-zero similarity slightly below 0
// (used as `sim >= -SIMILARITY_TOLERANCE` in the bundle-similarity property).
const SIMILARITY_TOLERANCE: f64 = 0.05;
// Minimum cosine similarity for two vectors to count as "approximately equal"
// (threshold passed to `approx_equal`, e.g. for approximate bundle associativity).
const APPROX_TOLERANCE: f64 = 0.90;
/// Proptest strategy producing a random `SparseVec` with up to `max_nonzeros`
/// nonzero entries: each entry is an index in `0..DIM` paired with a sign of
/// +1 or -1.
fn sparse_vec_strategy(max_nonzeros: usize) -> impl Strategy<Value = SparseVec> {
    // One raw entry: (index, sign), sign drawn uniformly from {+1, -1}.
    let entry = (0usize..DIM, prop_oneof![Just(1i8), Just(-1i8)]);
    prop::collection::vec(entry, 0..max_nonzeros).prop_map(|pairs| {
        // Collecting into a BTreeMap dedupes indices (the last generated sign
        // wins) and iterates in ascending index order, so `pos` and `neg`
        // come out sorted and duplicate-free.
        let deduped: BTreeMap<usize, i8> = pairs.into_iter().collect();
        let mut vec = SparseVec::new();
        for (index, sign) in deduped {
            // Only +1 / -1 are ever generated above.
            if sign == 1 {
                vec.pos.push(index);
            } else {
                vec.neg.push(index);
            }
        }
        vec
    })
}
/// Proptest strategy producing arbitrary byte payloads of length `0..max_len`.
/// NOTE(review): not referenced by any test visible in this chunk — confirm a
/// caller exists elsewhere before removing.
fn byte_data_strategy(max_len: usize) -> impl Strategy<Value = Vec<u8>> {
    let len_range = 0..max_len;
    prop::collection::vec(any::<u8>(), len_range)
}
/// Proptest strategy producing a permutation shift amount: every legal
/// rotation from 0 through DIM - 1.
fn shift_strategy() -> impl Strategy<Value = usize> {
    0usize..DIM
}
/// Returns true when `a` and `b` are "approximately equal": cosine similarity
/// at or above `threshold`. Two empty vectors are considered equal; one empty
/// and one non-empty are never equal (cosine with an empty vector is not
/// meaningful).
fn approx_equal(a: &SparseVec, b: &SparseVec, threshold: f64) -> bool {
    let a_is_empty = a.pos.is_empty() && a.neg.is_empty();
    let b_is_empty = b.pos.is_empty() && b.neg.is_empty();
    match (a_is_empty, b_is_empty) {
        (true, true) => true,
        (true, false) | (false, true) => false,
        (false, false) => a.cosine(b) >= threshold,
    }
}
/// Number of nonzero components in `v` (positive plus negative entries).
fn nnz(v: &SparseVec) -> usize {
    let positives = v.pos.len();
    let negatives = v.neg.len();
    positives + negatives
}
// Algebraic properties of `bundle` (superposition of sparse ternary vectors).
proptest! {
    #![proptest_config(ProptestConfig {
        cases: 1000,
        max_shrink_iters: 10000,
        .. ProptestConfig::default()
    })]

    // bundle(a, b) and bundle(b, a) must produce identical index lists.
    #[test]
    fn bundle_commutativity(
        a in sparse_vec_strategy(200),
        b in sparse_vec_strategy(200)
    ) {
        let ab = a.bundle(&b);
        let ba = b.bundle(&a);
        prop_assert_eq!(ab.pos, ba.pos);
        prop_assert_eq!(ab.neg, ba.neg);
    }

    // Bundling a vector with itself must return the vector unchanged.
    #[test]
    fn bundle_identity_idempotence(a in sparse_vec_strategy(200)) {
        let aa = a.bundle(&a);
        prop_assert_eq!(aa.pos, a.pos);
        prop_assert_eq!(aa.neg, a.neg);
    }

    // Associativity holds only approximately: (a+b)+c and a+(b+c) must be
    // cosine-similar above APPROX_TOLERANCE, not bit-identical.
    #[test]
    fn bundle_associativity_approximate(
        a in sparse_vec_strategy(150),
        b in sparse_vec_strategy(150),
        c in sparse_vec_strategy(150)
    ) {
        let bc = b.bundle(&c);
        let a_bc = a.bundle(&bc);
        let ab = a.bundle(&b);
        let ab_c = ab.bundle(&c);
        prop_assert!(approx_equal(&a_bc, &ab_c, APPROX_TOLERANCE),
            "Bundle associativity failed: similarity = {}", a_bc.cosine(&ab_c));
    }

    // A bundle must not become anti-correlated with either component:
    // similarity may dip slightly below zero due to sparse interference,
    // hence the -SIMILARITY_TOLERANCE slack rather than a strict >= 0.
    #[test]
    fn bundle_preserves_component_similarity(
        a in sparse_vec_strategy(200),
        b in sparse_vec_strategy(200)
    ) {
        prop_assume!(nnz(&a) > 0 && nnz(&b) > 0);
        let ab = a.bundle(&b);
        prop_assume!(nnz(&ab) > 0);
        let sim_a = a.cosine(&ab);
        let sim_b = b.cosine(&ab);
        prop_assert!(sim_a >= -SIMILARITY_TOLERANCE,
            "Bundle should be similar to component a: sim = {}", sim_a);
        prop_assert!(sim_b >= -SIMILARITY_TOLERANCE,
            "Bundle should be similar to component b: sim = {}", sim_b);
    }

    // The bundle can never have more nonzeros than its inputs combined.
    #[test]
    fn bundle_sparsity_bounded(
        a in sparse_vec_strategy(200),
        b in sparse_vec_strategy(200)
    ) {
        let ab = a.bundle(&b);
        let nnz_a = nnz(&a);
        let nnz_b = nnz(&b);
        let nnz_ab = nnz(&ab);
        prop_assert!(nnz_ab <= nnz_a + nnz_b,
            "Bundle exceeded sparsity bound: {} > {} + {}", nnz_ab, nnz_a, nnz_b);
    }

    // The empty vector is the identity element for bundle.
    #[test]
    fn bundle_empty_is_identity(a in sparse_vec_strategy(200)) {
        let empty = SparseVec::new();
        let result = a.bundle(&empty);
        prop_assert_eq!(result.pos, a.pos);
        prop_assert_eq!(result.neg, a.neg);
    }
}
// Algebraic properties of `bind` (association/role-filler operation).
proptest! {
    #![proptest_config(ProptestConfig {
        cases: 1000,
        max_shrink_iters: 10000,
        .. ProptestConfig::default()
    })]

    // Binding the same data with two keys over disjoint index sets should
    // yield nearly orthogonal results (|cos| < 0.5). Currently #[ignore]d —
    // presumably flaky for this implementation; confirm before re-enabling.
    #[test]
    #[ignore]
    fn bind_near_orthogonality(
        data in sparse_vec_strategy(200),
        key1 in sparse_vec_strategy(100),
        key2 in sparse_vec_strategy(100)
    ) {
        // Keys must differ, all three vectors must be reasonably dense, and
        // the keys' index supports must not overlap at all.
        prop_assume!(key1.pos != key2.pos || key1.neg != key2.neg);
        prop_assume!(nnz(&data) > 20 && nnz(&key1) > 10 && nnz(&key2) > 10);
        let key1_indices: std::collections::HashSet<_> =
            key1.pos.iter().chain(key1.neg.iter()).copied().collect();
        let key2_indices: std::collections::HashSet<_> =
            key2.pos.iter().chain(key2.neg.iter()).copied().collect();
        prop_assume!(key1_indices.is_disjoint(&key2_indices));
        let bound1 = data.bind(&key1);
        let bound2 = data.bind(&key2);
        prop_assume!(nnz(&bound1) > 3 && nnz(&bound2) > 3);
        let sim = bound1.cosine(&bound2);
        prop_assert!(sim.abs() < 0.5,
            "Bind orthogonality failed: similarity = {} (expected < 0.5)", sim);
    }

    // Bind should be approximately self-inverse: binding with the same key
    // twice should recover something similar to the original data. #[ignore]d.
    #[test]
    #[ignore]
    fn bind_inverse_approximate(
        data in sparse_vec_strategy(200),
        key in sparse_vec_strategy(200)
    ) {
        prop_assume!(nnz(&data) > 10 && nnz(&key) > 20);
        let bound = data.bind(&key);
        let unbound = bound.bind(&key);
        prop_assume!(nnz(&unbound) > 0);
        let sim = data.cosine(&unbound);
        prop_assert!(sim > 0.5,
            "Bind inverse failed: similarity = {} (expected > 0.5)", sim);
    }

    // Approximate distributivity: (a + b) * k should be similar to
    // (a * k) + (b * k).
    #[test]
    fn bind_distributive_over_bundle(
        a in sparse_vec_strategy(150),
        b in sparse_vec_strategy(150),
        key in sparse_vec_strategy(200)
    ) {
        prop_assume!(nnz(&a) > 0 && nnz(&b) > 0 && nnz(&key) > 0);
        let ab = a.bundle(&b);
        let left = ab.bind(&key);
        let ak = a.bind(&key);
        let bk = b.bind(&key);
        let right = ak.bundle(&bk);
        prop_assume!(nnz(&left) > 0 && nnz(&right) > 0);
        let sim = left.cosine(&right);
        prop_assert!(sim > 0.8,
            "Bind distributivity failed: similarity = {} (expected > 0.8)", sim);
    }

    // A bound vector can never be denser than the sparser of its operands.
    #[test]
    fn bind_sparsity_preservation(
        a in sparse_vec_strategy(200),
        b in sparse_vec_strategy(200)
    ) {
        let bound = a.bind(&b);
        let nnz_bound = nnz(&bound);
        let min_nnz = nnz(&a).min(nnz(&b));
        prop_assert!(nnz_bound <= min_nnz,
            "Bind exceeded minimum sparsity: {} > {}", nnz_bound, min_nnz);
    }

    // The empty vector annihilates under bind.
    #[test]
    fn bind_empty_yields_empty(a in sparse_vec_strategy(200)) {
        let empty = SparseVec::new();
        let result = a.bind(&empty);
        prop_assert!(result.pos.is_empty() && result.neg.is_empty(),
            "Bind with empty should yield empty");
    }

    // Binding a vector with itself three times should land close to the
    // original vector (approximate identity after a*a*a).
    #[test]
    fn bind_triple_self_approximate_identity(a in sparse_vec_strategy(200)) {
        prop_assume!(nnz(&a) > 0);
        let aa = a.bind(&a);
        let aaa = aa.bind(&a);
        prop_assume!(nnz(&aaa) > 0);
        let sim = a.cosine(&aaa);
        prop_assert!(sim > 0.7,
            "Triple self-bind failed: similarity = {} (expected > 0.7)", sim);
    }
}
// Algebraic properties of `permute` (cyclic index rotation by `shift`).
proptest! {
    #![proptest_config(ProptestConfig {
        cases: 1000,
        max_shrink_iters: 10000,
        .. ProptestConfig::default()
    })]

    // The same shift applied twice to the same vector must give identical results.
    #[test]
    fn permute_is_deterministic(
        v in sparse_vec_strategy(200),
        shift in shift_strategy()
    ) {
        let p1 = v.permute(shift);
        let p2 = v.permute(shift);
        prop_assert_eq!(p1.pos, p2.pos);
        prop_assert_eq!(p1.neg, p2.neg);
    }

    // inverse_permute(shift) must exactly undo permute(shift).
    #[test]
    fn permute_reversibility(
        v in sparse_vec_strategy(200),
        shift in shift_strategy()
    ) {
        let permuted = v.permute(shift);
        let recovered = permuted.inverse_permute(shift);
        prop_assert_eq!(recovered.pos, v.pos);
        prop_assert_eq!(recovered.neg, v.neg);
    }

    // Composition law: permuting by s1 then s2 equals a single permutation
    // by (s1 + s2) mod DIM.
    #[test]
    fn permute_composition(
        v in sparse_vec_strategy(200),
        s1 in shift_strategy(),
        s2 in shift_strategy()
    ) {
        let p1_p2 = v.permute(s1).permute(s2);
        let p_sum = v.permute((s1 + s2) % DIM);
        prop_assert_eq!(p1_p2.pos, p_sum.pos);
        prop_assert_eq!(p1_p2.neg, p_sum.neg);
    }

    // Permutation is a bijection on indices, so nonzero count is preserved.
    #[test]
    fn permute_preserves_sparsity(
        v in sparse_vec_strategy(200),
        shift in shift_strategy()
    ) {
        let permuted = v.permute(shift);
        prop_assert_eq!(nnz(&permuted), nnz(&v),
            "Permute changed sparsity: {} != {}", nnz(&permuted), nnz(&v));
    }

    // Shift of zero is the identity permutation.
    #[test]
    fn permute_zero_is_identity(v in sparse_vec_strategy(200)) {
        let permuted = v.permute(0);
        prop_assert_eq!(permuted.pos, v.pos);
        prop_assert_eq!(permuted.neg, v.neg);
    }

    // A full rotation by DIM wraps around to the identity permutation.
    #[test]
    fn permute_full_cycle_is_identity(v in sparse_vec_strategy(200)) {
        let permuted = v.permute(DIM);
        prop_assert_eq!(permuted.pos, v.pos);
        prop_assert_eq!(permuted.neg, v.neg);
    }
}
// Properties of `thin` (reducing a vector to at most `target` nonzeros).
proptest! {
    #![proptest_config(ProptestConfig {
        cases: 1000,
        max_shrink_iters: 10000,
        .. ProptestConfig::default()
    })]

    // Thinning must never leave more than `target` nonzeros.
    #[test]
    fn thin_maintains_target(
        v in sparse_vec_strategy(400),
        target in 50usize..300
    ) {
        let thinned = v.thin(target);
        let nnz_thinned = nnz(&thinned);
        prop_assert!(nnz_thinned <= target,
            "Thinning exceeded target: {} > {}", nnz_thinned, target);
    }

    // Halving the nonzero count should still leave the thinned vector
    // recognizably similar to the original (cosine > 0.5).
    #[test]
    fn thin_preserves_similarity(
        v in sparse_vec_strategy(400)
    ) {
        prop_assume!(nnz(&v) > 100);
        let target = nnz(&v) / 2;
        let thinned = v.thin(target);
        prop_assume!(nnz(&thinned) > 0);
        let sim = v.cosine(&thinned);
        prop_assert!(sim > 0.5,
            "Thinning lost too much similarity: {} (expected > 0.5)", sim);
    }

    // A target above the current nonzero count must be a no-op.
    #[test]
    fn thin_below_current_is_identity(v in sparse_vec_strategy(200)) {
        let current = nnz(&v);
        let target = current + 100;
        let thinned = v.thin(target);
        prop_assert_eq!(thinned.pos, v.pos);
        prop_assert_eq!(thinned.neg, v.neg);
    }

    // Thinning to zero must produce the empty vector.
    #[test]
    fn thin_to_zero_is_empty(v in sparse_vec_strategy(200)) {
        let thinned = v.thin(0);
        prop_assert!(thinned.pos.is_empty() && thinned.neg.is_empty(),
            "Thin to zero should produce empty vector");
    }

    // Thinning is deterministic: same vector and target give the same result.
    #[test]
    fn thin_is_deterministic(
        v in sparse_vec_strategy(400),
        target in 50usize..200
    ) {
        let t1 = v.thin(target);
        let t2 = v.thin(target);
        prop_assert_eq!(t1.pos, t2.pos);
        prop_assert_eq!(t1.neg, t2.neg);
    }
}
// Non-property stress tests: large payload round-trips, deep path hierarchies,
// and many-vector bundling. The slow ones are #[ignore]d and must be run with
// `cargo test -- --ignored`.
#[cfg(test)]
mod stress_tests {
    use super::*;

    /// Round-trips 1 MB of patterned bytes through encode/decode and requires
    /// >90% byte-level fidelity. #[ignore]d because it is slow.
    #[test]
    #[ignore]
    fn large_file_roundtrip() {
        let data: Vec<u8> = (0..1_000_000).map(|i| (i % 256) as u8).collect();
        let config = ReversibleVSAConfig::large_blocks();
        let encoded = SparseVec::encode_data(&data, &config, None);
        let decoded = encoded.decode_data(&config, None, data.len());
        // Count positions where the decoded byte matches the original.
        // `zip` truncates to the shorter side, so a short decode only
        // lowers fidelity; it cannot panic.
        let matching = data.iter()
            .zip(decoded.iter())
            .filter(|(a, b)| a == b)
            .count();
        let fidelity = matching as f64 / data.len() as f64;
        assert!(fidelity > 0.9, "Large file fidelity: {}", fidelity);
    }

    /// Round-trips 10 MB and spot-checks ~10k evenly spaced bytes instead of
    /// comparing every position. #[ignore]d because it is slow.
    #[test]
    #[ignore]
    fn very_large_file_sampling() {
        let data: Vec<u8> = (0..10_000_000).map(|i| (i % 256) as u8).collect();
        let config = ReversibleVSAConfig::large_blocks();
        let encoded = SparseVec::encode_data(&data, &config, None);
        let decoded = encoded.decode_data(&config, None, data.len());
        let samples = 10000;
        // Guard against a zero step (step_by panics on 0) if `samples` ever
        // exceeds `data.len()`.
        let step = (data.len() / samples).max(1);
        let mut matching = 0usize;
        let mut total = 0usize;
        for i in (0..data.len()).step_by(step) {
            // Checked `get` on both sides: never panics even if `decoded`
            // came back shorter than `data`.
            if data.get(i) == decoded.get(i) {
                matching += 1;
            }
            total += 1;
        }
        // Divide by the number of samples actually taken, not the nominal
        // `samples`: step_by yields ceil(len / step) items, which only equals
        // `samples` when the division is exact.
        let fidelity = matching as f64 / total as f64;
        assert!(fidelity > 0.85, "Very large file sampled fidelity: {}", fidelity);
    }

    /// Encodes/decodes a small payload under a 20-level-deep path context and
    /// requires an exact round-trip. #[ignore]d because it is slow.
    #[test]
    #[ignore]
    fn deep_hierarchy_paths() {
        let data = b"test data for deep hierarchy";
        let mut path = String::from("/root");
        for i in 0..20 {
            path.push_str(&format!("/level{}", i));
        }
        let config = ReversibleVSAConfig::default();
        let encoded = SparseVec::encode_data(data, &config, Some(&path));
        let decoded = encoded.decode_data(&config, Some(&path), data.len());
        assert_eq!(decoded, data, "Deep hierarchy round-trip failed");
    }

    /// Thinning and self-bundling a hand-built, very sparse vector: thinning
    /// honors the target and self-bundle is the identity (idempotence).
    #[test]
    fn high_sparsity_operations() {
        let mut v = SparseVec::new();
        v.pos = vec![0, 10, 20, 30, 40];
        v.neg = vec![1, 11, 21, 31, 41];
        let thinned = v.thin(5);
        assert!(nnz(&thinned) <= 5, "High sparsity thinning failed");
        let bundled = v.bundle(&v);
        assert_eq!(bundled.pos, v.pos);
        assert_eq!(bundled.neg, v.neg);
    }

    /// Bundles 100 distinct data-derived vectors and checks the superposition
    /// keeps positive similarity to every component.
    #[test]
    #[allow(deprecated)] // SparseVec::from_data is deprecated but still the
    // simplest way to derive distinct test vectors here.
    fn bundle_many_vectors() {
        let vectors: Vec<SparseVec> = (0..100)
            .map(|i| {
                let data = format!("vector_{}", i);
                SparseVec::from_data(data.as_bytes())
            })
            .collect();
        // Fold all 100 vectors into one superposition.
        let mut result = vectors[0].clone();
        for v in &vectors[1..] {
            result = result.bundle(v);
        }
        for (i, v) in vectors.iter().enumerate() {
            let sim = v.cosine(&result);
            assert!(sim > 0.0, "Bundle of many lost similarity to vector {}: {}", i, sim);
        }
    }
}