use super::*;
use crate::code::ast::{Token, TokenType};
use crate::code::path::AstPath;
/// EMB-001: an embedding compared with itself has cosine similarity 1.0.
///
/// Exercised on three hand-picked vectors of differing length and magnitude;
/// the result must be within 1e-10 of 1.0 for each of them.
#[test]
fn falsify_emb_001_cosine_self_similarity() {
    let cases: [&[f64]; 3] = [
        &[1.0, 0.0, 0.0],
        &[0.5, -0.3, 0.8, 0.1],
        &[100.0, -200.0, 300.0],
    ];

    for (idx, components) in cases.iter().enumerate() {
        let emb = CodeEmbedding::new(Vector::from_vec(components.to_vec()));
        let sim = emb.cosine_similarity(&emb);
        let deviation = (sim - 1.0).abs();
        assert!(
            deviation < 1e-10,
            "FALSIFIED EMB-001: cosine_similarity(x, x) = {sim} != 1.0 for test case {idx}"
        );
    }
}
/// EMB-002: cosine similarity between embeddings of different dimension is 0.0.
///
/// Compares a 3-component embedding against a 2-component one and expects a
/// result within 1e-10 of zero.
#[test]
fn falsify_emb_002_dimension_mismatch() {
    let embed = |xs: &[f64]| CodeEmbedding::new(Vector::from_vec(xs.to_vec()));

    let three_d = embed(&[1.0, 2.0, 3.0]);
    let two_d = embed(&[1.0, 2.0]);

    let sim = three_d.cosine_similarity(&two_d);
    let magnitude = sim.abs();
    assert!(
        magnitude < 1e-10,
        "FALSIFIED EMB-002: dimension mismatch should return 0.0, got {sim}"
    );
}
/// EMB-003: attention weights from `aggregate_paths` behave like a distribution.
///
/// For three paths aggregated by a 32-dimensional encoder seeded with 42, the
/// test requires that attention weights are present, that there is exactly one
/// weight per input path, that the weights sum to 1.0 (within 1e-6), and that
/// no weight is negative.
#[test]
fn falsify_emb_003_attention_weights_sum_to_one() {
    let encoder = Code2VecEncoder::new(32).with_seed(42);

    // Three paths with one, two and three intermediate AST nodes respectively.
    let two_node_path = AstPath::new(
        Token::new(TokenType::Identifier, "x"),
        vec![AstNodeType::Parameter, AstNodeType::Function],
        Token::new(TokenType::Identifier, "y"),
    );
    let one_node_path = AstPath::new(
        Token::new(TokenType::Identifier, "a"),
        vec![AstNodeType::Return],
        Token::new(TokenType::Identifier, "b"),
    );
    let three_node_path = AstPath::new(
        Token::new(TokenType::Identifier, "foo"),
        vec![
            AstNodeType::Function,
            AstNodeType::Block,
            AstNodeType::Return,
        ],
        Token::new(TokenType::Identifier, "bar"),
    );
    let paths = vec![two_node_path, one_node_path, three_node_path];

    let embedding = encoder.aggregate_paths(&paths);
    let weights = embedding
        .attention_weights()
        .expect("FALSIFIED EMB-003: no attention weights returned");

    // One weight per path.
    assert_eq!(
        weights.len(),
        paths.len(),
        "FALSIFIED EMB-003: attention weights len {} != paths len {}",
        weights.len(),
        paths.len()
    );

    // Weights are normalised.
    let sum: f64 = weights.iter().sum();
    let drift = (sum - 1.0).abs();
    assert!(
        drift < 1e-6,
        "FALSIFIED EMB-003: attention weights sum = {sum} != 1.0"
    );

    // No weight is negative (a NaN weight also fails this comparison).
    weights.iter().enumerate().for_each(|(i, &w)| {
        assert!(
            w >= 0.0,
            "FALSIFIED EMB-003: attention weight[{i}] = {w} < 0"
        );
    });
}
/// EMB-004: two encoders built with the same dimension and seed encode a path
/// identically.
///
/// Both encoders use dimension 64 and seed 777. The two encodings must have the
/// same number of components and agree component-wise to within 1e-15.
#[test]
fn falsify_emb_004_deterministic_encoding() {
    let path = AstPath::new(
        Token::new(TokenType::Identifier, "alpha"),
        vec![AstNodeType::Function, AstNodeType::Return],
        Token::new(TokenType::Identifier, "beta"),
    );
    let enc1 = Code2VecEncoder::new(64).with_seed(777);
    let enc2 = Code2VecEncoder::new(64).with_seed(777);
    let emb1 = enc1.encode_path(&path);
    let emb2 = enc2.encode_path(&path);

    // `zip` stops at the shorter input, so without this check two encodings of
    // different length would pass the component-wise comparison below.
    // Counting via `iter()` relies only on API this test already exercises.
    let (len1, len2) = (emb1.iter().count(), emb2.iter().count());
    assert_eq!(
        len1, len2,
        "FALSIFIED EMB-004: encode_path lengths differ: {len1} vs {len2}"
    );

    for (i, (&a, &b)) in emb1.iter().zip(emb2.iter()).enumerate() {
        assert!(
            (a - b).abs() < 1e-15,
            "FALSIFIED EMB-004: encode_path differs at dim {i}: {a} vs {b}"
        );
    }
}
/// EMB-005: the aggregated embedding has the same dimension as its encoder.
///
/// Checked for encoder dimensions 4, 16, 64 and 128, each aggregating a single
/// one-node path.
#[test]
fn falsify_emb_005_aggregate_output_dimension() {
    for dim in [4, 16, 64, 128] {
        let encoder = Code2VecEncoder::new(dim);

        let source = Token::new(TokenType::Identifier, "v");
        let target = Token::new(TokenType::Identifier, "w");
        let paths = vec![AstPath::new(source, vec![AstNodeType::Variable], target)];

        let embedding = encoder.aggregate_paths(&paths);
        assert_eq!(
            embedding.dim(),
            dim,
            "FALSIFIED EMB-005: aggregate dim {} != encoder dim {dim}",
            embedding.dim()
        );
    }
}
/// EMB-006: aggregating no paths yields a zero vector of the encoder's dimension.
///
/// Uses a 32-dimensional encoder; the result must report dimension 32 and every
/// component must be within 1e-15 of zero.
#[test]
fn falsify_emb_006_empty_paths_zero_vector() {
    let no_paths = &[];
    let embedding = Code2VecEncoder::new(32).aggregate_paths(no_paths);

    assert_eq!(
        embedding.dim(),
        32,
        "FALSIFIED EMB-006: empty aggregate dim {} != 32",
        embedding.dim()
    );

    embedding
        .vector()
        .as_slice()
        .iter()
        .enumerate()
        .for_each(|(i, &val)| {
            assert!(
                val.abs() < 1e-15,
                "FALSIFIED EMB-006: empty aggregate [{i}] = {val} != 0.0"
            );
        });
}
/// EMB-007: cosine similarity involving a zero vector is 0.0.
///
/// Checks both zero-vs-nonzero and zero-vs-zero; each result must be within
/// 1e-10 of zero.
#[test]
fn falsify_emb_007_zero_vector_cosine() {
    let zero = CodeEmbedding::new(Vector::from_vec(vec![0.0; 3]));
    let nonzero = CodeEmbedding::new(Vector::from_vec(vec![1.0, 2.0, 3.0]));

    // Zero vector against an ordinary vector.
    let sim = zero.cosine_similarity(&nonzero);
    let sim_magnitude = sim.abs();
    assert!(
        sim_magnitude < 1e-10,
        "FALSIFIED EMB-007: cos(0, x) = {sim} != 0.0"
    );

    // Zero vector against itself.
    let sim2 = zero.cosine_similarity(&zero);
    let sim2_magnitude = sim2.abs();
    assert!(
        sim2_magnitude < 1e-10,
        "FALSIFIED EMB-007: cos(0, 0) = {sim2} != 0.0"
    );
}
/// Property-based falsification tests for cosine similarity on `CodeEmbedding`.
///
/// Each property runs 30 generated cases with components drawn from
/// `-50.0..50.0`.
mod code_emb_proptest_falsify {
    use super::*;
    use proptest::prelude::*;

    /// Inputs whose absolute component sum is at or below this are discarded.
    const NONZERO_EPS: f64 = 1e-6;
    /// Slack allowed on similarity comparisons.
    const SIM_TOLERANCE: f64 = 1e-6;

    /// Sum of absolute component values, accumulated left to right.
    fn l1_norm(components: &[f64]) -> f64 {
        components.iter().map(|x| x.abs()).sum()
    }

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(30))]

        // EMB-001 (property form): cos(x, x) is 1.0 for generated 3-component
        // vectors that are not (near-)zero.
        #[test]
        fn falsify_emb_001_prop_cosine_self(
            a in -50.0f64..50.0,
            b in -50.0f64..50.0,
            c in -50.0f64..50.0,
        ) {
            prop_assume!(l1_norm(&[a, b, c]) > NONZERO_EPS);

            let emb = CodeEmbedding::new(Vector::from_vec(vec![a, b, c]));
            let sim = emb.cosine_similarity(&emb);
            let deviation = (sim - 1.0).abs();
            prop_assert!(
                deviation < SIM_TOLERANCE,
                "FALSIFIED EMB-001-prop: cos(x,x)={} != 1.0 for [{},{},{}]",
                sim, a, b, c
            );
        }

        // Cosine similarity of two generated 2-component, not (near-)zero
        // vectors stays within [-1, 1], allowing for the tolerance.
        // NOTE(review): this property is labelled EMB-006, while the
        // non-property EMB-006 test in this file covers empty-path
        // aggregation — confirm the intended contract ID.
        #[test]
        fn falsify_emb_006_prop_cosine_bounded(
            a1 in -50.0f64..50.0,
            a2 in -50.0f64..50.0,
            b1 in -50.0f64..50.0,
            b2 in -50.0f64..50.0,
        ) {
            prop_assume!(l1_norm(&[a1, a2]) > NONZERO_EPS);
            prop_assume!(l1_norm(&[b1, b2]) > NONZERO_EPS);

            let ea = CodeEmbedding::new(Vector::from_vec(vec![a1, a2]));
            let eb = CodeEmbedding::new(Vector::from_vec(vec![b1, b2]));
            let sim = ea.cosine_similarity(&eb);

            let allowed = (-1.0 - SIM_TOLERANCE)..=(1.0 + SIM_TOLERANCE);
            prop_assert!(
                allowed.contains(&sim),
                "FALSIFIED EMB-006-prop: cos(a,b)={} out of [-1,1]",
                sim
            );
        }
    }
}