#![allow(clippy::unwrap_used)]
use super::*;
use crate::autograd::matmul;
use crate::Tensor;
#[test]
fn test_falsify_f_math_001_ba_zero_at_init_comprehensive() {
let dims = [(32, 64), (64, 64), (128, 256), (256, 128), (512, 512)];
let ranks = [4, 8, 16, 32, 64, 128];
for &(d_out, d_in) in &dims {
for &r in &ranks {
if r > d_out.min(d_in) {
continue;
}
let base_data: Vec<f32> =
(0..d_out * d_in).map(|i| (i as f32 * 0.07).sin() * 0.5).collect();
let base_weight = Tensor::from_vec(base_data, false);
let lora = LoRALayer::new(base_weight, d_out, d_in, r, r as f32);
// B (d_out x r) matmul A (r x d_in): the resulting (d_out x d_in) update must be zero at init.
let ba = matmul(lora.lora_b(), lora.lora_a(), d_out, r, d_in);
let max_abs = ba.data().iter().map(|v| v.abs()).fold(0.0f32, f32::max);
assert!(
max_abs < 1e-10,
"F-MATH-001 violated: B@A != 0 at init for d_out={d_out}, d_in={d_in}, r={r}. \
max|B@A| = {max_abs}"
);
}
}
}
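// Supplementary sketch (added for illustration): F-MATH-001 holds because B is
// zero-initialized, so B @ A vanishes regardless of A. This check probes that
// precondition directly through the same `lora_b()` accessor used above.
#[test]
fn test_sketch_lora_b_zero_initialized() {
let (d_out, d_in, r) = (64, 32, 8);
let base_weight = Tensor::from_vec(vec![0.0; d_out * d_in], false);
let lora = LoRALayer::new(base_weight, d_out, d_in, r, r as f32);
let max_abs_b = lora.lora_b().data().iter().map(|v| v.abs()).fold(0.0f32, f32::max);
assert!(max_abs_b < 1e-10, "B should be all zeros at init, but max|B| = {max_abs_b}");
}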
#[test]
fn test_falsify_f_math_007_trainable_param_count() {
let configs = [
(64, 64, 4),
(64, 64, 8),
(64, 64, 16),
(128, 64, 8),
(256, 128, 16),
(512, 512, 32),
(768, 768, 64),
(1024, 256, 128),
(4, 4, 2),
(3, 5, 1),
];
for &(d_out, d_in, rank) in &configs {
let base_weight = Tensor::from_vec(vec![0.0; d_out * d_in], false);
let mut lora = LoRALayer::new(base_weight, d_out, d_in, rank, rank as f32);
let params = lora.trainable_params();
assert_eq!(params.len(), 2, "Should have exactly 2 trainable param tensors (A and B)");
let total_elements: usize = params.iter().map(|p| p.len()).sum();
let expected = rank * d_in + d_out * rank;
assert_eq!(
total_elements, expected,
"F-MATH-007 violated: trainable elements = {total_elements}, \
expected r*(d_in+d_out) = {expected} for d_out={d_out}, d_in={d_in}, r={rank}"
);
assert_eq!(
params[0].len(),
rank * d_in,
"A matrix should have r*d_in = {} elements",
rank * d_in
);
assert_eq!(
params[1].len(),
d_out * rank,
"B matrix should have d_out*r = {} elements",
d_out * rank
);
for (i, p) in params.iter().enumerate() {
assert!(p.requires_grad(), "F-MATH-007: trainable param {i} must require grad");
}
}
}
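// Worked example (added for illustration; the 768/8 sizes are hypothetical): for a
// 768x768 projection with r = 8, LoRA trains r*(d_in + d_out) = 8 * 1536 = 12_288
// elements versus 768 * 768 = 589_824 for the full matrix, roughly 2% of the parameters.
#[test]
fn test_sketch_param_reduction_arithmetic() {
let (d_out, d_in, r) = (768usize, 768usize, 8usize);
let lora_elems = r * (d_in + d_out);
let full_elems = d_out * d_in;
assert_eq!(lora_elems, 12_288);
assert_eq!(full_elems, 589_824);
assert!(lora_elems * 10 < full_elems, "LoRA should train well under 10% of the full matrix here");
}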
#[test]
fn test_falsify_f_math_008_nf4_dequantize_tolerance() {
use crate::quant::{dequantize_4bit, quantize_4bit};
let test_vectors: Vec<Vec<f32>> = vec![
(0..256).map(|i| (i as f32 - 128.0) * 0.01).collect(),
(0..256).map(|i| (i as f32 * 0.05).sin() * 0.5).collect(),
(0..256).map(|i| (i as f32 * 0.1).cos() * 2.0).collect(),
(0..256).map(|i| (i as f32 * 0.001).sin() * 0.01).collect(),
vec![0.0; 256],
vec![0.42; 256],
];
for (vec_idx, original) in test_vectors.iter().enumerate() {
let quantized = quantize_4bit(original);
let recovered = dequantize_4bit(&quantized);
assert_eq!(
original.len(),
recovered.len(),
"F-MATH-008: length mismatch after round-trip for vector {vec_idx}"
);
let mut max_abs_err = 0.0f32;
let mut max_rel_err = 0.0f32;
for (i, (&orig, &rec)) in original.iter().zip(recovered.iter()).enumerate() {
let abs_err = (orig - rec).abs();
max_abs_err = max_abs_err.max(abs_err);
if orig.abs() > 1e-6 {
let rel_err = abs_err / orig.abs();
max_rel_err = max_rel_err.max(rel_err);
}
assert!(rec.is_finite(), "F-MATH-008: dequantized value at [{i}] is not finite: {rec}");
}
let absmax = original.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
// Loose bound: the 4-bit round-trip error should stay well below 35% of the input's absmax.
let tolerance = if absmax < 1e-6 { 1e-6 } else { absmax * 0.35 };
assert!(
max_abs_err <= tolerance,
"F-MATH-008 violated: max abs error {max_abs_err} > tolerance {tolerance} \
for vector {vec_idx} (absmax={absmax}, max rel err={max_rel_err})"
);
}
}
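// Supplementary sketch (added for illustration): the same round-trip bound applied to two
// extra 256-element edge cases, an all-negative ramp and an alternating-sign vector,
// reusing only the `quantize_4bit` / `dequantize_4bit` functions exercised above.
#[test]
fn test_sketch_nf4_roundtrip_edge_cases() {
use crate::quant::{dequantize_4bit, quantize_4bit};
let extra_vectors: Vec<Vec<f32>> = vec![
(0..256).map(|i| -0.5 - i as f32 * 0.01).collect(),
(0..256).map(|i| if i % 2 == 0 { 1.0 } else { -1.0 }).collect(),
];
for original in &extra_vectors {
let recovered = dequantize_4bit(&quantize_4bit(original));
assert_eq!(original.len(), recovered.len(), "length must survive the round-trip");
let absmax = original.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
for (&orig, &rec) in original.iter().zip(recovered.iter()) {
assert!(rec.is_finite(), "dequantized value must be finite");
assert!((orig - rec).abs() <= absmax * 0.35, "round-trip error exceeds 35% of absmax");
}
}
}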
/// Builds a tiny LoRA trainer (CPU only) and runs `num_steps` single-batch training steps.
fn make_lora_trainer_and_train(num_steps: usize) -> crate::train::TransformerTrainer {
use crate::train::{LMBatch, TransformerTrainConfig};
use crate::transformer::TransformerConfig;
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(4, 8.0, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.01)
.with_max_steps(num_steps)
.with_use_cuda(false);
let mut trainer = crate::train::TransformerTrainer::new(config);
let seq_len = 8;
let input_ids: Vec<u32> = (0..seq_len).map(|i| (i as u32) % 100 + 1).collect();
let target_ids: Vec<u32> = (0..seq_len).map(|i| (i as u32 + 1) % 100 + 1).collect();
for _ in 0..num_steps {
let batch = LMBatch::single(input_ids.clone(), target_ids.clone());
let _loss = trainer.train_batch(&batch);
}
trainer
}
/// FNV-style fingerprint of an f32 slice: 32 output bytes, each derived from a pass over
/// the raw bytes with a per-byte salt and position mixing. Used to detect any change in
/// weights that should stay frozen during LoRA training.
fn hash_f32_slice(data: &[f32]) -> [u8; 32] {
let bytes: &[u8] = bytemuck::cast_slice(data);
let mut hash = [0u8; 32];
for chunk_idx in 0..32 {
let mut h: u64 = 0xcbf29ce484222325;
for (i, &b) in bytes.iter().enumerate() {
h ^= u64::from(b).wrapping_add(chunk_idx as u64);
h = h.wrapping_mul(0x100000001b3);
h ^= i as u64;
}
hash[chunk_idx] = (h & 0xff) as u8;
}
hash
}
#[test]
fn test_falsify_f_freeze_003_base_weights_unchanged_after_100_steps() {
use crate::train::{LMBatch, TransformerTrainConfig};
use crate::transformer::TransformerConfig;
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(4, 8.0, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.01)
.with_use_cuda(false);
let mut trainer = crate::train::TransformerTrainer::new(config);
let pre_snapshots: Vec<(String, Vec<f32>)> = trainer
.model()
.named_parameters()
.into_iter()
.map(|(name, tensor)| (name, tensor.data().to_vec()))
.collect();
let pre_hashes: Vec<(String, [u8; 32])> =
pre_snapshots.iter().map(|(name, data)| (name.clone(), hash_f32_slice(data))).collect();
let seq_len = 8;
let input_ids: Vec<u32> = (0..seq_len).map(|i| (i as u32) % 100 + 1).collect();
let target_ids: Vec<u32> = (0..seq_len).map(|i| (i as u32 + 1) % 100 + 1).collect();
for _ in 0..100 {
let batch = LMBatch::single(input_ids.clone(), target_ids.clone());
let _loss = trainer.train_batch(&batch);
}
let post_params = trainer.model().named_parameters();
for (pre_name, pre_hash) in &pre_hashes {
// Norm parameters remain trainable alongside the LoRA adapters (see F-FREEZE-007), so skip them.
if pre_name.contains("norm") {
continue;
}
let post_tensor = post_params
.iter()
.find(|(name, _)| name == pre_name)
.map_or_else(|| panic!("Parameter {pre_name} disappeared after training"), |(_, t)| t);
let post_data = post_tensor.data().to_vec();
let post_hash = hash_f32_slice(&post_data);
assert_eq!(
pre_hash, &post_hash,
"F-FREEZE-003 violated: base weight '{pre_name}' changed after 100 LoRA steps \
(hash mismatch)"
);
}
}
#[test]
fn test_falsify_f_freeze_004_lora_a_changes_after_training() {
use crate::train::{LMBatch, TransformerTrainConfig};
use crate::transformer::TransformerConfig;
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(4, 8.0, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.01)
.with_use_cuda(false);
let mut trainer = crate::train::TransformerTrainer::new(config);
let initial_a: Vec<Vec<f32>> =
trainer.lora_layers().unwrap().iter().map(|l| l.lora_a().data().to_vec()).collect();
let seq_len = 8;
let input_ids: Vec<u32> = (1..=seq_len as u32).collect();
let target_ids: Vec<u32> = (2..=seq_len as u32 + 1).collect();
for _ in 0..50 {
let batch = LMBatch::single(input_ids.clone(), target_ids.clone());
let _loss = trainer.train_batch(&batch);
}
let mut any_changed = false;
for (layer_idx, lora_layer) in trainer.lora_layers().unwrap().iter().enumerate() {
let current_a = lora_layer.lora_a().data();
let init_a = &initial_a[layer_idx];
let max_diff =
current_a.iter().zip(init_a.iter()).map(|(c, i)| (c - i).abs()).fold(0.0f32, f32::max);
if max_diff > 1e-8 {
any_changed = true;
}
}
assert!(
any_changed,
"F-FREEZE-004 violated: NO LoRA A matrix changed after 50 training steps. \
Gradients may not be flowing to A."
);
}
#[test]
fn test_falsify_f_freeze_005_lora_b_changes_after_training() {
use crate::train::{LMBatch, TransformerTrainConfig};
use crate::transformer::TransformerConfig;
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(4, 8.0, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.01)
.with_use_cuda(false);
let mut trainer = crate::train::TransformerTrainer::new(config);
let initial_b: Vec<Vec<f32>> =
trainer.lora_layers().unwrap().iter().map(|l| l.lora_b().data().to_vec()).collect();
for (i, b) in initial_b.iter().enumerate() {
let max_val = b.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
assert!(
max_val < 1e-10,
"Pre-condition: B[{i}] should be initialized to zeros, but max|B| = {max_val}"
);
}
let seq_len = 8;
let input_ids: Vec<u32> = (1..=seq_len as u32).collect();
let target_ids: Vec<u32> = (2..=seq_len as u32 + 1).collect();
for _ in 0..50 {
let batch = LMBatch::single(input_ids.clone(), target_ids.clone());
let _loss = trainer.train_batch(&batch);
}
let mut any_changed = false;
for lora_layer in trainer.lora_layers().unwrap() {
let current_b = lora_layer.lora_b().data();
let max_abs = current_b.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
if max_abs > 1e-8 {
any_changed = true;
}
}
assert!(
any_changed,
"F-FREEZE-005 violated: NO LoRA B matrix changed after 50 training steps. \
B matrices are still all zeros. Gradients may not be flowing to B."
);
}
#[test]
fn test_falsify_f_freeze_007_optimizer_no_base_weight_entries() {
let trainer = make_lora_trainer_and_train(5);
let num_lora_layers = trainer.lora_layers().unwrap().len();
// Each LoRA layer contributes two trainable tensors (A and B).
let num_lora_params = num_lora_layers * 2;
let num_transformer_layers = trainer.model().layers.len();
// Norm parameters stay trainable: two per transformer layer plus the final norm.
let num_norm_params = num_transformer_layers * 2 + 1;
let expected_trainable_count = num_lora_params + num_norm_params;
let total_model_params = trainer.model().parameters().len();
assert!(
total_model_params > expected_trainable_count,
"Precondition: model has {total_model_params} params, \
trainable should be only {expected_trainable_count}"
);
assert!(trainer.is_lora(), "Trainer must be in LoRA mode");
let lora_layers = trainer.lora_layers().unwrap();
let mut trainable_count = 0;
for _layer in lora_layers {
// One A matrix and one B matrix per LoRA layer.
trainable_count += 2;
}
// Norm parameters: two per transformer layer plus the final norm.
trainable_count += num_transformer_layers * 2 + 1;
assert_eq!(
trainable_count, expected_trainable_count,
"F-FREEZE-007: trainable param count mismatch: \
got {trainable_count}, expected {expected_trainable_count}"
);
}
#[test]
fn test_falsify_f_freeze_008_base_weights_not_updated_despite_grad() {
use crate::train::{LMBatch, TransformerTrainConfig};
use crate::transformer::TransformerConfig;
let model_config = TransformerConfig::tiny();
let config = TransformerTrainConfig::new(model_config.clone())
.with_lora(4, 8.0, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.01)
.with_use_cuda(false);
let model = crate::transformer::Transformer::new(&model_config);
let pre_weights: Vec<(String, Vec<f32>)> = model
.named_parameters()
.into_iter()
.filter(|(name, _)| {
// Snapshot only base weights other than norms, embeddings, and the LM head.
!name.contains("norm") && !name.contains("embed") && !name.contains("lm_head")
})
.map(|(name, t)| (name, t.data().to_vec()))
.collect();
let mut trainer = crate::train::TransformerTrainer::with_model(model, config);
let batch = LMBatch::single(vec![1, 2, 3, 4, 5, 6, 7, 8], vec![2, 3, 4, 5, 6, 7, 8, 9]);
for _ in 0..10 {
trainer.train_batch(&batch);
}
for (name, pre_data) in &pre_weights {
let post_data: Vec<f32> = trainer
.model()
.named_parameters()
.into_iter()
.find(|(n, _)| n == name)
.map(|(_, t)| t.data().to_vec())
.expect("parameter should exist");
assert_eq!(
pre_data, &post_data,
"F-FREEZE-008 violated: base weight '{name}' was updated by optimizer during LoRA training"
);
}
}