use super::*;
use crate::transformer::{Transformer, TransformerConfig};
fn make_training_batches(count: usize, seq_len: usize) -> Vec<LMBatch> {
(0..count)
.map(|i| {
let offset = (i * 7) as u32 % 200;
let input_ids: Vec<u32> =
(0..seq_len).map(|j| (offset + j as u32 + 1) % 250 + 1).collect();
let target_ids: Vec<u32> = input_ids.iter().map(|&x| (x % 250) + 1).collect();
LMBatch::single(input_ids, target_ids)
})
.collect()
}
#[test]
fn test_falsify_f_conv_003_cpu_lora_loss_decreases() {
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(8, 16.0, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.01)
.with_max_steps(200);
let mut trainer = TransformerTrainer::new(config);
let batch = LMBatch::single(vec![1, 2, 3, 4, 5, 6, 7, 8], vec![2, 3, 4, 5, 6, 7, 8, 9]);
let first_loss = trainer.train_batch(&batch);
assert!(first_loss > 0.0, "Initial loss must be positive");
let mut last_loss = first_loss;
for _ in 1..200 {
last_loss = trainer.train_batch(&batch);
}
assert!(
last_loss < first_loss,
"F-CONV-003 FALSIFIED: loss after 200 steps ({last_loss:.4}) \
should be less than initial loss ({first_loss:.4})"
);
}
#[test]
fn test_falsify_f_conv_005_rslora_stable_high_rank() {
let rank = 64;
let alpha = rank as f32 * 2.0;
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(rank, alpha, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.05)
.with_max_steps(200);
let mut trainer = TransformerTrainer::new(config);
let batch = LMBatch::single(vec![1, 2, 3, 4, 5, 6, 7, 8], vec![2, 3, 4, 5, 6, 7, 8, 9]);
let first_loss = trainer.train_batch(&batch);
assert!(first_loss.is_finite(), "Initial loss must be finite");
let mut any_diverged = false;
let mut last_loss = first_loss;
for _ in 1..200 {
last_loss = trainer.train_batch(&batch);
if !last_loss.is_finite() || last_loss > first_loss * 10.0 {
any_diverged = true;
break;
}
}
assert!(!any_diverged, "F-CONV-005 FALSIFIED: rsLoRA diverged at rank={rank}");
assert!(
last_loss < first_loss,
"F-CONV-005 FALSIFIED: rsLoRA at rank={rank} did not reduce loss \
(initial={first_loss:.4}, final={last_loss:.4})"
);
}
#[test]
#[ignore = "non-deterministic: higher LoRA rank can overparameterize, causing optimization instability"]
fn test_falsify_f_conv_006_higher_rank_lower_loss() {
let ranks = [4, 16, 32];
let mut final_losses = Vec::new();
let batch = LMBatch::single(vec![1, 2, 3, 4, 5, 6, 7, 8], vec![2, 3, 4, 5, 6, 7, 8, 9]);
for &rank in &ranks {
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(rank, rank as f32 * 2.0, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.05)
.with_max_steps(200);
let mut trainer = TransformerTrainer::new(config);
for _ in 0..200 {
trainer.train_batch(&batch);
}
let (loss, _, _) =
trainer.forward_single(&[1, 2, 3, 4, 5, 6, 7, 8], &[2, 3, 4, 5, 6, 7, 8, 9]);
final_losses.push((rank, loss));
}
let loss_r4 = final_losses[0].1;
let loss_r32 = final_losses[2].1;
assert!(
loss_r32 <= loss_r4 * 1.05,
"F-CONV-006 FALSIFIED: rank=32 loss ({loss_r32:.4}) much worse than rank=4 loss ({loss_r4:.4}). \
All losses: {final_losses:?}"
);
}
#[test]
#[ignore = "non-deterministic: 200-step LoRA training may not converge with random init in CI"]
fn test_falsify_f_conv_007_lora_loss_less_than_random() {
let baseline_config = TransformerTrainConfig::new(TransformerConfig::tiny()).with_lora(
8,
16.0,
vec!["q_proj".to_string(), "v_proj".to_string()],
);
let baseline_trainer = TransformerTrainer::new(baseline_config);
let (baseline_loss, _, _) =
baseline_trainer.forward_single(&[1, 2, 3, 4, 5, 6, 7, 8], &[2, 3, 4, 5, 6, 7, 8, 9]);
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(8, 16.0, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.01)
.with_max_steps(200);
let mut trainer = TransformerTrainer::new(config);
let batches = make_training_batches(200, 8);
for batch in &batches {
trainer.train_batch(batch);
}
let (trained_loss, _, _) =
trainer.forward_single(&[1, 2, 3, 4, 5, 6, 7, 8], &[2, 3, 4, 5, 6, 7, 8, 9]);
assert!(
trained_loss < baseline_loss,
"F-CONV-007 FALSIFIED: trained LoRA loss ({trained_loss:.4}) \
should be < untrained baseline ({baseline_loss:.4})"
);
}
#[test]
fn test_falsify_f_conv_008_no_catastrophic_forgetting() {
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(8, 16.0, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.01);
let mut trainer = TransformerTrainer::new(config);
let batches = make_training_batches(50, 8);
let epoch1_loss = trainer.train_epoch(&batches);
assert!(epoch1_loss > 0.0, "Epoch 1 loss must be positive");
let _epoch2_loss = trainer.train_epoch(&batches);
let epoch3_loss = trainer.train_epoch(&batches);
assert!(
epoch3_loss < epoch1_loss,
"F-CONV-008 FALSIFIED: catastrophic forgetting detected. \
epoch3 loss ({epoch3_loss:.4}) should be < epoch1 loss ({epoch1_loss:.4})"
);
}
#[test]
fn test_falsify_f_edge_001_rank_one() {
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(1, 2.0, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.01)
.with_max_steps(50);
let mut trainer = TransformerTrainer::new(config);
assert!(trainer.is_lora(), "LoRA should be active with rank=1");
let batches = make_training_batches(50, 8);
let first_loss = trainer.train_batch(&batches[0]);
for batch in &batches[1..] {
trainer.train_batch(batch);
}
let (final_loss, _, _) =
trainer.forward_single(&[1, 2, 3, 4, 5, 6, 7, 8], &[2, 3, 4, 5, 6, 7, 8, 9]);
assert!(
final_loss < first_loss,
"F-EDGE-001 FALSIFIED: rank=1 LoRA did not decrease loss \
(initial={first_loss:.4}, final={final_loss:.4})"
);
}
#[test]
fn test_falsify_f_edge_002_full_rank() {
let model_config = TransformerConfig::tiny();
let full_rank = model_config.hidden_size;
let config = TransformerTrainConfig::new(model_config)
.with_lora(
full_rank,
full_rank as f32 * 2.0,
vec!["q_proj".to_string(), "v_proj".to_string()],
)
.with_lr(0.01)
.with_max_steps(50);
let mut trainer = TransformerTrainer::new(config);
assert!(trainer.is_lora(), "LoRA should be active at full rank");
let lora = trainer.lora_layers().expect("LoRA layers should exist");
assert_eq!(lora[0].rank(), full_rank, "LoRA rank should equal hidden_size");
let batches = make_training_batches(50, 8);
let first_loss = trainer.train_batch(&batches[0]);
assert!(first_loss.is_finite(), "Full-rank LoRA must produce finite loss");
for batch in &batches[1..] {
let loss = trainer.train_batch(batch);
assert!(loss.is_finite(), "Full-rank LoRA loss must stay finite");
}
let (final_loss, _, _) =
trainer.forward_single(&[1, 2, 3, 4, 5, 6, 7, 8], &[2, 3, 4, 5, 6, 7, 8, 9]);
assert!(
final_loss < first_loss,
"F-EDGE-002 FALSIFIED: full-rank LoRA (r={full_rank}) did not decrease loss \
(initial={first_loss:.4}, final={final_loss:.4})"
);
}
#[test]
fn test_falsify_f_edge_003_single_target_module() {
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(4, 8.0, vec!["q_proj".to_string()])
.with_lr(0.01)
.with_max_steps(10);
let mut trainer = TransformerTrainer::new(config);
let lora = trainer.lora_layers().expect("LoRA layers should exist");
assert_eq!(lora.len(), 2, "Targeting only q_proj on 2-layer model should create 2 LoRA layers");
let b_before: Vec<Vec<f32>> = lora.iter().map(|l| l.lora_b().data().to_vec()).collect();
let batches = make_training_batches(10, 8);
for batch in &batches {
trainer.train_batch(batch);
}
let lora_after = trainer.lora_layers().expect("LoRA layers should exist");
let mut any_b_changed = false;
for (i, layer) in lora_after.iter().enumerate() {
let b_data = layer.lora_b().data();
let max_diff: f32 =
b_data.iter().zip(&b_before[i]).map(|(a, b)| (a - b).abs()).fold(0.0f32, f32::max);
if max_diff > 1e-10 {
any_b_changed = true;
}
}
assert!(
any_b_changed,
"F-EDGE-003 FALSIFIED: LoRA B weights for q_proj did not change after training"
);
let named = trainer.model().named_parameters();
let has_frozen_v_proj = named.iter().any(|(name, _)| name.contains("v_proj"));
assert!(has_frozen_v_proj, "Model should still have v_proj weights (just not adapted)");
}
#[test]
fn test_falsify_f_edge_006_no_lora_full_finetune() {
let model_config = TransformerConfig::tiny();
let config = TransformerTrainConfig::new(model_config.clone()).with_lr(0.01);
assert!(!config.is_lora(), "Config without .with_lora() should not be LoRA");
let model = Transformer::new(&model_config);
let weights_before: Vec<(String, Vec<f32>)> =
model.named_parameters().into_iter().map(|(name, t)| (name, t.data().to_vec())).collect();
let mut trainer = TransformerTrainer::with_model(model, config);
assert!(!trainer.is_lora(), "Trainer should not be in LoRA mode");
assert!(trainer.lora_layers().is_none(), "No LoRA layers should exist");
let batches = make_training_batches(10, 8);
for batch in &batches {
trainer.train_batch(batch);
}
let weights_after: Vec<(String, Vec<f32>)> = trainer
.model()
.named_parameters()
.into_iter()
.map(|(name, t)| (name, t.data().to_vec()))
.collect();
let mut changed_count = 0;
for ((name_b, data_b), (_name_a, data_a)) in weights_before.iter().zip(&weights_after) {
if data_b == data_a {
eprintln!("F-EDGE-006 WARNING: weight '{name_b}' unchanged in full FT");
} else {
changed_count += 1;
}
}
assert!(changed_count > 0, "F-EDGE-006 FALSIFIED: no weights changed during full fine-tuning");
let attn_changed =
weights_before.iter().zip(&weights_after).any(|((name, before), (_, after))| {
(name.contains("q_proj") || name.contains("v_proj")) && before != after
});
assert!(
attn_changed,
"F-EDGE-006 FALSIFIED: attention weights did not change in full fine-tuning"
);
}
#[test]
fn test_falsify_f_edge_007_grad_accumulation_with_lora() {
let config = TransformerTrainConfig::new(TransformerConfig::tiny())
.with_lora(8, 16.0, vec!["q_proj".to_string(), "v_proj".to_string()])
.with_lr(0.05)
.with_accumulation_steps(4);
let mut trainer = TransformerTrainer::new(config);
assert!(trainer.is_lora(), "LoRA should be active");
let batch = LMBatch::single(vec![1, 2, 3, 4, 5, 6, 7, 8], vec![2, 3, 4, 5, 6, 7, 8, 9]);
for _ in 0..3 {
trainer.train_batch(&batch);
}
assert_eq!(
trainer.step(),
0,
"No optimizer step should occur before accumulation_steps batches"
);
trainer.train_batch(&batch);
assert_eq!(trainer.step(), 1, "Optimizer step should occur after accumulation_steps=4 batches");
for _ in 0..36 {
trainer.train_batch(&batch);
}
let lora = trainer.lora_layers().expect("LoRA layers should exist");
let any_nonzero_b = lora.iter().any(|l| l.lora_b().data().iter().any(|&x| x.abs() > 1e-10));
assert!(
any_nonzero_b,
"F-EDGE-007 FALSIFIED: LoRA B weights are still zero after gradient accumulation training"
);
}