use scirs2_core::ndarray::{Array1, Array2};
use crate::{NeuralError, Result};
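/// Configuration for [`VeraLayer`], the seed-regenerated variant of VeRA
/// (Vector-based Random Matrix Adaptation, Kopiczko et al., 2023).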
#[derive(Debug, Clone)]
pub struct VeraConfig {
    /// Rank of the frozen random projections.
    pub rank: usize,
    /// Numerator of the `alpha / rank` scaling factor.
    pub alpha: f64,
    /// Seed used to deterministically regenerate the projection matrices.
    pub projection_seed: u64,
}
impl Default for VeraConfig {
fn default() -> Self {
Self {
rank: 64,
alpha: 1.0,
projection_seed: 42,
}
}
}
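/// VeRA adapter that regenerates its frozen projections `A` and `B` on
/// demand from `projection_seed`, so only the two scaling vectors are stored
/// and trained. The update is
/// `delta = (alpha / rank) * diag(d_scale) * B * diag(b_scale) * A`.
/// Note the naming in this variant: `b_scale` (length `rank`) scales the
/// rows of `A`, while `d_scale` (length `out_features`) scales the output
/// rows of the delta.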
pub struct VeraLayer {
    /// Frozen pretrained weight, shape `(out_features, in_features)`.
    weight: Array2<f64>,
    /// Trainable per-rank scales applied to the rows of the regenerated `A`.
    pub b_scale: Array1<f64>,
    /// Trainable per-output-row scales applied to the assembled delta.
    pub d_scale: Array1<f64>,
    out_features: usize,
    in_features: usize,
    /// Cached `alpha / rank`.
    scaling: f64,
    config: VeraConfig,
}
impl VeraLayer {
pub fn new(weight: Array2<f64>, config: &VeraConfig) -> Result<Self> {
if config.rank == 0 {
return Err(NeuralError::InvalidArgument(
"VeRA rank must be > 0".to_string(),
));
}
let (out_f, in_f) = (weight.nrows(), weight.ncols());
let scaling = config.alpha / config.rank as f64;
        // Per-rank scales start at `1/sqrt(rank)`; per-row scales start at one.
        let b_init = 1.0 / (config.rank as f64).sqrt();
Ok(Self {
weight,
b_scale: Array1::from_elem(config.rank, b_init),
d_scale: Array1::ones(out_f),
out_features: out_f,
in_features: in_f,
scaling,
config: config.clone(),
})
}
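    /// Deterministically regenerates the frozen `(rank, in_features)`
    /// projection `A` from `projection_seed` using the LCG stream below.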
pub fn generate_a(&self) -> Array2<f64> {
let mut state = self.config.projection_seed;
let r = self.config.rank;
let scale = 1.0 / (r as f64).sqrt();
Array2::from_shape_fn((r, self.in_features), |_| {
state = lcg_next(state);
lcg_to_uniform(state) * scale
})
}
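    /// Deterministically regenerates the frozen `(out_features, rank)`
    /// projection `B` from an offset of `projection_seed`.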
pub fn generate_b(&self) -> Array2<f64> {
let mut state = self
.config
.projection_seed
.wrapping_add(0xDEAD_BEEF_DEAD_BEEF);
let r = self.config.rank;
let scale = 1.0 / (r as f64).sqrt();
Array2::from_shape_fn((self.out_features, r), |_| {
state = lcg_next(state);
lcg_to_uniform(state) * scale
})
}
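    /// Computes the low-rank update
    /// `(alpha / rank) * diag(d_scale) * B * diag(b_scale) * A`.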
pub fn compute_delta(&self) -> Array2<f64> {
        let a = self.generate_a();
        let b_mat = self.generate_b();
let mut scaled_a = a;
for (i, mut row) in scaled_a.rows_mut().into_iter().enumerate() {
let s = self.b_scale[i];
row.mapv_inplace(|v| v * s);
}
let mut delta = b_mat.dot(&scaled_a) * self.scaling;
for (i, mut row) in delta.rows_mut().into_iter().enumerate() {
let s = self.d_scale[i];
row.mapv_inplace(|v| v * s);
}
delta
}
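    /// Returns `W + delta` without mutating the stored weight.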
pub fn effective_weight(&self) -> Array2<f64> {
&self.weight + &self.compute_delta()
}
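    /// Applies the adapted layer: `input (batch, in) -> output (batch, out)`.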
pub fn forward(&self, input: &Array2<f64>) -> Result<Array2<f64>> {
if input.ncols() != self.in_features {
return Err(NeuralError::DimensionMismatch(format!(
"VeRA expects {} input features, got {}",
self.in_features,
input.ncols()
)));
}
Ok(input.dot(&self.effective_weight().t()))
}
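    /// Number of trainable parameters: `rank + out_features`, versus
    /// `rank * (in_features + out_features)` for a comparable LoRA adapter.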
pub fn n_trainable_params(&self) -> usize {
self.b_scale.len() + self.d_scale.len()
}
pub fn dims(&self) -> (usize, usize) {
(self.out_features, self.in_features)
}
pub fn weight(&self) -> &Array2<f64> {
&self.weight
}
}
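/// Configuration for [`VeRALayer`], the variant that stores explicit
/// [`SharedRandomMatrices`] which can be reused across many layers.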
#[derive(Debug, Clone)]
pub struct VeRAConfig {
    /// Rank of the shared frozen projections.
    pub rank: usize,
    /// Numerator of the `alpha / rank` scaling factor.
    pub alpha: f64,
    /// Seed recorded for the shared matrices (generation itself happens in
    /// [`SharedRandomMatrices::new`]).
    pub seed: u64,
}
impl Default for VeRAConfig {
fn default() -> Self {
Self {
rank: 8,
alpha: 16.0,
seed: 42,
}
}
}
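/// Frozen random projections shared across all adapted layers of the same
/// shape; only the per-layer vectors `d` and `b_vec` are trained.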
#[derive(Debug, Clone)]
pub struct SharedRandomMatrices {
    /// Frozen Gaussian projection of shape `(rank, in_features)`.
    pub a: Array2<f64>,
    /// Frozen Gaussian projection of shape `(out_features, rank)`.
    pub b: Array2<f64>,
}
impl SharedRandomMatrices {
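    /// Builds the frozen projections. Both `a` and `b` are filled with
    /// seeded Gaussians scaled by `1 / sqrt(rank)`.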
pub fn new(in_features: usize, out_features: usize, rank: usize, seed: u64) -> Self {
        let a = Self::fill_normal(rank, in_features, seed, rank);
        // B is a frozen random projection as well; offsetting the seed
        // decorrelates it from A while keeping generation deterministic.
        // (An all-zero frozen B would make the adapter's delta identically
        // zero and untrainable, since only `d` and `b_vec` are learned.)
        let b = Self::fill_normal(out_features, rank, seed ^ 0x9E37_79B9_7F4A_7C15, rank);
Self { a, b }
}
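    /// Fills a `(rows, cols)` matrix with `N(0, 1/norm_rows)` samples drawn
    /// via the Box-Muller transform over the deterministic LCG stream.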
fn fill_normal(rows: usize, cols: usize, seed: u64, norm_rows: usize) -> Array2<f64> {
let sigma = 1.0 / (norm_rows as f64).sqrt();
let mut state = if seed == 0 {
0xCAFE_BABE_DEAD_BEEF
} else {
seed
};
let total = rows * cols;
let mut values = Vec::with_capacity(total);
let mut i = 0usize;
        // Box-Muller: each pair of uniform draws yields two normal samples.
        while i < total {
state = lcg_next(state);
let u1 = lcg_to_unit(state);
state = lcg_next(state);
let u2 = lcg_to_unit(state);
let r = (-2.0 * u1.ln()).sqrt();
let theta = std::f64::consts::TAU * u2;
values.push(r * theta.cos() * sigma);
i += 1;
if i < total {
values.push(r * theta.sin() * sigma);
i += 1;
}
}
        // `values.len() == rows * cols`, so this conversion cannot fail.
        Array2::from_shape_vec((rows, cols), values).unwrap_or_else(|_| Array2::zeros((rows, cols)))
}
}
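/// VeRA adapter with explicit shared projections and LoRA-style
/// merge/unmerge support. The effective weight is
/// `W + (alpha / rank) * diag(b_vec) * B * diag(d) * A`.
///
/// Illustrative usage (a sketch, not a doctest; `weight`, `x`, and `cfg` are
/// placeholders):
///
/// ```ignore
/// let shared = SharedRandomMatrices::new(in_f, out_f, cfg.rank, cfg.seed);
/// let mut layer = VeRALayer::new(weight, shared.clone(), &cfg)?;
/// let y = layer.forward(&x)?;   // uses W + delta
/// layer.merge()?;               // folds delta into W for inference
/// ```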
pub struct VeRALayer {
    /// Frozen pretrained weight, shape `(out_features, in_features)`.
    pub weight: Array2<f64>,
    /// Trainable per-rank scaling vector (`Lambda_d` in the VeRA paper).
    pub d: Array1<f64>,
    /// Trainable per-output-row scaling vector (`Lambda_b` in the VeRA paper).
    pub b_vec: Array1<f64>,
    /// Frozen random projections, shareable across layers.
    pub shared: SharedRandomMatrices,
    config: VeRAConfig,
    merged: bool,
    out_features: usize,
    in_features: usize,
}
impl VeRALayer {
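    /// Validates that the shared projections match `weight`'s dimensions and
    /// the configured rank, then builds an adapter whose initial delta is
    /// zero (`b_vec` starts at zero).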
pub fn new(
weight: Array2<f64>,
shared: SharedRandomMatrices,
config: &VeRAConfig,
) -> Result<Self> {
let out_f = weight.nrows();
let in_f = weight.ncols();
        let rank = config.rank;
        if rank == 0 {
            return Err(NeuralError::InvalidArgument(
                "VeRA rank must be > 0".to_string(),
            ));
        }
        if shared.a.nrows() != rank {
return Err(NeuralError::DimensionMismatch(format!(
"shared.a must have {} rows (rank), got {}",
rank,
shared.a.nrows()
)));
}
if shared.a.ncols() != in_f {
return Err(NeuralError::DimensionMismatch(format!(
"shared.a must have {} cols (in_features={in_f}), got {}",
in_f,
shared.a.ncols()
)));
}
if shared.b.nrows() != out_f {
return Err(NeuralError::DimensionMismatch(format!(
"shared.b must have {} rows (out_features={out_f}), got {}",
out_f,
shared.b.nrows()
)));
}
if shared.b.ncols() != rank {
return Err(NeuralError::DimensionMismatch(format!(
"shared.b must have {} cols (rank), got {}",
rank,
shared.b.ncols()
)));
}
Ok(Self {
weight,
            d: Array1::from_elem(rank, 0.01),
            // b starts at zero so the adapter begins as an identity update
            // (delta = 0), matching the VeRA paper's convention.
            b_vec: Array1::zeros(out_f),
shared,
config: config.clone(),
merged: false,
out_features: out_f,
in_features: in_f,
})
}
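    /// The `alpha / rank` scaling factor applied to the update.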
#[inline]
pub fn scaling(&self) -> f64 {
self.config.alpha / self.config.rank as f64
}
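    /// Computes `(alpha / rank) * diag(b_vec) * B * diag(d) * A`.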
pub fn delta_weight(&self) -> Array2<f64> {
        let a = &self.shared.a;
        let b_mat = &self.shared.b;
        let scaling = self.scaling();
let mut scaled_a = a.clone();
for (i, mut row) in scaled_a.rows_mut().into_iter().enumerate() {
let s = self.d[i];
row.mapv_inplace(|v| v * s);
}
let mut delta = b_mat.dot(&scaled_a) * scaling;
for (i, mut row) in delta.rows_mut().into_iter().enumerate() {
let s = self.b_vec[i];
row.mapv_inplace(|v| v * s);
}
delta
}
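    /// Returns the weight actually used in `forward`: the stored weight when
    /// merged, otherwise `W + delta`.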
pub fn effective_weight(&self) -> Array2<f64> {
if self.merged {
self.weight.clone()
} else {
&self.weight + &self.delta_weight()
}
}
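    /// Folds the current delta into `weight` so `forward` becomes a plain
    /// matrix multiply; errors if already merged.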
pub fn merge(&mut self) -> Result<()> {
if self.merged {
return Err(NeuralError::InvalidArgument(
"VeRALayer is already merged".to_string(),
));
}
let delta = self.delta_weight();
        self.weight = &self.weight + &delta;
self.merged = true;
Ok(())
}
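    /// Subtracts the delta back out of `weight`; errors if not merged.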
pub fn unmerge(&mut self) -> Result<()> {
if !self.merged {
return Err(NeuralError::InvalidArgument(
"VeRALayer is not merged".to_string(),
));
}
        let delta = self.delta_weight();
        self.weight = &self.weight - &delta;
        self.merged = false;
Ok(())
}
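    /// Applies the adapted layer: `input (batch, in) -> output (batch, out)`.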
pub fn forward(&self, input: &Array2<f64>) -> Result<Array2<f64>> {
if input.ncols() != self.in_features {
return Err(NeuralError::DimensionMismatch(format!(
"VeRALayer expects {} input features, got {}",
self.in_features,
input.ncols()
)));
}
        // Equivalent to W * x^T transposed back, but without the extra copies.
        Ok(input.dot(&self.effective_weight().t()))
}
pub fn trainable_params(&self) -> usize {
self.d.len() + self.b_vec.len()
}
pub fn total_params(&self) -> usize {
self.weight.len()
}
pub fn is_merged(&self) -> bool {
self.merged
}
pub fn config(&self) -> &VeRAConfig {
&self.config
}
pub fn dims(&self) -> (usize, usize) {
(self.out_features, self.in_features)
}
}
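/// One step of Knuth's 64-bit MMIX linear congruential generator.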
#[inline]
fn lcg_next(state: u64) -> u64 {
state
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407)
}
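/// Maps the top 53 bits of the LCG state to a uniform value in `[-1, 1)`.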
#[inline]
fn lcg_to_uniform(state: u64) -> f64 {
let frac = (state >> 11) as f64 / (1u64 << 53) as f64;
frac * 2.0 - 1.0
}
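/// Maps the LCG state to `(0, 1)`, clamping away zero so `ln` stays finite
/// in the Box-Muller transform.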
#[inline]
fn lcg_to_unit(state: u64) -> f64 {
let frac = (state >> 11) as f64 / (1u64 << 53) as f64;
if frac == 0.0 {
f64::EPSILON
} else {
frac
}
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::{Array1, Array2};
fn make_vera_layer(out: usize, in_f: usize, rank: usize) -> VeraLayer {
let w = Array2::from_shape_fn((out, in_f), |(i, j)| (i * in_f + j) as f64 * 0.1);
VeraLayer::new(
w,
&VeraConfig {
rank,
..Default::default()
},
)
.expect("VeraLayer::new failed")
}
#[test]
fn vera_delta_shape() {
let layer = make_vera_layer(8, 6, 4);
let delta = layer.compute_delta();
assert_eq!(delta.shape(), &[8, 6]);
}
#[test]
fn vera_zero_b_scale() {
let mut layer = make_vera_layer(4, 6, 8);
layer.b_scale.fill(0.0);
let delta = layer.compute_delta();
for v in delta.iter() {
assert!(v.abs() < 1e-14, "expected zero delta, got {v}");
}
}
#[test]
fn vera_zero_d_scale() {
let mut layer = make_vera_layer(4, 6, 8);
layer.d_scale.fill(0.0);
let delta = layer.compute_delta();
for v in delta.iter() {
assert!(v.abs() < 1e-14, "expected zero delta, got {v}");
}
}
#[test]
fn vera_reproducible() {
let w = Array2::<f64>::eye(6);
let cfg = VeraConfig {
rank: 4,
projection_seed: 1234,
..Default::default()
};
let l1 = VeraLayer::new(w.clone(), &cfg).expect("l1");
let l2 = VeraLayer::new(w, &cfg).expect("l2");
let a1 = l1.generate_a();
let a2 = l2.generate_a();
let b1 = l1.generate_b();
let b2 = l2.generate_b();
for (x, y) in a1.iter().zip(a2.iter()) {
assert!((x - y).abs() < 1e-15, "A matrices differ");
}
for (x, y) in b1.iter().zip(b2.iter()) {
assert!((x - y).abs() < 1e-15, "B matrices differ");
}
}
#[test]
fn vera_forward_output_shape() {
let layer = make_vera_layer(5, 7, 16);
let input = Array2::from_elem((3, 7), 0.5);
let out = layer.forward(&input).expect("forward");
assert_eq!(out.shape(), &[3, 5]);
}
#[test]
fn vera_n_params_efficient() {
let rank = 16_usize;
let out = 64_usize;
let in_f = 64_usize;
let layer = make_vera_layer(out, in_f, rank);
let expected = rank + out;
assert_eq!(layer.n_trainable_params(), expected);
        let lora_params = rank * in_f + out * rank;
        assert!(
layer.n_trainable_params() < lora_params,
"VeRA ({}) should use fewer params than LoRA ({})",
layer.n_trainable_params(),
lora_params
);
}
#[test]
fn vera_effective_weight_changes_with_scale() {
let layer = make_vera_layer(4, 6, 8);
let base = layer.effective_weight();
let mut layer2 = make_vera_layer(4, 6, 8);
layer2.b_scale.fill(2.0);
let modified = layer2.effective_weight();
let all_same = base
.iter()
.zip(modified.iter())
.all(|(a, b)| (a - b).abs() < 1e-14);
assert!(
!all_same,
"effective_weight did not change when b_scale changed"
);
}
#[test]
fn vera_invalid_rank_zero() {
let w = Array2::<f64>::eye(4);
let cfg = VeraConfig {
rank: 0,
..Default::default()
};
assert!(VeraLayer::new(w, &cfg).is_err());
}
#[test]
fn vera_config_defaults() {
let cfg = VeRAConfig::default();
assert_eq!(cfg.rank, 8);
assert!((cfg.alpha - 16.0).abs() < 1e-15);
assert_eq!(cfg.seed, 42);
}
#[test]
fn shared_matrices_dimensions() {
let shared = SharedRandomMatrices::new(32, 64, 8, 42);
assert_eq!(shared.a.shape(), &[8, 32]);
assert_eq!(shared.b.shape(), &[64, 8]);
}
#[test]
    fn shared_matrices_b_nonzero() {
        let shared = SharedRandomMatrices::new(16, 32, 4, 7);
        let nonzero = shared.b.iter().any(|v| v.abs() > 1e-10);
        assert!(nonzero, "shared.b should have nonzero entries");
    }
#[test]
fn shared_matrices_a_nonzero() {
let shared = SharedRandomMatrices::new(16, 32, 4, 7);
let nonzero = shared.a.iter().any(|v| v.abs() > 1e-10);
assert!(nonzero, "shared.a should have nonzero entries");
}
#[test]
fn shared_matrices_reproducible() {
let s1 = SharedRandomMatrices::new(10, 20, 4, 999);
let s2 = SharedRandomMatrices::new(10, 20, 4, 999);
        for (x, y) in s1.a.iter().zip(s2.a.iter()).chain(s1.b.iter().zip(s2.b.iter())) {
            assert!(
                (x - y).abs() < 1e-15,
                "SharedRandomMatrices not reproducible"
            );
        }
}
fn make_vera(out: usize, in_f: usize, rank: usize) -> VeRALayer {
let w = Array2::from_shape_fn((out, in_f), |(i, j)| (i * in_f + j) as f64 * 0.05);
let shared = SharedRandomMatrices::new(in_f, out, rank, 42);
let cfg = VeRAConfig {
rank,
alpha: rank as f64,
seed: 42,
};
VeRALayer::new(w, shared, &cfg).expect("VeRALayer::new failed")
}
#[test]
fn vera_layer_creation_ok() {
let _layer = make_vera(8, 16, 4);
}
#[test]
fn vera_layer_dim_mismatch_a_rows() {
let w = Array2::<f64>::zeros((8, 16));
let mut shared = SharedRandomMatrices::new(16, 8, 4, 1);
        shared.a = Array2::zeros((3, 16));
        let cfg = VeRAConfig {
rank: 4,
alpha: 4.0,
seed: 1,
};
assert!(VeRALayer::new(w, shared, &cfg).is_err());
}
#[test]
fn vera_layer_dim_mismatch_a_cols() {
let w = Array2::<f64>::zeros((8, 16));
let mut shared = SharedRandomMatrices::new(16, 8, 4, 1);
        shared.a = Array2::zeros((4, 10));
        let cfg = VeRAConfig {
rank: 4,
alpha: 4.0,
seed: 1,
};
assert!(VeRALayer::new(w, shared, &cfg).is_err());
}
#[test]
fn vera_layer_dim_mismatch_b_rows() {
let w = Array2::<f64>::zeros((8, 16));
let mut shared = SharedRandomMatrices::new(16, 8, 4, 1);
        shared.b = Array2::zeros((5, 4));
        let cfg = VeRAConfig {
rank: 4,
alpha: 4.0,
seed: 1,
};
assert!(VeRALayer::new(w, shared, &cfg).is_err());
}
#[test]
fn vera_layer_delta_shape() {
let layer = make_vera(6, 10, 4);
let delta = layer.delta_weight();
assert_eq!(delta.shape(), &[6, 10]);
}
#[test]
fn vera_layer_effective_weight_shape() {
let layer = make_vera(6, 10, 4);
let eff = layer.effective_weight();
assert_eq!(eff.shape(), &[6, 10]);
}
#[test]
fn vera_layer_effective_weight_equals_weight_plus_delta() {
        let mut layer = make_vera(5, 8, 4);
        // Nonzero scaling vectors make the check non-trivial.
        layer.d = Array1::from_elem(4, 0.5);
        layer.b_vec = Array1::from_elem(5, 0.3);
        let delta = layer.delta_weight();
        let eff = layer.effective_weight();
        let expected = &layer.weight + &delta;
for (a, b) in eff.iter().zip(expected.iter()) {
assert!(
(a - b).abs() < 1e-14,
"effective_weight != weight + delta_weight"
);
}
}
#[test]
fn vera_layer_trainable_params() {
let rank = 4_usize;
let out = 12_usize;
let in_f = 20_usize;
let layer = make_vera(out, in_f, rank);
assert_eq!(layer.trainable_params(), rank + out);
}
#[test]
fn vera_layer_total_params() {
let out = 12_usize;
let in_f = 20_usize;
let layer = make_vera(out, in_f, 4);
assert_eq!(layer.total_params(), out * in_f);
}
#[test]
fn vera_layer_merge_sets_flag() {
let mut layer = make_vera(4, 8, 2);
assert!(!layer.is_merged());
layer.merge().expect("merge");
assert!(layer.is_merged());
}
#[test]
fn vera_layer_double_merge_errors() {
let mut layer = make_vera(4, 8, 2);
layer.merge().expect("first merge");
assert!(layer.merge().is_err(), "second merge should fail");
}
#[test]
fn vera_layer_unmerge_restores_flag() {
let mut layer = make_vera(4, 8, 2);
layer.merge().expect("merge");
layer.unmerge().expect("unmerge");
assert!(!layer.is_merged());
}
#[test]
fn vera_layer_unmerge_without_merge_errors() {
let mut layer = make_vera(4, 8, 2);
assert!(
layer.unmerge().is_err(),
"unmerge without prior merge should fail"
);
}
#[test]
fn vera_layer_merge_unmerge_roundtrip() {
        let mut layer = make_vera(6, 12, 3);
        // Nonzero scaling vectors make the roundtrip exercise a real delta.
        layer.d = Array1::from_elem(3, 0.4);
        layer.b_vec = Array1::from_elem(6, 0.2);
        let eff_before = layer.effective_weight();
        let mut merged = make_vera(6, 12, 3);
        merged.d = layer.d.clone();
        merged.b_vec = layer.b_vec.clone();
merged.merge().expect("merge");
merged.unmerge().expect("unmerge");
let eff_after = merged.effective_weight();
for (a, b) in eff_before.iter().zip(eff_after.iter()) {
assert!((a - b).abs() < 1e-12, "roundtrip error: {a} vs {b}");
}
}
#[test]
fn vera_layer_forward_shape() {
let layer = make_vera(8, 16, 4);
let input = Array2::from_elem((5, 16), 0.1_f64);
let out = layer.forward(&input).expect("forward");
assert_eq!(out.shape(), &[5, 8]);
}
#[test]
fn vera_layer_forward_wrong_features_errors() {
let layer = make_vera(8, 16, 4);
        let input = Array2::from_elem((3, 10), 0.1_f64);
        assert!(layer.forward(&input).is_err());
}
#[test]
fn vera_layer_scaling_alpha_over_rank() {
let cfg = VeRAConfig {
rank: 4,
alpha: 8.0,
seed: 1,
};
let w = Array2::<f64>::eye(4);
let shared = SharedRandomMatrices::new(4, 4, 4, 1);
let layer = VeRALayer::new(w, shared, &cfg).expect("new");
assert!((layer.scaling() - 2.0).abs() < 1e-15);
}
#[test]
fn vera_layer_shared_matrices_reuse() {
let shared = SharedRandomMatrices::new(8, 6, 4, 77);
let cfg = VeRAConfig {
rank: 4,
alpha: 4.0,
seed: 77,
};
let w1 = Array2::<f64>::zeros((6, 8));
let w2 = Array2::from_elem((6, 8), 0.1_f64);
let mut layer1 = VeRALayer::new(w1, shared.clone(), &cfg).expect("l1");
let mut layer2 = VeRALayer::new(w2, shared, &cfg).expect("l2");
let d_val = Array1::from_elem(4, 0.5_f64);
let b_val = Array1::from_elem(6, 0.3_f64);
layer1.d = d_val.clone();
layer1.b_vec = b_val.clone();
layer2.d = d_val;
layer2.b_vec = b_val;
let delta1 = layer1.delta_weight();
let delta2 = layer2.delta_weight();
for (a, b) in delta1.iter().zip(delta2.iter()) {
assert!(
(a - b).abs() < 1e-13,
"deltas differ despite identical shared matrices: {a} vs {b}"
);
}
}
}