use scirs2_core::ndarray::{Array1, Array2};
use crate::{NeuralError, Result};
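/// Configuration for DoRA (Weight-Decomposed Low-Rank Adaptation).
///
/// DoRA splits an adapted weight into a learned per-output-row magnitude and a
/// direction formed by the frozen base weight plus a LoRA-style low-rank
/// update `B·A` scaled by `alpha / rank`.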
#[derive(Debug, Clone)]
pub struct DoraConfig {
    /// Rank of the low-rank update; must satisfy `1 <= rank <= min(out, in)`.
    pub rank: usize,
    /// Scaling numerator: the update `B·A` is multiplied by `alpha / rank`.
    pub alpha: f64,
    /// Floor for norms to avoid division by zero; note this implementation
    /// normalizes per output row, despite the field's name.
    pub column_norm_eps: f64,
}
impl Default for DoraConfig {
fn default() -> Self {
Self {
rank: 4,
alpha: 1.0,
column_norm_eps: 1e-8,
}
}
}
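/// A linear layer with a DoRA adapter over a frozen base weight.
///
/// Trainable state is the pair of low-rank factors plus the per-row magnitude
/// vector; the base `weight` only changes via `merge_into_base`.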
pub struct DoraLinear {
    /// Frozen base weight, shape `[out_features, in_features]`.
    weight: Array2<f64>,
    /// Low-rank factor A, shape `[rank, in_features]`.
    pub lora_a: Array2<f64>,
    /// Low-rank factor B, shape `[out_features, rank]`; zero at construction.
    pub lora_b: Array2<f64>,
    /// Learned target L2 norm for each output row, length `out_features`.
    pub magnitude: Array1<f64>,
    /// Precomputed `alpha / rank`.
    scaling: f64,
    config: DoraConfig,
}
impl DoraLinear {
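    /// Wraps `weight` (shape `[out_features, in_features]`) in a DoRA adapter,
    /// validating `config.rank` and initializing `magnitude` from the row norms.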
pub fn new(weight: Array2<f64>, config: &DoraConfig) -> Result<Self> {
let (out_features, in_features) = (weight.nrows(), weight.ncols());
if config.rank == 0 {
return Err(NeuralError::InvalidArgument(
"DoRA rank must be > 0".to_string(),
));
}
if config.rank > in_features.min(out_features) {
return Err(NeuralError::InvalidArgument(format!(
"DoRA rank {} is invalid for weight [{out_features}×{in_features}]: \
rank must be ≤ min(out, in) = {}",
config.rank,
in_features.min(out_features)
)));
}
let scaling = config.alpha / config.rank as f64;
        // B starts at zero, so the initial update B·A vanishes and a fresh
        // layer reproduces the (renormalized) base weight exactly.
        let lora_b = Array2::zeros((out_features, config.rank));
        let lora_a = Array2::from_elem((config.rank, in_features), 0.02);
        // Initialize each magnitude to the L2 norm of the corresponding weight
        // row, floored at `column_norm_eps` so later divisions stay safe.
        let magnitude = Array1::from_shape_fn(out_features, |i| {
            row_l2_norm(weight.row(i).iter().copied()).max(config.column_norm_eps)
        });
Ok(Self {
weight,
lora_a,
lora_b,
magnitude,
scaling,
config: config.clone(),
})
}
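    /// Recomposes the effective weight: each row of `W + (alpha/rank)·B·A` is
    /// rescaled so its L2 norm equals that row's learned magnitude.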
pub fn effective_weight(&self) -> Array2<f64> {
        // Low-rank update: ΔW = (alpha / rank) · B·A.
        let delta = self.lora_b.dot(&self.lora_a) * self.scaling;
        let adapted = &self.weight + &delta;
        // Rescale each row of W + ΔW so its L2 norm equals the learned
        // magnitude for that output feature.
        let mut normalized = adapted;
        for i in 0..normalized.nrows() {
            let row_norm =
                row_l2_norm(normalized.row(i).iter().copied()).max(self.config.column_norm_eps);
            let scale = self.magnitude[i] / row_norm;
            normalized.row_mut(i).mapv_inplace(|v| v * scale);
}
normalized
}
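    /// Applies the layer to a `[batch, in_features]` input and returns a
    /// `[batch, out_features]` output. The effective weight is recomputed on
    /// every call, so callers can cache `effective_weight()` when the adapter
    /// is not training.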
pub fn forward(&self, input: &Array2<f64>) -> Result<Array2<f64>> {
let in_features = self.weight.ncols();
if input.ncols() != in_features {
return Err(NeuralError::DimensionMismatch(format!(
"DoRA expects {} input features, got {}",
in_features,
input.ncols()
)));
}
let w = self.effective_weight();
Ok(input.dot(&w.t()))
}
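    /// Folds the adapter into the base weight and resets the adapter state so
    /// that subsequent forward passes produce numerically identical results.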
pub fn merge_into_base(&mut self) {
        // Fold the adapter into the base weight and zero the low-rank factors.
        self.weight = self.effective_weight();
        self.lora_a.fill(0.0);
        self.lora_b.fill(0.0);
        // With B = 0 the update vanishes, so resetting each magnitude to the
        // merged row norm leaves `effective_weight` (and forward) unchanged.
        for i in 0..self.weight.nrows() {
            let norm =
                row_l2_norm(self.weight.row(i).iter().copied()).max(self.config.column_norm_eps);
            self.magnitude[i] = norm;
}
}
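    /// Number of trainable parameters: both low-rank factors plus the
    /// magnitude vector (the frozen base weight is excluded).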
pub fn n_trainable_params(&self) -> usize {
self.lora_a.len() + self.lora_b.len() + self.magnitude.len()
}
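    /// Returns `(out_features, in_features)` of the base weight.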
pub fn dims(&self) -> (usize, usize) {
(self.weight.nrows(), self.weight.ncols())
}
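    /// Read-only access to the base weight.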
pub fn weight(&self) -> &Array2<f64> {
&self.weight
}
}
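/// L2 norm of a sequence of values; used for per-row weight norms.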
fn row_l2_norm(iter: impl Iterator<Item = f64>) -> f64 {
iter.map(|v| v * v).sum::<f64>().sqrt()
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::Array2;
fn make_layer(out: usize, in_f: usize, rank: usize) -> DoraLinear {
let w = Array2::from_shape_fn((out, in_f), |(i, j)| (i * in_f + j) as f64 * 0.1);
DoraLinear::new(
w,
&DoraConfig {
rank,
..Default::default()
},
)
.expect("DoraLinear::new failed")
}
#[test]
fn dora_effective_weight_shape() {
let layer = make_layer(8, 6, 2);
let w = layer.effective_weight();
assert_eq!(w.shape(), &[8, 6]);
}
#[test]
fn dora_zero_adapter_identity() {
let w = Array2::from_shape_fn((4, 4), |(i, j)| (i * 4 + j + 1) as f64);
let cfg = DoraConfig {
rank: 2,
..Default::default()
};
let layer = DoraLinear::new(w.clone(), &cfg).expect("new");
let eff = layer.effective_weight();
for i in 0..4 {
let norm_w = row_l2_norm(w.row(i).iter().copied()).max(cfg.column_norm_eps);
let mag = layer.magnitude[i];
for j in 0..4 {
let expected = w[[i, j]] * mag / norm_w;
assert!(
(eff[[i, j]] - expected).abs() < 1e-10,
"row {i} col {j}: expected {expected}, got {}",
eff[[i, j]]
);
}
}
}
#[test]
fn dora_magnitude_initialized_correctly() {
let w = Array2::from_shape_fn((3, 4), |(i, j)| (i + j + 1) as f64);
let cfg = DoraConfig {
rank: 2,
..Default::default()
};
let layer = DoraLinear::new(w.clone(), &cfg).expect("new");
for i in 0..3 {
let expected = row_l2_norm(w.row(i).iter().copied()).max(cfg.column_norm_eps);
assert!(
(layer.magnitude[i] - expected).abs() < 1e-10,
"magnitude mismatch at row {i}: expected {expected}, got {}",
layer.magnitude[i]
);
}
}
#[test]
fn dora_forward_output_shape() {
let layer = make_layer(5, 8, 3);
let input = Array2::from_elem((4, 8), 1.0);
let out = layer.forward(&input).expect("forward");
assert_eq!(out.shape(), &[4, 5]);
}
#[test]
fn dora_merge_preserves_forward() {
let w = Array2::from_shape_fn((4, 6), |(i, j)| (i * 6 + j + 1) as f64 * 0.1);
let cfg = DoraConfig {
rank: 2,
..Default::default()
};
let mut layer = DoraLinear::new(w, &cfg).expect("new");
layer.lora_b = Array2::from_shape_fn((4, 2), |(i, j)| (i as f64 - j as f64) * 0.01);
let input = Array2::from_shape_fn((3, 6), |(i, j)| (i * 6 + j) as f64 * 0.05 + 0.1);
let before = layer.forward(&input).expect("before merge");
layer.merge_into_base();
let after = layer.forward(&input).expect("after merge");
for (a, b) in before.iter().zip(after.iter()) {
assert!((a - b).abs() < 1e-9, "merge changed output: {a} vs {b}");
}
}
#[test]
fn dora_invalid_rank_zero() {
let w = Array2::<f64>::eye(4);
let cfg = DoraConfig {
rank: 0,
..Default::default()
};
assert!(DoraLinear::new(w, &cfg).is_err());
}
#[test]
fn dora_rank_larger_than_dim() {
let w = Array2::<f64>::eye(4);
let cfg = DoraConfig {
rank: 5,
..Default::default()
};
assert!(DoraLinear::new(w, &cfg).is_err());
}
#[test]
fn dora_n_params_correct() {
let out = 8;
let in_f = 6;
let rank = 3;
let layer = make_layer(out, in_f, rank);
let expected = rank * in_f + out * rank + out;
assert_eq!(layer.n_trainable_params(), expected);
}
}