/// Task-arithmetic merge: `result = base + Σ_i scale_i * (task_i - base)`.
///
/// * `base_tensors` - base model tensors keyed by name: (flat f32 data, shape).
/// * `task_models`  - fine-tuned variants; each is expected to contain every
///   base tensor name (callers validate this, hence the `expect`).
/// * `scales`       - per-model scaling factors; missing entries default to 1.0.
pub(crate) fn task_arithmetic_merge(
    base_tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
    task_models: &[BTreeMap<String, (Vec<f32>, Vec<usize>)>],
    scales: &[f32],
) -> BTreeMap<String, (Vec<f32>, Vec<usize>)> {
    let mut merged = BTreeMap::new();
    for (name, (base_data, shape)) in base_tensors {
        let mut result = base_data.clone();
        for (model_idx, model_tensors) in task_models.iter().enumerate() {
            let (model_data, _) = model_tensors.get(name).expect("validated above");
            let scale = scales.get(model_idx).copied().unwrap_or(1.0);
            // Accumulate the scaled task delta. Zipping all three slices
            // replaces the old `enumerate` + `let _ = i` + `base_data[i]`
            // pattern: no dead index variable, no per-element bounds check,
            // same truncation at the shortest slice.
            for ((r_val, &m_val), &b_val) in result.iter_mut().zip(model_data).zip(base_data) {
                *r_val += scale * (m_val - b_val);
            }
        }
        merged.insert(name.clone(), (result, shape.clone()));
    }
    merged
}
/// Applies `nuslerp_vectors` tensor-by-tensor to two models with the same
/// tensor names, interpolating each pair at factor `t`.
pub(crate) fn nuslerp_tensors(
    model_a: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
    model_b: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
    t: f32,
) -> BTreeMap<String, (Vec<f32>, Vec<usize>)> {
    model_a
        .iter()
        .map(|(name, (data_a, shape))| {
            // Both maps are expected to hold identical key sets (callers validate).
            let (data_b, _) = model_b.get(name).expect("validated above");
            let blended = nuslerp_vectors(data_a, data_b, t);
            (name.clone(), (blended, shape.clone()))
        })
        .collect()
}
/// Spherical interpolation between two flat vectors at factor `t`,
/// with fallbacks: plain lerp for (near-)zero vectors and nlerp when the
/// vectors are nearly (anti)parallel.
fn nuslerp_vectors(a: &[f32], b: &[f32], t: f32) -> Vec<f32> {
    let norm_a = vector_norm(a);
    let norm_b = vector_norm(b);
    // Angle is undefined for a degenerate vector; fall back to linear blend.
    if norm_a < 1e-12 || norm_b < 1e-12 {
        return lerp_vectors(a, b, t);
    }
    let mut dot = 0.0f64;
    for (&x, &y) in a.iter().zip(b.iter()) {
        dot += f64::from(x) * f64::from(y);
    }
    let cos_omega = (dot / (norm_a * norm_b)).clamp(-1.0, 1.0);
    // sin(omega) ~ 0 here would make the slerp coefficients blow up.
    if cos_omega.abs() > 0.9995 {
        return nlerp_vectors(a, b, t);
    }
    let omega = cos_omega.acos();
    let sin_omega = omega.sin();
    let frac = f64::from(t);
    let weight_a = ((1.0 - frac) * omega).sin() / sin_omega;
    let weight_b = (frac * omega).sin() / sin_omega;
    let mut out = Vec::with_capacity(a.len());
    for (&x, &y) in a.iter().zip(b.iter()) {
        out.push((weight_a * f64::from(x) + weight_b * f64::from(y)) as f32);
    }
    out
}
/// Normalized linear interpolation: lerp the two vectors, then rescale the
/// result so its norm matches the lerp of the two input norms. Returns the
/// raw lerp unchanged when its norm is (near) zero.
fn nlerp_vectors(a: &[f32], b: &[f32], t: f32) -> Vec<f32> {
    let mut lerped = Vec::with_capacity(a.len());
    for (&x, &y) in a.iter().zip(b.iter()) {
        lerped.push(x * (1.0 - t) + y * t);
    }
    let norm = vector_norm(&lerped);
    // Rescaling a near-zero vector would divide by ~0; return it as-is.
    if norm < 1e-12 {
        return lerped;
    }
    let target_norm = f64::from(1.0 - t) * vector_norm(a) + f64::from(t) * vector_norm(b);
    let scale = (target_norm / norm) as f32;
    lerped.into_iter().map(|x| x * scale).collect()
}
/// Weighted multi-model SLERP: folds the models together pairwise via
/// `nuslerp_tensors`, choosing each interpolation factor so the running blend
/// and the incoming model contribute in proportion to their (normalized)
/// weights.
///
/// Panics if fewer than two models are given or if the weight count does not
/// match the model count.
pub(crate) fn multi_slerp_tensors(
    all_tensors: &[BTreeMap<String, (Vec<f32>, Vec<usize>)>],
    weights: &[f32],
) -> BTreeMap<String, (Vec<f32>, Vec<usize>)> {
    assert!(
        all_tensors.len() >= 2,
        "MultiSLERP requires at least 2 models"
    );
    assert_eq!(all_tensors.len(), weights.len());
    let total: f32 = weights.iter().sum();
    let mut blended = all_tensors[0].clone();
    let mut blended_weight = weights[0] / total;
    for (tensors, &raw_w) in all_tensors.iter().zip(weights).skip(1) {
        let w = raw_w / total;
        // The incoming model should own w out of the combined mass.
        let t = w / (blended_weight + w);
        blended = nuslerp_tensors(&blended, tensors, t);
        blended_weight += w;
    }
    blended
}
/// DELLA merge: task arithmetic with magnitude-adaptive random dropping of
/// delta entries.
///
/// For each tensor, every task model's delta from the base is pruned
/// stochastically: entries that are small relative to that tensor's largest
/// delta are dropped with probability up to `drop_rate`, and survivors are
/// rescaled by the inverse keep-probability so their expected contribution is
/// unchanged. Surviving deltas are weight-summed onto the base.
///
/// * `base_tensors` - base model: name -> (flat f32 data, shape).
/// * `task_models`  - fine-tuned models; each is expected to contain every
///   base tensor name (callers validate this, hence the `expect`).
/// * `drop_rate`    - maximum drop probability, attenuated for large deltas.
/// * `seed`         - each tensor gets its own RNG stream derived from
///   `seed + tensor index`; BTreeMap iteration order is stable, so results
///   are fully deterministic for a given seed.
/// * `weights`      - optional per-model blend weights; defaults to uniform
///   `1 / num_models`.
pub(crate) fn della_merge(
    base_tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
    task_models: &[BTreeMap<String, (Vec<f32>, Vec<usize>)>],
    drop_rate: f32,
    seed: u64,
    weights: Option<&[f32]>,
) -> BTreeMap<String, (Vec<f32>, Vec<usize>)> {
    let mut merged = BTreeMap::new();
    let num_models = task_models.len();
    // NOTE(review): assumes num_models >= 1; with an empty `task_models` the
    // default weight is 1.0/0 = inf, though the slice is never indexed then.
    let default_weights: Vec<f32> = vec![1.0 / num_models as f32; num_models];
    let w = weights.unwrap_or(&default_weights);
    for (tensor_idx, (name, (base_data, shape))) in base_tensors.iter().enumerate() {
        // Fresh per-tensor RNG stream so drop patterns are reproducible and
        // independent of how many tensors were processed before this one.
        let mut rng = StdRng::seed_from_u64(seed.wrapping_add(tensor_idx as u64));
        let mut merged_delta = vec![0.0f32; base_data.len()];
        for (model_idx, model_tensors) in task_models.iter().enumerate() {
            let (model_data, _) = model_tensors.get(name).expect("validated above");
            let weight = w[model_idx];
            // Largest |delta| for this model/tensor pair; used to rank each
            // entry's relative importance below.
            let max_mag: f32 = model_data
                .iter()
                .zip(base_data.iter())
                .map(|(&m, &b)| (m - b).abs())
                .fold(0.0f32, f32::max);
            if max_mag < 1e-12 {
                // This model is numerically identical to the base here.
                continue;
            }
            for (i, (&m_val, &b_val)) in model_data.iter().zip(base_data.iter()).enumerate() {
                let delta = m_val - b_val;
                let magnitude_ratio = delta.abs() / max_mag;
                // Larger deltas (ratio -> 1) are dropped less often; the
                // single largest delta is never dropped (adaptive_drop = 0).
                let adaptive_drop = drop_rate * (1.0 - magnitude_ratio);
                // `random::<f32>()` draws uniformly from [0, 1).
                let keep = rng.random::<f32>() >= adaptive_drop;
                if keep {
                    // Inverse-keep-probability rescale keeps the expected
                    // pruned delta unbiased; clamp guards division by ~0 when
                    // adaptive_drop approaches 1.
                    let rescale = 1.0 / (1.0 - adaptive_drop).max(1e-6);
                    merged_delta[i] += delta * rescale * weight;
                }
            }
        }
        let result: Vec<f32> = base_data
            .iter()
            .zip(merged_delta.iter())
            .map(|(&b, &d)| b + d)
            .collect();
        merged.insert(name.clone(), (result, shape.clone()));
    }
    merged
}
/// Breadcrumbs merge: task arithmetic that discards outlier delta entries.
/// A delta is applied only when it lies within `outlier_k` standard
/// deviations of that model/tensor's mean delta.
///
/// Missing `scales` entries default to 1.0; each task model must contain
/// every base tensor name (callers validate this).
pub(crate) fn breadcrumbs_merge(
    base_tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
    task_models: &[BTreeMap<String, (Vec<f32>, Vec<usize>)>],
    scales: &[f32],
    outlier_k: f32,
) -> BTreeMap<String, (Vec<f32>, Vec<usize>)> {
    let mut merged = BTreeMap::new();
    for (name, (base_data, shape)) in base_tensors {
        let mut accumulated = base_data.clone();
        for (model_idx, model_tensors) in task_models.iter().enumerate() {
            let (model_data, _) = model_tensors.get(name).expect("validated above");
            let scale = match scales.get(model_idx) {
                Some(&s) => s,
                None => 1.0,
            };
            let deltas: Vec<f32> = model_data
                .iter()
                .zip(base_data)
                .map(|(&m, &b)| m - b)
                .collect();
            let (mean, std) = delta_mean_std(&deltas);
            let threshold = outlier_k * std;
            for (slot, &delta) in accumulated.iter_mut().zip(&deltas) {
                // Outliers beyond k std-devs from the mean are skipped entirely.
                if (delta - mean).abs() <= threshold {
                    *slot += scale * delta;
                }
            }
        }
        merged.insert(name.clone(), (accumulated, shape.clone()));
    }
    merged
}
/// Population mean and standard deviation of `deltas`, accumulated in f64
/// for numerical stability. Returns `(0.0, 0.0)` for an empty slice.
fn delta_mean_std(deltas: &[f32]) -> (f32, f32) {
    if deltas.is_empty() {
        return (0.0, 0.0);
    }
    let n = deltas.len() as f64;
    let mean = deltas.iter().map(|&x| f64::from(x)).sum::<f64>() / n;
    // Sum of squared deviations, then divide by n (population variance).
    let sq_dev_sum = deltas.iter().fold(0.0f64, |acc, &x| {
        let d = f64::from(x) - mean;
        acc + d * d
    });
    (mean as f32, (sq_dev_sum / n).sqrt() as f32)
}
/// SCE-style merge: blends all models per tensor with weights that are an
/// even mix of the caller-supplied priors (`base_weights`, normalized) and
/// each model's relative "energy" (mean squared value) for that tensor.
/// Tensor names are taken from the first model; every model must contain
/// every name (callers validate this).
pub(crate) fn sce_merge(
    all_tensors: &[BTreeMap<String, (Vec<f32>, Vec<usize>)>],
    base_weights: &[f32],
) -> BTreeMap<String, (Vec<f32>, Vec<usize>)> {
    let reference = &all_tensors[0];
    let num_models = all_tensors.len();
    let prior_sum: f32 = base_weights.iter().sum();
    let priors: Vec<f32> = base_weights.iter().map(|w| w / prior_sum).collect();
    let mut merged = BTreeMap::new();
    for (name, (_, shape)) in reference {
        // Borrow every model's flat data for this tensor.
        let model_data: Vec<&Vec<f32>> = all_tensors
            .iter()
            .map(|t| &t.get(name).expect("validated above").0)
            .collect();
        let data_len = model_data[0].len();
        // Per-model energy (mean of squares), accumulated in f64.
        let variances: Vec<f64> = (0..num_models)
            .map(|m| {
                model_data[m]
                    .iter()
                    .map(|&x| f64::from(x) * f64::from(x))
                    .sum::<f64>()
                    / data_len as f64
            })
            .collect();
        let total_var: f64 = variances.iter().sum();
        let adaptive: Vec<f32> = if total_var < 1e-12 {
            // All models are ~zero here: fall back to the priors alone.
            priors.clone()
        } else {
            (0..num_models)
                .map(|m| {
                    // Half prior, half energy share.
                    let energy_share = (variances[m] / total_var) as f32;
                    0.5 * priors[m] + 0.5 * energy_share
                })
                .collect()
        };
        let adaptive_sum: f32 = adaptive.iter().sum();
        let final_weights: Vec<f32> = adaptive.iter().map(|w| w / adaptive_sum).collect();
        let mut merged_data = vec![0.0f32; data_len];
        for (weight, data) in final_weights.iter().zip(&model_data) {
            for (slot, &val) in merged_data.iter_mut().zip(data.iter()) {
                *slot += val * *weight;
            }
        }
        merged.insert(name.clone(), (merged_data, shape.clone()));
    }
    merged
}
/// Passthrough (frankenmerge) assembly: builds an output model whose layer
/// `out_idx` is copied from layer `src_layer` of model `model_idx`, as laid
/// out by `layer_ranges` (`(model_idx, start, end)` takes layers
/// `start..end` from that model, in order). Tensors whose names carry no
/// layer number are copied from the first model that has them.
pub(crate) fn passthrough_merge(
    all_tensors: &[BTreeMap<String, (Vec<f32>, Vec<usize>)>],
    layer_ranges: &[(usize, usize, usize)],
) -> BTreeMap<String, (Vec<f32>, Vec<usize>)> {
    // Expand the ranges into an ordered output-layer plan.
    let layer_map: Vec<(usize, usize)> = layer_ranges
        .iter()
        .flat_map(|&(model_idx, start, end)| (start..end).map(move |layer| (model_idx, layer)))
        .collect();
    // Union of all tensor names across the input models.
    let all_names: std::collections::BTreeSet<String> = all_tensors
        .iter()
        .flat_map(|model| model.keys().cloned())
        .collect();
    let mut merged = BTreeMap::new();
    for name in &all_names {
        match parse_layer_tensor_name(name) {
            Some((layer_num, prefix, suffix)) => {
                // A source layer may appear at several output positions.
                for (out_idx, &(model_idx, src_layer)) in layer_map.iter().enumerate() {
                    if src_layer != layer_num {
                        continue;
                    }
                    let found = all_tensors
                        .get(model_idx)
                        .and_then(|model| model.get(name));
                    if let Some((data, shape)) = found {
                        let out_name = format!("{prefix}{out_idx}{suffix}");
                        merged.insert(out_name, (data.clone(), shape.clone()));
                    }
                }
            }
            None => {
                // Non-layer tensor (embeddings, norms, ...): first model wins.
                if let Some((data, shape)) = all_tensors.iter().find_map(|m| m.get(name)) {
                    merged.insert(name.clone(), (data.clone(), shape.clone()));
                }
            }
        }
    }
    merged
}
/// Splits a tensor name of the form `<prefix>layers.<N>.<suffix>` (HF-style)
/// or `<prefix>blk.<N>.<suffix>` (GGUF-style) into
/// `(layer number, prefix including the marker, suffix including its leading dot)`.
/// Returns `None` when neither layout matches.
///
/// Example: `"model.layers.12.attn.weight"` -> `(12, "model.layers.", ".attn.weight")`.
fn parse_layer_tensor_name(name: &str) -> Option<(usize, &str, &str)> {
    // "layers." is tried first, matching the original branch order. The two
    // branches previously duplicated the same parse with hard-coded marker
    // lengths (7 and 4); the logic now lives in one helper.
    split_on_layer_marker(name, "layers.").or_else(|| split_on_layer_marker(name, "blk."))
}

/// Finds the first occurrence of `marker` in `name` and parses the
/// dot-terminated number that follows it. `None` when the marker is absent,
/// no dot follows the number, or the segment is not a valid `usize`.
fn split_on_layer_marker<'a>(name: &'a str, marker: &str) -> Option<(usize, &'a str, &'a str)> {
    let pos = name.find(marker)?;
    let after = &name[pos + marker.len()..];
    let dot = after.find('.')?;
    let num: usize = after[..dot].parse().ok()?;
    Some((num, &name[..pos + marker.len()], &after[dot..]))
}