impl Linear {
    /// Creates a zero-initialized dense layer mapping `in_features` inputs to
    /// `out_features` outputs.
    ///
    /// # Errors
    /// Returns `RealizarError::InvalidShape` when either dimension is zero.
    pub fn new(in_features: usize, out_features: usize) -> Result<Self> {
        if in_features == 0 || out_features == 0 {
            return Err(RealizarError::InvalidShape {
                reason: "in_features and out_features must be > 0".to_string(),
            });
        }
        Ok(Self {
            in_features,
            out_features,
            weight: vec![0.0; in_features * out_features],
            bias: vec![0.0; out_features],
        })
    }

    /// Computes `input * W + b` over the trailing dimension of `input`.
    ///
    /// All leading dimensions are treated as a flat batch; the output keeps
    /// those dimensions and replaces the last one with `out_features`.
    ///
    /// # Errors
    /// Returns `RealizarError::InvalidShape` when the input shape is empty or
    /// its last dimension differs from `in_features`.
    pub fn forward(&self, input: &Tensor<f32>) -> Result<Tensor<f32>> {
        let shape = input.shape();
        let Some(&last_dim) = shape.last() else {
            return Err(RealizarError::InvalidShape {
                reason: "Input tensor cannot be empty".to_string(),
            });
        };
        if last_dim != self.in_features {
            return Err(RealizarError::InvalidShape {
                reason: format!(
                    "Last dimension {} doesn't match in_features {}",
                    last_dim, self.in_features
                ),
            });
        }
        let data = input.data();
        let row_count = data.len() / self.in_features;
        let mut output = Vec::with_capacity(row_count * self.out_features);
        // `data.len()` is a multiple of `in_features` (the last dim was just
        // verified), so `chunks_exact` visits every logical row with no remainder.
        for row in data.chunks_exact(self.in_features) {
            for col in 0..self.out_features {
                // Seed the accumulator with the bias so the floating-point
                // summation order matches a plain `sum = bias; sum += ...` loop.
                let value = row
                    .iter()
                    .enumerate()
                    .fold(self.bias[col], |acc, (i, &x)| {
                        acc + x * self.weight[i * self.out_features + col]
                    });
                output.push(value);
            }
        }
        let mut output_shape = shape[..shape.len() - 1].to_vec();
        output_shape.push(self.out_features);
        debug_assert!(
            output.iter().all(|&x| x.is_finite()),
            "Linear layer produced NaN or Inf values - check for exploding gradients/activations"
        );
        Tensor::from_vec(output_shape, output)
    }

    /// Number of input features.
    #[must_use]
    pub fn in_features(&self) -> usize {
        self.in_features
    }

    /// Number of output features.
    #[must_use]
    pub fn out_features(&self) -> usize {
        self.out_features
    }

    /// Mutable view of the weight matrix (`in_features * out_features` values;
    /// element `(i, j)` lives at index `i * out_features + j`).
    #[must_use]
    pub fn weight_mut(&mut self) -> &mut [f32] {
        &mut self.weight
    }

    /// Mutable view of the bias vector (`out_features` entries).
    #[must_use]
    pub fn bias_mut(&mut self) -> &mut [f32] {
        &mut self.bias
    }
}
/// Linear layer whose weights are stored quantized in Q4_K super-blocks and
/// dequantized on the fly during `forward` via a fused dot-product kernel.
#[derive(Debug, Clone)]
pub struct QuantizedLinear {
    // Number of input features (length of each logical weight row).
    in_features: usize,
    // Number of output features (number of weight rows / bias entries).
    out_features: usize,
    // Quantized weight data: `out_features` rows of `bytes_per_row` bytes each.
    weight_bytes: Vec<u8>,
    // Per-output-feature bias, length `out_features`.
    bias: Vec<f32>,
    // Byte stride of one weight row: a whole number of 144-byte super-blocks,
    // so rows may carry padding when `in_features` is not a multiple of 256.
    bytes_per_row: usize,
}
impl QuantizedLinear {
    /// Creates a quantized linear layer from pre-quantized Q4_K weight bytes.
    ///
    /// `weight_bytes` must hold `out_features` rows, each padded to a whole
    /// number of Q4_K super-blocks (256 values packed into 144 bytes each).
    ///
    /// # Errors
    /// Returns `RealizarError::InvalidShape` if either dimension is zero, the
    /// bias length does not equal `out_features`, or `weight_bytes` does not
    /// match the byte stride derived from `in_features`.
    pub fn new(
        in_features: usize,
        out_features: usize,
        weight_bytes: Vec<u8>,
        bias: Vec<f32>,
    ) -> Result<Self> {
        // Q4_K layout: each super-block packs 256 quantized values into 144 bytes.
        const SUPER_BLOCK_VALUES: usize = 256;
        const SUPER_BLOCK_BYTES: usize = 144;
        if in_features == 0 || out_features == 0 {
            return Err(RealizarError::InvalidShape {
                reason: "in_features and out_features must be > 0".to_string(),
            });
        }
        if bias.len() != out_features {
            return Err(RealizarError::InvalidShape {
                reason: format!(
                    "Bias length {} doesn't match out_features {}",
                    bias.len(),
                    out_features
                ),
            });
        }
        // Rows are padded up to whole super-blocks, hence the ceiling division.
        let super_blocks_per_row = in_features.div_ceil(SUPER_BLOCK_VALUES);
        let bytes_per_row = super_blocks_per_row * SUPER_BLOCK_BYTES;
        let expected_bytes = out_features * bytes_per_row;
        if weight_bytes.len() != expected_bytes {
            return Err(RealizarError::InvalidShape {
                reason: format!(
                    "Weight bytes {} doesn't match expected {} ({}x{})",
                    weight_bytes.len(),
                    expected_bytes,
                    out_features,
                    bytes_per_row
                ),
            });
        }
        Ok(Self {
            in_features,
            out_features,
            weight_bytes,
            bias,
            bytes_per_row,
        })
    }

    /// Applies the quantized linear transform to the trailing dimension of
    /// `input`. Each output element is one fused dequantize-dot-product over a
    /// Q4_K weight row, plus the corresponding bias term.
    ///
    /// # Errors
    /// Returns `RealizarError::InvalidShape` if the input shape is empty or its
    /// last dimension does not equal `in_features`; propagates any error from
    /// the fused dot-product kernel.
    pub fn forward(&self, input: &Tensor<f32>) -> Result<Tensor<f32>> {
        use crate::quantize::fused_q4k_dot_simd;
        let shape = input.shape();
        if shape.is_empty() {
            return Err(RealizarError::InvalidShape {
                reason: "Input tensor cannot be empty".to_string(),
            });
        }
        let last_dim = shape[shape.len() - 1];
        if last_dim != self.in_features {
            return Err(RealizarError::InvalidShape {
                reason: format!(
                    "Last dimension {} doesn't match in_features {}",
                    last_dim, self.in_features
                ),
            });
        }
        let data = input.data();
        let total_size = data.len();
        let num_rows = total_size / self.in_features;
        let mut output = Vec::with_capacity(num_rows * self.out_features);
        for row_idx in 0..num_rows {
            let input_start = row_idx * self.in_features;
            let input_row = &data[input_start..input_start + self.in_features];
            for j in 0..self.out_features {
                // Each weight row occupies a fixed byte stride of padded super-blocks.
                let weight_start = j * self.bytes_per_row;
                let weight_row =
                    &self.weight_bytes[weight_start..weight_start + self.bytes_per_row];
                let dot = fused_q4k_dot_simd(weight_row, input_row)?;
                output.push(dot + self.bias[j]);
            }
        }
        // Replace the trailing input dimension with `out_features`. `shape` is
        // non-empty (checked above), so `output_shape` always ends up with at
        // least one element; the former `if output_shape.is_empty()` fallback
        // was unreachable dead code and has been removed.
        let mut output_shape = shape[..shape.len() - 1].to_vec();
        output_shape.push(self.out_features);
        Tensor::from_vec(output_shape, output)
    }

    /// Number of input features.
    #[must_use]
    pub fn in_features(&self) -> usize {
        self.in_features
    }

    /// Number of output features.
    #[must_use]
    pub fn out_features(&self) -> usize {
        self.out_features
    }

    /// Raw quantized weight bytes (`out_features` rows, `bytes_per_row` stride).
    #[must_use]
    pub fn weight_bytes(&self) -> &[u8] {
        &self.weight_bytes
    }

    /// Per-output bias terms.
    #[must_use]
    pub fn bias(&self) -> &[f32] {
        &self.bias
    }

    /// Heap footprint of the layer parameters in bytes (weights + bias).
    #[must_use]
    pub fn memory_bytes(&self) -> usize {
        self.weight_bytes.len() + self.bias.len() * std::mem::size_of::<f32>()
    }
}
/// Fused LayerNorm + Linear building block.
///
/// NOTE(review): the `impl` for this struct is outside this chunk; the field
/// roles below are inferred from names and the `Linear`/`QuantizedLinear`
/// layers above — confirm against the actual impl.
#[derive(Debug, Clone)]
pub struct FusedLayerNormLinear {
    // Input feature dimension; presumably the normalized axis length.
    feature_dim: usize,
    // Output feature count of the linear projection.
    out_features: usize,
    // Numerical-stability epsilon, presumably added to the variance before sqrt.
    eps: f32,
    // Normalization scale (gamma); presumably length `feature_dim`.
    norm_weight: Vec<f32>,
    // Normalization shift (beta); presumably length `feature_dim`.
    norm_bias: Vec<f32>,
    // Projection weights; presumably `feature_dim * out_features` elements.
    linear_weight: Vec<f32>,
    // Projection bias; presumably length `out_features`.
    linear_bias: Vec<f32>,
}