use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix2};
use scirs2_core::numeric::{Float, NumCast};
use scirs2_core::simd_ops::SimdUnifiedOps;
use crate::error::{Result, TransformError};
use crate::normalize::{NormalizationMethod, EPSILON};
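// Cache tuning constants. These assume a typical x86-64 hierarchy (64-byte
// cache lines, 32 KiB L1d, 256 KiB L2) rather than probing the hardware.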
const CACHE_LINE_SIZE: usize = 64;
const L1_CACHE_SIZE: usize = 32_768;
const L2_CACHE_SIZE: usize = 262_144;
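/// Selects cache-friendly block sizes from the total data footprint.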
#[derive(Debug, Clone)]
pub struct AdaptiveBlockSizer {
pub optimal_block_size: usize,
pub use_cache_alignment: bool,
pub prefetch_distance: usize,
}
impl AdaptiveBlockSizer {
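/// Derives the block size, cache-alignment policy, and prefetch distance for an
/// array with the given shape and element type `F`.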
pub fn new<F>(datashape: &[usize]) -> Self
where
F: Float + NumCast,
{
let element_size = std::mem::size_of::<F>();
let data_size = datashape.iter().product::<usize>() * element_size;
let optimal_block_size = if data_size <= L1_CACHE_SIZE / 4 {
256
} else if data_size <= L2_CACHE_SIZE / 2 {
128
} else {
64
};
let use_cache_alignment = data_size > L1_CACHE_SIZE;
let prefetch_distance = if data_size > L2_CACHE_SIZE { 8 } else { 4 };
AdaptiveBlockSizer {
optimal_block_size,
use_cache_alignment,
prefetch_distance,
}
}
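/// Returns the block size for a dimension of length `dimensionsize`: rounded up
/// to a multiple of `CACHE_LINE_SIZE` when cache alignment is enabled, then
/// clamped to the dimension length.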
pub fn get_aligned_block_size(&self, dimensionsize: usize) -> usize {
if self.use_cache_alignment {
let cache_aligned =
(self.optimal_block_size + CACHE_LINE_SIZE - 1) & !(CACHE_LINE_SIZE - 1);
cache_aligned.min(dimensionsize)
} else {
self.optimal_block_size.min(dimensionsize)
}
}
}
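/// Min-max normalizes a 1-D array to [0, 1] using SIMD reductions. Constant
/// arrays (range below `EPSILON`) map to 0.5; empty input is an error.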
#[allow(dead_code)]
pub fn simd_minmax_normalize_1d<F>(array: &Array1<F>) -> Result<Array1<F>>
where
F: Float + NumCast + SimdUnifiedOps,
{
if array.is_empty() {
return Err(TransformError::InvalidInput(
"Input array is empty".to_string(),
));
}
let min = F::simd_min_element(&array.view());
let max = F::simd_max_element(&array.view());
let range = max - min;
if range.abs() <= F::from(EPSILON).expect("Failed to convert to float") {
return Ok(Array1::from_elem(
array.len(),
F::from(0.5).expect("Failed to convert constant to float"),
));
}
let minarray = Array1::from_elem(array.len(), min);
let normalized = F::simd_sub(&array.view(), &minarray.view());
let rangearray = Array1::from_elem(array.len(), range);
let result = F::simd_div(&normalized.view(), &rangearray.view());
Ok(result)
}
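/// Z-score normalizes a 1-D array (subtract the mean, divide by the population
/// standard deviation) using SIMD operations. Arrays with near-zero standard
/// deviation map to all zeros; empty input is an error.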
#[allow(dead_code)]
pub fn simd_zscore_normalize_1d<F>(array: &Array1<F>) -> Result<Array1<F>>
where
F: Float + NumCast + SimdUnifiedOps,
{
if array.is_empty() {
return Err(TransformError::InvalidInput(
"Input array is empty".to_string(),
));
}
let mean = F::simd_mean(&array.view());
let n = F::from(array.len()).expect("Failed to convert length to float");
let meanarray = Array1::from_elem(array.len(), mean);
let centered = F::simd_sub(&array.view(), &meanarray.view());
let squared = F::simd_mul(&centered.view(), &centered.view());
let variance = F::simd_sum(&squared.view()) / n;
let std_dev = variance.sqrt();
if std_dev <= F::from(EPSILON).expect("Failed to convert to float") {
return Ok(Array1::zeros(array.len()));
}
let stdarray = Array1::from_elem(array.len(), std_dev);
let result = F::simd_div(&centered.view(), &stdarray.view());
Ok(result)
}
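/// Scales a 1-D array to unit L2 norm using SIMD operations. Arrays with a
/// near-zero norm map to all zeros; empty input is an error.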
#[allow(dead_code)]
pub fn simd_l2_normalize_1d<F>(array: &Array1<F>) -> Result<Array1<F>>
where
F: Float + NumCast + SimdUnifiedOps,
{
if array.is_empty() {
return Err(TransformError::InvalidInput(
"Input array is empty".to_string(),
));
}
let l2_norm = F::simd_norm(&array.view());
if l2_norm <= F::from(EPSILON).expect("Failed to convert to float") {
return Ok(Array1::zeros(array.len()));
}
let normarray = Array1::from_elem(array.len(), l2_norm);
let result = F::simd_div(&array.view(), &normarray.view());
Ok(result)
}
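/// Scales a 1-D array by its maximum absolute value so results lie in [-1, 1].
/// Arrays with a near-zero maximum map to all zeros; empty input is an error.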
#[allow(dead_code)]
pub fn simd_maxabs_normalize_1d<F>(array: &Array1<F>) -> Result<Array1<F>>
where
F: Float + NumCast + SimdUnifiedOps,
{
if array.is_empty() {
return Err(TransformError::InvalidInput(
"Input array is empty".to_string(),
));
}
let absarray = F::simd_abs(&array.view());
let max_abs = F::simd_max_element(&absarray.view());
if max_abs <= F::from(EPSILON).expect("Failed to convert to float") {
return Ok(Array1::zeros(array.len()));
}
let max_absarray = Array1::from_elem(array.len(), max_abs);
let result = F::simd_div(&array.view(), &max_absarray.view());
Ok(result)
}
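/// Normalizes a 2-D array with cache-blocked SIMD kernels. `axis == 0`
/// normalizes each column independently; `axis == 1` normalizes each row. Only
/// the MinMax, ZScore, L2, and MaxAbs methods are supported; the input must be
/// in standard (row-major, contiguous) layout.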
#[allow(dead_code)]
pub fn simd_normalizearray<S, F>(
array: &ArrayBase<S, Ix2>,
method: NormalizationMethod,
axis: usize,
) -> Result<Array2<F>>
where
S: Data<Elem = F>,
F: Float + NumCast + SimdUnifiedOps,
{
if !array.is_standard_layout() {
return Err(TransformError::InvalidInput(
"Input array must be in standard memory layout".to_string(),
));
}
if array.ndim() != 2 {
return Err(TransformError::InvalidInput(
"Only 2D arrays are supported".to_string(),
));
}
if axis >= array.ndim() {
return Err(TransformError::InvalidInput(format!(
"Invalid axis {} for array with {} dimensions",
axis,
array.ndim()
)));
}
let shape = array.shape();
let mut normalized = Array2::zeros((shape[0], shape[1]));
match method {
NormalizationMethod::MinMax => simd_normalize_block_minmax(array, &mut normalized, axis)?,
NormalizationMethod::ZScore => simd_normalize_block_zscore(array, &mut normalized, axis)?,
NormalizationMethod::L2 => simd_normalize_block_l2(array, &mut normalized, axis)?,
NormalizationMethod::MaxAbs => simd_normalize_block_maxabs(array, &mut normalized, axis)?,
_ => {
return Err(TransformError::InvalidInput(
"SIMD implementation not available for this normalization method".to_string(),
));
}
}
Ok(normalized)
}
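/// Blocked min-max normalization: for axis 0 a first pass gathers per-column
/// minima and maxima, then a second pass rescales each column; for axis 1 each
/// row is normalized directly.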
#[allow(dead_code)]
fn simd_normalize_block_minmax<S, F>(
array: &ArrayBase<S, Ix2>,
normalized: &mut Array2<F>,
axis: usize,
) -> Result<()>
where
S: Data<Elem = F>,
F: Float + NumCast + SimdUnifiedOps,
{
let shape = array.shape();
let block_sizer = AdaptiveBlockSizer::new::<F>(&[shape[0], shape[1]]);
let block_size =
block_sizer.get_aligned_block_size(if axis == 0 { shape[1] } else { shape[0] });
if axis == 0 {
let mut global_mins = Array1::zeros(shape[1]);
let mut global_maxs = Array1::zeros(shape[1]);
for block_start in (0..shape[1]).step_by(block_size) {
let block_end = (block_start + block_size).min(shape[1]);
for j in block_start..block_end {
let col = array.column(j);
let colarray = col.to_owned();
global_mins[j] = F::simd_min_element(&colarray.view());
global_maxs[j] = F::simd_max_element(&colarray.view());
}
}
for block_start in (0..shape[1]).step_by(block_size) {
let block_end = (block_start + block_size).min(shape[1]);
for j in block_start..block_end {
let col = array.column(j);
let colarray = col.to_owned();
let range = global_maxs[j] - global_mins[j];
if range.abs() <= F::from(EPSILON).expect("Failed to convert to float") {
for i in 0..shape[0] {
normalized[[i, j]] =
F::from(0.5).expect("Failed to convert constant to float");
}
} else {
let minarray = Array1::from_elem(shape[0], global_mins[j]);
let rangearray = Array1::from_elem(shape[0], range);
let centered = F::simd_sub(&colarray.view(), &minarray.view());
let norm_col = F::simd_div(&centered.view(), &rangearray.view());
for i in 0..shape[0] {
normalized[[i, j]] = norm_col[i];
}
}
}
}
} else {
for block_start in (0..shape[0]).step_by(block_size) {
let block_end = (block_start + block_size).min(shape[0]);
for i in block_start..block_end {
let row = array.row(i);
let rowarray = row.to_owned();
let norm_row = simd_minmax_normalize_1d(&rowarray)?;
for j in 0..shape[1] {
normalized[[i, j]] = norm_row[j];
}
}
}
}
Ok(())
}
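/// Blocked z-score normalization: for axis 0 per-column means and standard
/// deviations are computed first, then each column is centered and scaled; for
/// axis 1 each row is normalized directly.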
#[allow(dead_code)]
fn simd_normalize_block_zscore<S, F>(
array: &ArrayBase<S, Ix2>,
normalized: &mut Array2<F>,
axis: usize,
) -> Result<()>
where
S: Data<Elem = F>,
F: Float + NumCast + SimdUnifiedOps,
{
let shape = array.shape();
let block_sizer = AdaptiveBlockSizer::new::<F>(&[shape[0], shape[1]]);
let block_size =
block_sizer.get_aligned_block_size(if axis == 0 { shape[1] } else { shape[0] });
if axis == 0 {
let mut global_means = Array1::zeros(shape[1]);
let mut global_stds = Array1::zeros(shape[1]);
let n_samples_f = F::from(shape[0]).expect("Failed to convert to float");
for block_start in (0..shape[1]).step_by(block_size) {
let block_end = (block_start + block_size).min(shape[1]);
for j in block_start..block_end {
let col = array.column(j);
let colarray = col.to_owned();
global_means[j] = F::simd_sum(&colarray.view()) / n_samples_f;
let meanarray = Array1::from_elem(shape[0], global_means[j]);
let centered = F::simd_sub(&colarray.view(), &meanarray.view());
let squared = F::simd_mul(&centered.view(), &centered.view());
let variance = F::simd_sum(&squared.view()) / n_samples_f;
global_stds[j] = variance.sqrt();
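// For near-constant columns, substitute 1 for the std dev: the centered
// values are already (near) zero, so the normalized output stays (near) zero
// and no division by zero can occur.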
if global_stds[j] <= F::from(EPSILON).expect("Failed to convert to float") {
global_stds[j] = F::one();
}
}
}
for block_start in (0..shape[1]).step_by(block_size) {
let block_end = (block_start + block_size).min(shape[1]);
for j in block_start..block_end {
let col = array.column(j);
let colarray = col.to_owned();
if global_stds[j] <= F::from(EPSILON).expect("Failed to convert to float") {
for i in 0..shape[0] {
normalized[[i, j]] = F::zero();
}
} else {
let meanarray = Array1::from_elem(shape[0], global_means[j]);
let stdarray = Array1::from_elem(shape[0], global_stds[j]);
let centered = F::simd_sub(&colarray.view(), &meanarray.view());
let norm_col = F::simd_div(&centered.view(), &stdarray.view());
for i in 0..shape[0] {
normalized[[i, j]] = norm_col[i];
}
}
}
}
} else {
for block_start in (0..shape[0]).step_by(block_size) {
let block_end = (block_start + block_size).min(shape[0]);
for i in block_start..block_end {
let row = array.row(i);
let rowarray = row.to_owned();
let norm_row = simd_zscore_normalize_1d(&rowarray)?;
for j in 0..shape[1] {
normalized[[i, j]] = norm_row[j];
}
}
}
}
Ok(())
}
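/// Blocked L2 normalization over columns (axis 0) or rows (axis 1).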
#[allow(dead_code)]
fn simd_normalize_block_l2<S, F>(
array: &ArrayBase<S, Ix2>,
normalized: &mut Array2<F>,
axis: usize,
) -> Result<()>
where
S: Data<Elem = F>,
F: Float + NumCast + SimdUnifiedOps,
{
let shape = array.shape();
let block_sizer = AdaptiveBlockSizer::new::<F>(&[shape[0], shape[1]]);
let block_size =
block_sizer.get_aligned_block_size(if axis == 0 { shape[1] } else { shape[0] });
if axis == 0 {
for block_start in (0..shape[1]).step_by(block_size) {
let block_end = (block_start + block_size).min(shape[1]);
for j in block_start..block_end {
let col = array.column(j);
let colarray = col.to_owned();
let norm_col = simd_l2_normalize_1d(&colarray)?;
for i in 0..shape[0] {
normalized[[i, j]] = norm_col[i];
}
}
}
} else {
for block_start in (0..shape[0]).step_by(block_size) {
let block_end = (block_start + block_size).min(shape[0]);
for i in block_start..block_end {
let row = array.row(i);
let rowarray = row.to_owned();
let l2_norm = F::simd_norm(&rowarray.view());
if l2_norm <= F::from(EPSILON).expect("Failed to convert to float") {
for j in 0..shape[1] {
normalized[[i, j]] = F::zero();
}
} else {
let normarray = Array1::from_elem(shape[1], l2_norm);
let norm_row = F::simd_div(&rowarray.view(), &normarray.view());
for j in 0..shape[1] {
normalized[[i, j]] = norm_row[j];
}
}
}
}
}
Ok(())
}
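/// Blocked max-abs normalization over columns (axis 0) or rows (axis 1).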
#[allow(dead_code)]
fn simd_normalize_block_maxabs<S, F>(
array: &ArrayBase<S, Ix2>,
normalized: &mut Array2<F>,
axis: usize,
) -> Result<()>
where
S: Data<Elem = F>,
F: Float + NumCast + SimdUnifiedOps,
{
let shape = array.shape();
let block_sizer = AdaptiveBlockSizer::new::<F>(&[shape[0], shape[1]]);
let block_size =
block_sizer.get_aligned_block_size(if axis == 0 { shape[1] } else { shape[0] });
if axis == 0 {
for block_start in (0..shape[1]).step_by(block_size) {
let block_end = (block_start + block_size).min(shape[1]);
for j in block_start..block_end {
let col = array.column(j);
let colarray = col.to_owned();
let norm_col = simd_maxabs_normalize_1d(&colarray)?;
for i in 0..shape[0] {
normalized[[i, j]] = norm_col[i];
}
}
}
} else {
for block_start in (0..shape[0]).step_by(block_size) {
let block_end = (block_start + block_size).min(shape[0]);
for i in block_start..block_end {
let row = array.row(i);
let rowarray = row.to_owned();
let absarray = F::simd_abs(&rowarray.view());
let max_abs = F::simd_max_element(&absarray.view());
if max_abs <= F::from(EPSILON).expect("Failed to convert to float") {
for j in 0..shape[1] {
normalized[[i, j]] = F::zero();
}
} else {
let max_absarray = Array1::from_elem(shape[1], max_abs);
let norm_row = F::simd_div(&rowarray.view(), &max_absarray.view());
for j in 0..shape[1] {
normalized[[i, j]] = norm_row[j];
}
}
}
}
}
Ok(())
}
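/// Picks a normalization strategy from the data footprint and matrix aspect
/// ratio: chunked for data larger than the L2 budget, tall- or wide-optimized
/// for strongly rectangular matrices, and the general blocked path otherwise.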
#[allow(dead_code)]
pub fn simd_normalize_adaptive<S, F>(
array: &ArrayBase<S, Ix2>,
method: NormalizationMethod,
axis: usize,
) -> Result<Array2<F>>
where
S: Data<Elem = F>,
F: Float + NumCast + SimdUnifiedOps,
{
let shape = array.shape();
let data_size = shape[0] * shape[1] * std::mem::size_of::<F>();
if data_size > L2_CACHE_SIZE {
simd_normalize_chunked(array, method, axis)
} else if shape[0] > shape[1] * 4 {
simd_normalize_optimized_tall(array, method, axis)
} else if shape[1] > shape[0] * 4 {
simd_normalize_optimized_wide(array, method, axis)
} else {
simd_normalizearray(array, method, axis)
}
}
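/// Normalizes a large matrix in batches bounded by `batch_size_mb` megabytes.
/// Batches are sliced along columns for axis 0 and along rows for axis 1, so
/// the normalization axis is never split and results match the unbatched path.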
#[allow(dead_code)]
pub fn simd_normalize_batch<S, F>(
array: &ArrayBase<S, Ix2>,
method: NormalizationMethod,
axis: usize,
batch_size_mb: usize,
) -> Result<Array2<F>>
where
S: Data<Elem = F>,
F: Float + NumCast + SimdUnifiedOps,
{
let shape = array.shape();
let element_size = std::mem::size_of::<F>();
let max_elements_per_batch = (batch_size_mb * 1024 * 1024) / element_size;
if shape[0] * shape[1] <= max_elements_per_batch {
return simd_normalize_adaptive(array, method, axis);
}
let mut normalized = Array2::zeros((shape[0], shape[1]));
if axis == 0 {
// Ensure at least one column per batch so `step_by` never receives zero.
let cols_per_batch = (max_elements_per_batch / shape[0]).max(1);
for col_start in (0..shape[1]).step_by(cols_per_batch) {
let col_end = (col_start + cols_per_batch).min(shape[1]);
// Column slices of a row-major array are not contiguous; copy the batch into
// an owned array so the standard-layout SIMD path accepts it.
let batch_owned = array.slice(scirs2_core::ndarray::s![.., col_start..col_end]).to_owned();
let batch_normalized = simd_normalize_adaptive(&batch_owned, method, axis)?;
for (j_local, j_global) in (col_start..col_end).enumerate() {
for i in 0..shape[0] {
normalized[[i, j_global]] = batch_normalized[[i, j_local]];
}
}
}
} else {
// Ensure at least one row per batch so `step_by` never receives zero.
let rows_per_batch = (max_elements_per_batch / shape[1]).max(1);
for row_start in (0..shape[0]).step_by(rows_per_batch) {
let row_end = (row_start + rows_per_batch).min(shape[0]);
let batch_view = array.slice(scirs2_core::ndarray::s![row_start..row_end, ..]);
let batch_normalized = simd_normalize_adaptive(&batch_view, method, axis)?;
for (i_local, i_global) in (row_start..row_end).enumerate() {
for j in 0..shape[1] {
normalized[[i_global, j]] = batch_normalized[[i_local, j]];
}
}
}
}
Ok(normalized)
}
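/// Chunked variant used when the data exceeds the L2 cache budget. Chunks keep
/// the normalization axis intact, so results match `simd_normalizearray`.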
#[allow(dead_code)]
fn simd_normalize_chunked<S, F>(
array: &ArrayBase<S, Ix2>,
method: NormalizationMethod,
axis: usize,
) -> Result<Array2<F>>
where
S: Data<Elem = F>,
F: Float + NumCast + SimdUnifiedOps,
{
let shape = array.shape();
let mut normalized = Array2::zeros((shape[0], shape[1]));
let block_sizer = AdaptiveBlockSizer::new::<F>(&[shape[0], shape[1]]);
let chunk_size = block_sizer.optimal_block_size * 4;
if axis == 0 {
for chunk_start in (0..shape[1]).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(shape[1]);
// Column slices of a row-major array are not contiguous; copy the chunk into
// an owned array so the standard-layout SIMD path accepts it.
let chunk_owned = array.slice(scirs2_core::ndarray::s![.., chunk_start..chunk_end]).to_owned();
let chunk_normalized = simd_normalizearray(&chunk_owned, method, axis)?;
for (j_local, j_global) in (chunk_start..chunk_end).enumerate() {
for i in 0..shape[0] {
normalized[[i, j_global]] = chunk_normalized[[i, j_local]];
}
}
}
} else {
for chunk_start in (0..shape[0]).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(shape[0]);
let chunk_view = array.slice(scirs2_core::ndarray::s![chunk_start..chunk_end, ..]);
let chunk_normalized = simd_normalizearray(&chunk_view, method, axis)?;
for (i_local, i_global) in (chunk_start..chunk_end).enumerate() {
for j in 0..shape[1] {
normalized[[i_global, j]] = chunk_normalized[[i_local, j]];
}
}
}
}
Ok(normalized)
}
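/// Variant tuned for tall matrices (many more rows than columns): column-wise
/// work (axis 0) falls back to the general path, while row-wise work processes
/// rows in small blocks to keep the working set in cache.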
#[allow(dead_code)]
fn simd_normalize_optimized_tall<S, F>(
array: &ArrayBase<S, Ix2>,
method: NormalizationMethod,
axis: usize,
) -> Result<Array2<F>>
where
S: Data<Elem = F>,
F: Float + NumCast + SimdUnifiedOps,
{
if axis == 0 {
simd_normalizearray(array, method, axis)
} else {
let shape = array.shape();
let mut normalized = Array2::zeros((shape[0], shape[1]));
let small_block_size = 32;
for block_start in (0..shape[0]).step_by(small_block_size) {
let block_end = (block_start + small_block_size).min(shape[0]);
for i in block_start..block_end {
let row = array.row(i);
let rowarray = row.to_owned();
let norm_row = match method {
NormalizationMethod::MinMax => simd_minmax_normalize_1d(&rowarray)?,
NormalizationMethod::ZScore => simd_zscore_normalize_1d(&rowarray)?,
NormalizationMethod::L2 => simd_l2_normalize_1d(&rowarray)?,
NormalizationMethod::MaxAbs => simd_maxabs_normalize_1d(&rowarray)?,
_ => {
return Err(TransformError::InvalidInput(
"Unsupported normalization method for tall matrix optimization"
.to_string(),
))
}
};
for j in 0..shape[1] {
normalized[[i, j]] = norm_row[j];
}
}
}
Ok(normalized)
}
}
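/// Variant tuned for wide matrices (many more columns than rows): row-wise work
/// (axis 1) falls back to the general path, while column-wise work processes
/// columns in larger blocks.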
#[allow(dead_code)]
fn simd_normalize_optimized_wide<S, F>(
array: &ArrayBase<S, Ix2>,
method: NormalizationMethod,
axis: usize,
) -> Result<Array2<F>>
where
S: Data<Elem = F>,
F: Float + NumCast + SimdUnifiedOps,
{
if axis == 1 {
simd_normalizearray(array, method, axis)
} else {
let shape = array.shape();
let mut normalized = Array2::zeros((shape[0], shape[1]));
let wide_block_size = 128;
for block_start in (0..shape[1]).step_by(wide_block_size) {
let block_end = (block_start + wide_block_size).min(shape[1]);
for j in block_start..block_end {
let col = array.column(j);
let colarray = col.to_owned();
let norm_col = match method {
NormalizationMethod::MinMax => simd_minmax_normalize_1d(&colarray)?,
NormalizationMethod::ZScore => simd_zscore_normalize_1d(&colarray)?,
NormalizationMethod::L2 => simd_l2_normalize_1d(&colarray)?,
NormalizationMethod::MaxAbs => simd_maxabs_normalize_1d(&colarray)?,
_ => {
return Err(TransformError::InvalidInput(
"Unsupported normalization method for wide matrix optimization"
.to_string(),
))
}
};
for i in 0..shape[0] {
normalized[[i, j]] = norm_col[i];
}
}
}
Ok(normalized)
}
}
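// A minimal usage sketch kept as a unit test. It assumes `f64` implements
// `SimdUnifiedOps` and that `arr2` is re-exported from `scirs2_core::ndarray`;
// adjust to the crate's actual test setup if those assumptions do not hold.
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::arr2;

#[test]
fn minmax_columns_span_unit_interval() {
let data = arr2(&[[1.0_f64, 10.0], [2.0, 20.0], [3.0, 30.0]]);
let normalized = simd_normalizearray(&data, NormalizationMethod::MinMax, 0).unwrap();
// After per-column min-max scaling, each column starts at 0.0 and ends at 1.0.
for j in 0..2 {
assert!(normalized[[0, j]].abs() < 1e-12);
assert!((normalized[[2, j]] - 1.0).abs() < 1e-12);
}
}
}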