1use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix2};
13use scirs2_core::numeric::{Float, NumCast};
14use scirs2_core::simd_ops::SimdUnifiedOps;
15
16use crate::error::{Result, TransformError};
17use crate::normalize::{NormalizationMethod, EPSILON};
18
/// CPU cache-line size in bytes; aligned block sizes are rounded up to a multiple of this.
const CACHE_LINE_SIZE: usize = 64;
/// Assumed L1 data-cache size in bytes (32 KiB).
/// NOTE(review): hard-coded, not detected at runtime — verify for target hardware.
const L1_CACHE_SIZE: usize = 32_768;
/// Assumed L2 cache size in bytes (256 KiB) — same caveat as above.
const L2_CACHE_SIZE: usize = 262_144;
25
/// Cache-aware blocking parameters used by the blocked SIMD normalization
/// kernels in this module.
#[derive(Debug, Clone)]
pub struct AdaptiveBlockSizer {
    /// Block size in elements, chosen from the data's estimated cache footprint.
    pub optimal_block_size: usize,
    /// Whether block sizes should be rounded up to a cache-line multiple.
    pub use_cache_alignment: bool,
    /// Software-prefetch look-ahead distance — advisory; no code visible in
    /// this file issues prefetches. TODO(review): confirm intended consumer.
    pub prefetch_distance: usize,
}
36
37impl AdaptiveBlockSizer {
38 pub fn new<F>(datashape: &[usize]) -> Self
40 where
41 F: Float + NumCast,
42 {
43 let element_size = std::mem::size_of::<F>();
44 let data_size = datashape.iter().product::<usize>() * element_size;
45
46 let optimal_block_size = if data_size <= L1_CACHE_SIZE / 4 {
48 256
50 } else if data_size <= L2_CACHE_SIZE / 2 {
51 128
53 } else {
54 64
56 };
57
58 let use_cache_alignment = data_size > L1_CACHE_SIZE;
59 let prefetch_distance = if data_size > L2_CACHE_SIZE { 8 } else { 4 };
60
61 AdaptiveBlockSizer {
62 optimal_block_size,
63 use_cache_alignment,
64 prefetch_distance,
65 }
66 }
67
68 pub fn get_aligned_block_size(&self, dimensionsize: usize) -> usize {
70 if self.use_cache_alignment {
71 let cache_aligned =
73 (self.optimal_block_size + CACHE_LINE_SIZE - 1) & !(CACHE_LINE_SIZE - 1);
74 cache_aligned.min(dimensionsize)
75 } else {
76 self.optimal_block_size.min(dimensionsize)
77 }
78 }
79}
80
81#[allow(dead_code)]
83pub fn simd_minmax_normalize_1d<F>(array: &Array1<F>) -> Result<Array1<F>>
84where
85 F: Float + NumCast + SimdUnifiedOps,
86{
87 if array.is_empty() {
88 return Err(TransformError::InvalidInput(
89 "Input array is empty".to_string(),
90 ));
91 }
92
93 let min = F::simd_min_element(&array.view());
94 let max = F::simd_max_element(&array.view());
95 let range = max - min;
96
97 if range.abs() <= F::from(EPSILON).expect("Failed to convert to float") {
98 return Ok(Array1::from_elem(
100 array.len(),
101 F::from(0.5).expect("Failed to convert constant to float"),
102 ));
103 }
104
105 let minarray = Array1::from_elem(array.len(), min);
107 let normalized = F::simd_sub(&array.view(), &minarray.view());
108 let rangearray = Array1::from_elem(array.len(), range);
109 let result = F::simd_div(&normalized.view(), &rangearray.view());
110
111 Ok(result)
112}
113
114#[allow(dead_code)]
116pub fn simd_zscore_normalize_1d<F>(array: &Array1<F>) -> Result<Array1<F>>
117where
118 F: Float + NumCast + SimdUnifiedOps,
119{
120 if array.is_empty() {
121 return Err(TransformError::InvalidInput(
122 "Input array is empty".to_string(),
123 ));
124 }
125
126 let mean = F::simd_mean(&array.view());
127 let n = F::from(array.len()).expect("Operation failed");
128
129 let meanarray = Array1::from_elem(array.len(), mean);
131 let centered = F::simd_sub(&array.view(), &meanarray.view());
132 let squared = F::simd_mul(¢ered.view(), ¢ered.view());
133 let variance = F::simd_sum(&squared.view()) / n;
134 let std_dev = variance.sqrt();
135
136 if std_dev <= F::from(EPSILON).expect("Failed to convert to float") {
137 return Ok(Array1::zeros(array.len()));
139 }
140
141 let stdarray = Array1::from_elem(array.len(), std_dev);
143 let result = F::simd_div(¢ered.view(), &stdarray.view());
144
145 Ok(result)
146}
147
148#[allow(dead_code)]
150pub fn simd_l2_normalize_1d<F>(array: &Array1<F>) -> Result<Array1<F>>
151where
152 F: Float + NumCast + SimdUnifiedOps,
153{
154 if array.is_empty() {
155 return Err(TransformError::InvalidInput(
156 "Input array is empty".to_string(),
157 ));
158 }
159
160 let l2_norm = F::simd_norm(&array.view());
161
162 if l2_norm <= F::from(EPSILON).expect("Failed to convert to float") {
163 return Ok(Array1::zeros(array.len()));
165 }
166
167 let normarray = Array1::from_elem(array.len(), l2_norm);
169 let result = F::simd_div(&array.view(), &normarray.view());
170
171 Ok(result)
172}
173
174#[allow(dead_code)]
176pub fn simd_maxabs_normalize_1d<F>(array: &Array1<F>) -> Result<Array1<F>>
177where
178 F: Float + NumCast + SimdUnifiedOps,
179{
180 if array.is_empty() {
181 return Err(TransformError::InvalidInput(
182 "Input array is empty".to_string(),
183 ));
184 }
185
186 let absarray = F::simd_abs(&array.view());
187 let max_abs = F::simd_max_element(&absarray.view());
188
189 if max_abs <= F::from(EPSILON).expect("Failed to convert to float") {
190 return Ok(Array1::zeros(array.len()));
192 }
193
194 let max_absarray = Array1::from_elem(array.len(), max_abs);
196 let result = F::simd_div(&array.view(), &max_absarray.view());
197
198 Ok(result)
199}
200
201#[allow(dead_code)]
203pub fn simd_normalizearray<S, F>(
204 array: &ArrayBase<S, Ix2>,
205 method: NormalizationMethod,
206 axis: usize,
207) -> Result<Array2<F>>
208where
209 S: Data<Elem = F>,
210 F: Float + NumCast + SimdUnifiedOps,
211{
212 if !array.is_standard_layout() {
213 return Err(TransformError::InvalidInput(
214 "Input array must be in standard memory layout".to_string(),
215 ));
216 }
217
218 if array.ndim() != 2 {
219 return Err(TransformError::InvalidInput(
220 "Only 2D arrays are supported".to_string(),
221 ));
222 }
223
224 if axis >= array.ndim() {
225 return Err(TransformError::InvalidInput(format!(
226 "Invalid axis {} for array with {} dimensions",
227 axis,
228 array.ndim()
229 )));
230 }
231
232 let shape = array.shape();
233 let mut normalized = Array2::zeros((shape[0], shape[1]));
234
235 match method {
236 NormalizationMethod::MinMax => simd_normalize_block_minmax(array, &mut normalized, axis)?,
237 NormalizationMethod::ZScore => simd_normalize_block_zscore(array, &mut normalized, axis)?,
238 NormalizationMethod::L2 => simd_normalize_block_l2(array, &mut normalized, axis)?,
239 NormalizationMethod::MaxAbs => simd_normalize_block_maxabs(array, &mut normalized, axis)?,
240 _ => {
241 return Err(TransformError::InvalidInput(
243 "SIMD implementation not available for this normalization method".to_string(),
244 ));
245 }
246 }
247
248 Ok(normalized)
249}
250
/// Min-max normalizes `array` into `normalized` using cache-sized blocks.
///
/// `axis == 0`: two passes over columns — first gather each column's global
/// min/max, then rescale to `(x - min) / range`; (near-)constant columns
/// (range within `EPSILON`) are filled with 0.5, matching
/// `simd_minmax_normalize_1d`. `axis == 1`: each row is delegated to
/// `simd_minmax_normalize_1d`.
#[allow(dead_code)]
fn simd_normalize_block_minmax<S, F>(
    array: &ArrayBase<S, Ix2>,
    normalized: &mut Array2<F>,
    axis: usize,
) -> Result<()>
where
    S: Data<Elem = F>,
    F: Float + NumCast + SimdUnifiedOps,
{
    let shape = array.shape();
    // Block size adapted to the data's cache footprint, sized along the
    // dimension being iterated (columns for axis 0, rows for axis 1).
    let block_sizer = AdaptiveBlockSizer::new::<F>(&[shape[0], shape[1]]);
    let block_size =
        block_sizer.get_aligned_block_size(if axis == 0 { shape[1] } else { shape[0] });

    if axis == 0 {
        let mut global_mins = Array1::zeros(shape[1]);
        let mut global_maxs = Array1::zeros(shape[1]);

        // Pass 1: per-column min and max.
        for block_start in (0..shape[1]).step_by(block_size) {
            let block_end = (block_start + block_size).min(shape[1]);

            for j in block_start..block_end {
                let col = array.column(j);
                // to_owned() materializes a contiguous buffer for the SIMD reductions.
                let colarray = col.to_owned();
                global_mins[j] = F::simd_min_element(&colarray.view());
                global_maxs[j] = F::simd_max_element(&colarray.view());
            }
        }

        // Pass 2: rescale each column (columns are re-copied here — the
        // pass-1 buffers are not retained).
        for block_start in (0..shape[1]).step_by(block_size) {
            let block_end = (block_start + block_size).min(shape[1]);

            for j in block_start..block_end {
                let col = array.column(j);
                let colarray = col.to_owned();
                let range = global_maxs[j] - global_mins[j];

                if range.abs() <= F::from(EPSILON).expect("Failed to convert to float") {
                    // Degenerate column: every value maps to the midpoint 0.5.
                    for i in 0..shape[0] {
                        normalized[[i, j]] =
                            F::from(0.5).expect("Failed to convert constant to float");
                    }
                } else {
                    let minarray = Array1::from_elem(shape[0], global_mins[j]);
                    let rangearray = Array1::from_elem(shape[0], range);
                    let centered = F::simd_sub(&colarray.view(), &minarray.view());
                    let norm_col = F::simd_div(&centered.view(), &rangearray.view());

                    for i in 0..shape[0] {
                        normalized[[i, j]] = norm_col[i];
                    }
                }
            }
        }
    } else {
        // Row-wise: reuse the 1-D kernel per row.
        for block_start in (0..shape[0]).step_by(block_size) {
            let block_end = (block_start + block_size).min(shape[0]);

            for i in block_start..block_end {
                let row = array.row(i);
                let rowarray = row.to_owned();
                let norm_row = simd_minmax_normalize_1d(&rowarray)?;

                for j in 0..shape[1] {
                    normalized[[i, j]] = norm_row[j];
                }
            }
        }
    }
    Ok(())
}
330
331#[allow(dead_code)]
333fn simd_normalize_block_zscore<S, F>(
334 array: &ArrayBase<S, Ix2>,
335 normalized: &mut Array2<F>,
336 axis: usize,
337) -> Result<()>
338where
339 S: Data<Elem = F>,
340 F: Float + NumCast + SimdUnifiedOps,
341{
342 let shape = array.shape();
343 let block_sizer = AdaptiveBlockSizer::new::<F>(&[shape[0], shape[1]]);
344 let block_size =
345 block_sizer.get_aligned_block_size(if axis == 0 { shape[1] } else { shape[0] });
346
347 if axis == 0 {
348 let mut global_means = Array1::zeros(shape[1]);
350 let mut global_stds = Array1::zeros(shape[1]);
351 let n_samples_f = F::from(shape[0]).expect("Failed to convert to float");
352
353 for block_start in (0..shape[1]).step_by(block_size) {
355 let block_end = (block_start + block_size).min(shape[1]);
356
357 for j in block_start..block_end {
358 let col = array.column(j);
359 let colarray = col.to_owned();
360
361 global_means[j] = F::simd_sum(&colarray.view()) / n_samples_f;
363
364 let meanarray = Array1::from_elem(shape[0], global_means[j]);
366 let centered = F::simd_sub(&colarray.view(), &meanarray.view());
367 let squared = F::simd_mul(¢ered.view(), ¢ered.view());
368 let variance = F::simd_sum(&squared.view()) / n_samples_f;
369 global_stds[j] = variance.sqrt();
370
371 if global_stds[j] <= F::from(EPSILON).expect("Failed to convert to float") {
373 global_stds[j] = F::one();
374 }
375 }
376 }
377
378 for block_start in (0..shape[1]).step_by(block_size) {
380 let block_end = (block_start + block_size).min(shape[1]);
381
382 for j in block_start..block_end {
383 let col = array.column(j);
384 let colarray = col.to_owned();
385
386 if global_stds[j] <= F::from(EPSILON).expect("Failed to convert to float") {
387 for i in 0..shape[0] {
389 normalized[[i, j]] = F::zero();
390 }
391 } else {
392 let meanarray = Array1::from_elem(shape[0], global_means[j]);
394 let stdarray = Array1::from_elem(shape[0], global_stds[j]);
395 let centered = F::simd_sub(&colarray.view(), &meanarray.view());
396 let norm_col = F::simd_div(¢ered.view(), &stdarray.view());
397
398 for i in 0..shape[0] {
399 normalized[[i, j]] = norm_col[i];
400 }
401 }
402 }
403 }
404 } else {
405 for block_start in (0..shape[0]).step_by(block_size) {
407 let block_end = (block_start + block_size).min(shape[0]);
408
409 for i in block_start..block_end {
410 let row = array.row(i);
411 let rowarray = row.to_owned();
412 let norm_row = simd_zscore_normalize_1d(&rowarray)?;
413
414 for j in 0..shape[1] {
415 normalized[[i, j]] = norm_row[j];
416 }
417 }
418 }
419 }
420 Ok(())
421}
422
423#[allow(dead_code)]
425fn simd_normalize_block_l2<S, F>(
426 array: &ArrayBase<S, Ix2>,
427 normalized: &mut Array2<F>,
428 axis: usize,
429) -> Result<()>
430where
431 S: Data<Elem = F>,
432 F: Float + NumCast + SimdUnifiedOps,
433{
434 let shape = array.shape();
435 let block_sizer = AdaptiveBlockSizer::new::<F>(&[shape[0], shape[1]]);
436 let block_size =
437 block_sizer.get_aligned_block_size(if axis == 0 { shape[1] } else { shape[0] });
438
439 if axis == 0 {
440 for block_start in (0..shape[1]).step_by(block_size) {
442 let block_end = (block_start + block_size).min(shape[1]);
443
444 for j in block_start..block_end {
445 let col = array.column(j);
446 let colarray = col.to_owned();
447 let norm_col = simd_l2_normalize_1d(&colarray)?;
448
449 for i in 0..shape[0] {
450 normalized[[i, j]] = norm_col[i];
451 }
452 }
453 }
454 } else {
455 for block_start in (0..shape[0]).step_by(block_size) {
457 let block_end = (block_start + block_size).min(shape[0]);
458
459 for i in block_start..block_end {
460 let row = array.row(i);
461 let rowarray = row.to_owned();
462 let l2_norm = F::simd_norm(&rowarray.view());
463
464 if l2_norm <= F::from(EPSILON).expect("Failed to convert to float") {
465 for j in 0..shape[1] {
467 normalized[[i, j]] = F::zero();
468 }
469 } else {
470 let normarray = Array1::from_elem(shape[1], l2_norm);
472 let norm_row = F::simd_div(&rowarray.view(), &normarray.view());
473
474 for j in 0..shape[1] {
475 normalized[[i, j]] = norm_row[j];
476 }
477 }
478 }
479 }
480 }
481 Ok(())
482}
483
484#[allow(dead_code)]
486fn simd_normalize_block_maxabs<S, F>(
487 array: &ArrayBase<S, Ix2>,
488 normalized: &mut Array2<F>,
489 axis: usize,
490) -> Result<()>
491where
492 S: Data<Elem = F>,
493 F: Float + NumCast + SimdUnifiedOps,
494{
495 let shape = array.shape();
496 let block_sizer = AdaptiveBlockSizer::new::<F>(&[shape[0], shape[1]]);
497 let block_size =
498 block_sizer.get_aligned_block_size(if axis == 0 { shape[1] } else { shape[0] });
499
500 if axis == 0 {
501 for block_start in (0..shape[1]).step_by(block_size) {
503 let block_end = (block_start + block_size).min(shape[1]);
504
505 for j in block_start..block_end {
506 let col = array.column(j);
507 let colarray = col.to_owned();
508 let norm_col = simd_maxabs_normalize_1d(&colarray)?;
509
510 for i in 0..shape[0] {
511 normalized[[i, j]] = norm_col[i];
512 }
513 }
514 }
515 } else {
516 for block_start in (0..shape[0]).step_by(block_size) {
518 let block_end = (block_start + block_size).min(shape[0]);
519
520 for i in block_start..block_end {
521 let row = array.row(i);
522 let rowarray = row.to_owned();
523 let absarray = F::simd_abs(&rowarray.view());
524 let max_abs = F::simd_max_element(&absarray.view());
525
526 if max_abs <= F::from(EPSILON).expect("Failed to convert to float") {
527 for j in 0..shape[1] {
529 normalized[[i, j]] = F::zero();
530 }
531 } else {
532 let max_absarray = Array1::from_elem(shape[1], max_abs);
534 let norm_row = F::simd_div(&rowarray.view(), &max_absarray.view());
535
536 for j in 0..shape[1] {
537 normalized[[i, j]] = norm_row[j];
538 }
539 }
540 }
541 }
542 }
543 Ok(())
544}
545
546#[allow(dead_code)]
548pub fn simd_normalize_adaptive<S, F>(
549 array: &ArrayBase<S, Ix2>,
550 method: NormalizationMethod,
551 axis: usize,
552) -> Result<Array2<F>>
553where
554 S: Data<Elem = F>,
555 F: Float + NumCast + SimdUnifiedOps,
556{
557 let shape = array.shape();
558 let data_size = shape[0] * shape[1] * std::mem::size_of::<F>();
559
560 if data_size > L2_CACHE_SIZE {
562 simd_normalize_chunked(array, method, axis)
564 } else if shape[0] > shape[1] * 4 {
565 simd_normalize_optimized_tall(array, method, axis)
567 } else if shape[1] > shape[0] * 4 {
568 simd_normalize_optimized_wide(array, method, axis)
570 } else {
571 simd_normalizearray(array, method, axis)
573 }
574}
575
576#[allow(dead_code)]
578pub fn simd_normalize_batch<S, F>(
579 array: &ArrayBase<S, Ix2>,
580 method: NormalizationMethod,
581 axis: usize,
582 batch_size_mb: usize,
583) -> Result<Array2<F>>
584where
585 S: Data<Elem = F>,
586 F: Float + NumCast + SimdUnifiedOps,
587{
588 let shape = array.shape();
589 let element_size = std::mem::size_of::<F>();
590 let max_elements_per_batch = (batch_size_mb * 1024 * 1024) / element_size;
591
592 if shape[0] * shape[1] <= max_elements_per_batch {
593 return simd_normalize_adaptive(array, method, axis);
595 }
596
597 let mut normalized = Array2::zeros((shape[0], shape[1]));
598
599 if axis == 0 {
600 let cols_per_batch = max_elements_per_batch / shape[0];
602 for col_start in (0..shape[1]).step_by(cols_per_batch) {
603 let col_end = (col_start + cols_per_batch).min(shape[1]);
604 let batch_view = array.slice(scirs2_core::ndarray::s![.., col_start..col_end]);
605 let batch_normalized = simd_normalize_adaptive(&batch_view, method, axis)?;
606
607 for (j_local, j_global) in (col_start..col_end).enumerate() {
608 for i in 0..shape[0] {
609 normalized[[i, j_global]] = batch_normalized[[i, j_local]];
610 }
611 }
612 }
613 } else {
614 let rows_per_batch = max_elements_per_batch / shape[1];
616 for row_start in (0..shape[0]).step_by(rows_per_batch) {
617 let row_end = (row_start + rows_per_batch).min(shape[0]);
618 let batch_view = array.slice(scirs2_core::ndarray::s![row_start..row_end, ..]);
619 let batch_normalized = simd_normalize_adaptive(&batch_view, method, axis)?;
620
621 for (i_local, i_global) in (row_start..row_end).enumerate() {
622 for j in 0..shape[1] {
623 normalized[[i_global, j]] = batch_normalized[[i_local, j]];
624 }
625 }
626 }
627 }
628
629 Ok(normalized)
630}
631
632#[allow(dead_code)]
634fn simd_normalize_chunked<S, F>(
635 array: &ArrayBase<S, Ix2>,
636 method: NormalizationMethod,
637 axis: usize,
638) -> Result<Array2<F>>
639where
640 S: Data<Elem = F>,
641 F: Float + NumCast + SimdUnifiedOps,
642{
643 let shape = array.shape();
644 let mut normalized = Array2::zeros((shape[0], shape[1]));
645
646 let block_sizer = AdaptiveBlockSizer::new::<F>(&[shape[0], shape[1]]);
647 let chunk_size = block_sizer.optimal_block_size * 4; if axis == 0 {
650 for chunk_start in (0..shape[1]).step_by(chunk_size) {
652 let chunk_end = (chunk_start + chunk_size).min(shape[1]);
653 let chunk_view = array.slice(scirs2_core::ndarray::s![.., chunk_start..chunk_end]);
654 let chunk_normalized = simd_normalizearray(&chunk_view, method, axis)?;
655
656 for (j_local, j_global) in (chunk_start..chunk_end).enumerate() {
657 for i in 0..shape[0] {
658 normalized[[i, j_global]] = chunk_normalized[[i, j_local]];
659 }
660 }
661 }
662 } else {
663 for chunk_start in (0..shape[0]).step_by(chunk_size) {
665 let chunk_end = (chunk_start + chunk_size).min(shape[0]);
666 let chunk_view = array.slice(scirs2_core::ndarray::s![chunk_start..chunk_end, ..]);
667 let chunk_normalized = simd_normalizearray(&chunk_view, method, axis)?;
668
669 for (i_local, i_global) in (chunk_start..chunk_end).enumerate() {
670 for j in 0..shape[1] {
671 normalized[[i_global, j]] = chunk_normalized[[i_local, j]];
672 }
673 }
674 }
675 }
676
677 Ok(normalized)
678}
679
680#[allow(dead_code)]
682fn simd_normalize_optimized_tall<S, F>(
683 array: &ArrayBase<S, Ix2>,
684 method: NormalizationMethod,
685 axis: usize,
686) -> Result<Array2<F>>
687where
688 S: Data<Elem = F>,
689 F: Float + NumCast + SimdUnifiedOps,
690{
691 if axis == 0 {
693 simd_normalizearray(array, method, axis)
695 } else {
696 let shape = array.shape();
698 let mut normalized = Array2::zeros((shape[0], shape[1]));
699 let small_block_size = 32; for block_start in (0..shape[0]).step_by(small_block_size) {
702 let block_end = (block_start + small_block_size).min(shape[0]);
703
704 for i in block_start..block_end {
705 let row = array.row(i);
706 let rowarray = row.to_owned();
707 let norm_row = match method {
708 NormalizationMethod::MinMax => simd_minmax_normalize_1d(&rowarray)?,
709 NormalizationMethod::ZScore => simd_zscore_normalize_1d(&rowarray)?,
710 NormalizationMethod::L2 => simd_l2_normalize_1d(&rowarray)?,
711 NormalizationMethod::MaxAbs => simd_maxabs_normalize_1d(&rowarray)?,
712 _ => {
713 return Err(TransformError::InvalidInput(
714 "Unsupported normalization method for tall matrix optimization"
715 .to_string(),
716 ))
717 }
718 };
719
720 for j in 0..shape[1] {
721 normalized[[i, j]] = norm_row[j];
722 }
723 }
724 }
725
726 Ok(normalized)
727 }
728}
729
730#[allow(dead_code)]
732fn simd_normalize_optimized_wide<S, F>(
733 array: &ArrayBase<S, Ix2>,
734 method: NormalizationMethod,
735 axis: usize,
736) -> Result<Array2<F>>
737where
738 S: Data<Elem = F>,
739 F: Float + NumCast + SimdUnifiedOps,
740{
741 if axis == 1 {
743 simd_normalizearray(array, method, axis)
745 } else {
746 let shape = array.shape();
748 let mut normalized = Array2::zeros((shape[0], shape[1]));
749 let wide_block_size = 128; for block_start in (0..shape[1]).step_by(wide_block_size) {
752 let block_end = (block_start + wide_block_size).min(shape[1]);
753
754 for j in block_start..block_end {
755 let col = array.column(j);
756 let colarray = col.to_owned();
757 let norm_col = match method {
758 NormalizationMethod::MinMax => simd_minmax_normalize_1d(&colarray)?,
759 NormalizationMethod::ZScore => simd_zscore_normalize_1d(&colarray)?,
760 NormalizationMethod::L2 => simd_l2_normalize_1d(&colarray)?,
761 NormalizationMethod::MaxAbs => simd_maxabs_normalize_1d(&colarray)?,
762 _ => {
763 return Err(TransformError::InvalidInput(
764 "Unsupported normalization method for wide matrix optimization"
765 .to_string(),
766 ))
767 }
768 };
769
770 for i in 0..shape[0] {
771 normalized[[i, j]] = norm_col[i];
772 }
773 }
774 }
775
776 Ok(normalized)
777 }
778}