ferrolearn-preprocess 0.5.0

Preprocessing transformers for the ferrolearn ML framework
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
//! Robust scaler: median and IQR-based scaling.
//!
//! Each feature is transformed as `(x - median) / IQR` where
//! `IQR = Q75 - Q25`. This scaler is robust to outliers.
//!
//! A zero-IQR column is centered (`x - median`) then divided by an effective
//! scale of 1, matching scikit-learn `_handle_zeros_in_scale`
//! (`_data.py:88`, `:1635`, `:1673-1675`); a constant column therefore maps to 0.
//!
//! NaN is tolerated (ignored in `fit` via `np.nanmedian`/`np.nanpercentile`
//! semantics, passed through `transform`); +/-inf is rejected (sklearn
//! `force_all_finite="allow-nan"`, `_data.py:1601`,`:1665`).
//!
//! Translation target: scikit-learn 1.5.2 `class RobustScaler`
//! (`sklearn/preprocessing/_data.py:1445`) + `robust_scale` (`:1719`). Design:
//! `.design/preprocess/robust_scaler.md`. Tracking: #1247.
//!
//! `## REQ status`
//!
//! | REQ | Status | Anchor |
//! |---|---|---|
//! | REQ-1 per-column median/IQR value match (non-constant) | SHIPPED | `RobustScaler::fit` / `FittedRobustScaler::transform`; sklearn `_data.py:1614`,`:1630-1634`,`:1672-1675` |
//! | REQ-2 zero-IQR / constant column → 0 (center, scale_eff=1) | SHIPPED (#1248) | `transform` `scale_eff = if iqr==0 {1} else {iqr}`; sklearn `_data.py:88`,`:1635` |
//! | REQ-3 InsufficientSamples / ShapeMismatch error contracts | SHIPPED | `fit` / `transform` guards; sklearn `_data.py:1658-1666` |
//! | REQ-11 PyO3 binding (`_RsRobustScaler`) | SHIPPED | `ferrolearn-python/src/extras.rs:1163`, `lib.rs:83` |
//! | REQ-4 quantile_range ctor param + validation | SHIPPED (#1249) | `RobustScaler::quantile_range` field (default `(25,75)`) + `with_quantile_range` validating builder (non-strict `0 <= q_min <= q_max <= 100`, matching sklearn's `if not 0 <= q_min <= q_max <= 100`); `fit` uses `Q(q_max_frac) − Q(q_min_frac)`; sklearn `_data.py:1567`,`:1604-1606`,`:1630` |
//! | REQ-5 with_centering / with_scaling ctor params | SHIPPED (#1250) | `RobustScaler::with_centering`/`with_scaling` fields (default `true`) + `with_with_centering`/`with_with_scaling` builders, threaded into `FittedRobustScaler`; `transform` is conditional `if with_centering: X -= center_` then `if with_scaling: X /= scale_`, mirroring sklearn `_data.py:1672-1675`,`:1616`,`:1640`. R-DEV-4: ferrolearn always materializes `median`/`iqr`; sklearn nulls `center_`/`scale_` when the flag is `False` (flags govern transform APPLICATION so OUTPUT matches sklearn exactly). |
//! | REQ-6 unit_variance ctor param | NOT-STARTED (#1251) | sklearn `_data.py:1636-1638` |
//! | REQ-7 inverse_transform | SHIPPED (#1252) | `FittedRobustScaler::inverse_transform` reconstructs per column `x_orig = x_scaled * scale_ + center_` in sklearn's order (scaling FIRST then centering: `if with_scaling: X *= scale_` then `if with_centering: X += center_`, `_data.py:1706-1708`), using the stored `scale_` (`_handle_zeros_in_scale(iqr)`, so a near-constant column inverts the centered value). `force_all_finite="allow-nan"`: NaN passes through (`nan*scale+center=nan`); +/-inf rejected (`InvalidParameter`, #2200 precedent); `ShapeMismatch` on column-count mismatch. Live-oracle tests (`tests/divergence_robust_scaler.rs`): `req7_inverse_roundtrip`, `req7_inverse_holdout`, `req7_inverse_nan_passthrough`, `req7_inverse_with_centering_false`/`with_scaling_false`, `req7_inverse_inf_rejected`. Consumer: crate re-export + PyO3 `_RsRobustScaler` (`extras.rs:3228`). sklearn `_data.py:1678`,`:1706-1708` |
//! | REQ-8 `robust_scale` free function + axis | SHIPPED (#1253) | `robust_scale` free fn delegating to `RobustScaler` (`axis=0` native, `axis=1` transpose→fit_transform→transpose); sklearn `_data.py:1719`,`:1845-1848` |
//! | REQ-9 NaN tolerance (allow-nan / nanmedian / nanpercentile) | SHIPPED (#1254) | `RobustScaler::fit` filters `is_nan()` out of each column BEFORE the sort + `quantile_sorted` (median = `Q50`, IQR = `Q(q_max)−Q(q_min)`), so center_/iqr/scale_ are computed over the FINITE values only — mirroring sklearn `np.nanmedian(X, axis=0)` (`_data.py:1614`) + per-column `np.nanpercentile(column_data, quantile_range)` (`:1630`) under `force_all_finite="allow-nan"` (`:1601`). An ALL-NaN column collects no finite values -> the sorted slice is EMPTY -> `quantile_sorted` returns `F::nan()` (made total / non-panicking, R-CODE-2) -> center_/iqr/scale_ = NaN (`_handle_zeros_in_scale` leaves NaN since `NaN < 10*eps` is false). NaN inputs pass through `transform` (`(nan-center)/scale = nan`). inf-rejection (allow-nan REJECTS +/-inf, MinMaxScaler #2200 precedent): `fit` AND `transform` return `InvalidParameter` ("Input X contains infinity...") on any `is_infinite()` element. Finite-data behavior is byte-identical (same sort + linear-interp quantile; REQ-1/#2204 green guards unchanged). Live-oracle tests (`tests/divergence_robust_scaler.rs`): `req9_nan_fit_single_column_ignored` (center_=4.0/scale_=3.0/transform `[[-1],[nan],[-1/3],[1/3],[1]]`), `req9_nan_fit_multi_column_scattered`, `req9_all_nan_column_yields_nan_no_panic`, `req9_with_centering_false_nan`, `req9_with_scaling_false_nan`, `inf_rejected_fit`, `inf_rejected_transform`, `nan_only_still_fits`. Non-test consumer: PyO3 `_RsRobustScaler` (`ferrolearn-python/src/extras.rs:3228`, registered `lib.rs:94`) marshals `fit`/`transform` over `FittedRobustScaler<f64>` + `FittedPipelineTransformer` impl + crate re-export. **f32 caveat (#2206):** numpy `np.nanpercentile` UPCASTS float32 input to float64 for the linear interpolation (returning float64), so sklearn's `scale_`/`center_` are float64 even for f32 input; ferrolearn's `quantile_sorted::<F>` interpolates in `F`, so on f32 a non-trivial interpolation fraction at larger magnitudes diverges (~3e-4 abs). Same class as StandardScaler #2205 / OPTICS #2195 (generic-F vs sklearn-float64-upcast); the f64 path is bit-exact (2000-fixture fuzz, worst diff 7e-15). Tracked #2206, pinned `#[ignore]` in `tests/divergence_robust_scaler.rs::divergence_f32_nanpercentile_upcasts_to_float64`. Future fix: interpolate the quantile in f64 regardless of `F`. sklearn `_data.py:1601`,`:1614`,`:1630`,`:1665` |
//! | REQ-10 center_ / scale_ attribute names + _handle_zeros scale_ | SHIPPED (#1255) | `FittedRobustScaler::center` (= `median`) / `scale` (= `scale_` field = `_handle_zeros_in_scale(iqr)`, `1.0` on zero-IQR columns); `fit` sets `scale_ = iqr.mapv(\|v\| if v==0 {1} else {v})`; sklearn `_data.py:1505-1514`,`:1634-1635` |
//! | REQ-12 copy ctor param + in-place semantics | NOT-STARTED (#1256) | sklearn `_data.py:1568`,`:1661` |
//! | REQ-13 sparse CSC/CSR support | NOT-STARTED (#1257) | sklearn `_data.py:1609-1612`,`:1668-1670` |
//! | REQ-14 get_feature_names_out / n_features_in_ | NOT-STARTED (#1258) | sklearn `OneToOneFeatureMixin` |
//! | REQ-15 ferray substrate | NOT-STARTED (#1259) | R-SUBSTRATE |

use ferrolearn_core::error::FerroError;
use ferrolearn_core::pipeline::{FittedPipelineTransformer, PipelineTransformer};
use ferrolearn_core::traits::{Fit, FitTransform, Transform};
use ndarray::{Array1, Array2};
use num_traits::Float;

// ---------------------------------------------------------------------------
// Helper: compute quantile of a sorted slice
// ---------------------------------------------------------------------------

/// Compute the `q`-th quantile (0.0–1.0) of a sorted f64 slice using numpy's
/// default `'linear'` interpolation at the virtual index `(n-1)*q`, **in f64**.
///
/// The input column values are pre-upcast to `f64` (`to_f64()`), the virtual
/// index, fraction, and the interpolation `sorted[lo] + frac*(sorted[hi]-sorted[lo])`
/// are all computed in f64, and the result is f64 — matching numpy's
/// `np.nanpercentile`/`np.nanmedian`, which UPCASTS float32 input to float64 for
/// the interpolation and returns a float64 result (`_data.py:1614`,`:1630`,
/// `:1634-1635`; #2206). For f64 input this is byte-identical to the prior
/// in-`F` path (the upcast is a no-op); for f32 input the fraction no longer
/// rounds in f32, so `scale_`/`center_` match sklearn's float64 nanpercentile.
///
/// Total / non-panicking (R-CODE-2): an EMPTY slice returns `f64::NAN` rather
/// than indexing out of bounds. This is the all-NaN-column path — sklearn's
/// `np.nanpercentile` / `np.nanmedian` over a column with no finite values also
/// yields `nan`. Finite, non-empty input is unchanged.
fn quantile_sorted(sorted: &[f64], q: f64) -> f64 {
    let n = sorted.len();
    if n == 0 {
        // Empty slice (an all-NaN column once NaNs are filtered out): no finite
        // value to interpolate -> NaN, matching np.nanpercentile([], q) -> nan.
        return f64::NAN;
    }
    if n == 1 {
        return sorted[0];
    }
    let idx = q * (n - 1) as f64;
    let lo = idx.floor() as usize;
    let hi = idx.ceil() as usize;
    if lo == hi {
        return sorted[lo];
    }
    let frac = idx - lo as f64;
    sorted[lo] + (sorted[hi] - sorted[lo]) * frac
}

// ---------------------------------------------------------------------------
// RobustScaler (unfitted)
// ---------------------------------------------------------------------------

/// An unfitted robust scaler.
///
/// Calling [`Fit::fit`] learns the per-column medians and interquartile ranges
/// (IQR = Q75 − Q25) and returns a [`FittedRobustScaler`] that can transform
/// new data.
///
/// A zero-IQR column is centered then divided by an effective scale of 1
/// (matching scikit-learn `_handle_zeros_in_scale`), so a constant column
/// maps to 0.
///
/// # Examples
///
/// ```
/// use ferrolearn_preprocess::RobustScaler;
/// use ferrolearn_core::traits::{Fit, Transform};
/// use ndarray::array;
///
/// let scaler = RobustScaler::<f64>::new();
/// let x = array![[1.0, 10.0], [2.0, 20.0], [3.0, 30.0], [100.0, 40.0]];
/// let fitted = scaler.fit(&x, &()).unwrap();
/// let scaled = fitted.transform(&x).unwrap();
/// ```
#[derive(Debug, Clone)]
pub struct RobustScaler<F> {
    /// The `(q_min, q_max)` percentile pair (each in `0..=100`) whose difference
    /// `Q(q_max) − Q(q_min)` defines the per-column scale. Defaults to
    /// `(25.0, 75.0)` (the interquartile range), mirroring sklearn
    /// `RobustScaler(quantile_range=(25.0, 75.0))` (`_data.py:1567`).
    pub quantile_range: (F, F),
    /// Whether to center the data (subtract the per-column median) on transform.
    ///
    /// Mirrors sklearn `RobustScaler(with_centering=True)` (`_data.py:1616`):
    /// when `false`, [`Transform::transform`] does NOT subtract the median
    /// (`if self.with_centering: X -= self.center_`, `_data.py:1672-1673`).
    /// Default `true`.
    pub with_centering: bool,
    /// Whether to scale the data (divide by the per-column IQR) on transform.
    ///
    /// Mirrors sklearn `RobustScaler(with_scaling=True)` (`_data.py:1640`):
    /// when `false`, [`Transform::transform`] does NOT divide by the scale
    /// (`if self.with_scaling: X /= self.scale_`, `_data.py:1674-1675`).
    /// Default `true`.
    pub with_scaling: bool,
    _marker: std::marker::PhantomData<F>,
}

impl<F: Float + Send + Sync + 'static> RobustScaler<F> {
    /// Create a new `RobustScaler` with the default quantile range `(25.0, 75.0)`
    /// (the interquartile range).
    #[must_use]
    pub fn new() -> Self {
        // `F::from` on these in-range literals is infallible for f32/f64; fall
        // back to (25, 75) via integer `from` (also infallible) if it ever isn't.
        let q_min = F::from(25.0).unwrap_or_else(|| F::from(25u8).unwrap_or_else(F::zero));
        let q_max = F::from(75.0).unwrap_or_else(|| F::from(75u8).unwrap_or_else(F::zero));
        Self {
            quantile_range: (q_min, q_max),
            with_centering: true,
            with_scaling: true,
            _marker: std::marker::PhantomData,
        }
    }

    /// Set whether to center (subtract the median) on transform, returning the
    /// reconfigured scaler.
    ///
    /// Mirrors sklearn's `with_centering` constructor parameter
    /// (`_data.py:1616`): `false` disables centering
    /// (`if self.with_centering: X -= self.center_`, `_data.py:1672-1673`).
    #[must_use]
    pub fn with_with_centering(mut self, with_centering: bool) -> Self {
        self.with_centering = with_centering;
        self
    }

    /// Set whether to scale (divide by the IQR) on transform, returning the
    /// reconfigured scaler.
    ///
    /// Mirrors sklearn's `with_scaling` constructor parameter (`_data.py:1640`):
    /// `false` disables scaling
    /// (`if self.with_scaling: X /= self.scale_`, `_data.py:1674-1675`).
    #[must_use]
    pub fn with_with_scaling(mut self, with_scaling: bool) -> Self {
        self.with_scaling = with_scaling;
        self
    }

    /// Set the `(q_min, q_max)` percentile pair used to compute the per-column
    /// scale `Q(q_max) − Q(q_min)`, returning the reconfigured scaler.
    ///
    /// Mirrors sklearn's `quantile_range` constructor parameter (`_data.py:1567`)
    /// and its non-strict `0 <= q_min <= q_max <= 100` validation
    /// (`_data.py:1604-1606`, which raises only on `not 0 <= q_min <= q_max <= 100`).
    /// A degenerate equal range (`q_min == q_max`) is accepted: it yields a zero
    /// scale on every column, which the transform then handles via
    /// `_handle_zeros_in_scale` (effective scale 1). The median (`center_`, `Q50`)
    /// is unaffected by `quantile_range`.
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::InvalidParameter`] unless `0 <= q_min <= q_max <= 100`.
    #[must_use = "with_quantile_range returns a reconfigured scaler; use the returned value"]
    pub fn with_quantile_range(mut self, q_min: F, q_max: F) -> Result<Self, FerroError> {
        let hundred = F::from(100.0).unwrap_or_else(|| F::from(100u8).unwrap_or_else(F::zero));
        if !(q_min >= F::zero() && q_min <= q_max && q_max <= hundred) {
            return Err(FerroError::InvalidParameter {
                name: "quantile_range".into(),
                reason: "must satisfy 0 <= q_min <= q_max <= 100".into(),
            });
        }
        self.quantile_range = (q_min, q_max);
        Ok(self)
    }
}

impl<F: Float + Send + Sync + 'static> Default for RobustScaler<F> {
    fn default() -> Self {
        Self::new()
    }
}

// ---------------------------------------------------------------------------
// FittedRobustScaler
// ---------------------------------------------------------------------------

/// A fitted robust scaler holding per-column medians and IQRs.
///
/// Created by calling [`Fit::fit`] on a [`RobustScaler`].
///
/// # Deviation (R-DEV-4): fitted attributes are always materialized
///
/// sklearn sets `center_` / `scale_` to `None` when the corresponding flag
/// (`with_centering` / `with_scaling`) is `False` (`_data.py:1616`,`:1640`).
/// ferrolearn always materializes `median`/`iqr` regardless of the flags,
/// because the `&Array1<F>` getters cannot return `None` without changing their
/// return type. The `with_centering`/`with_scaling` flags instead govern
/// *application* in [`Transform::transform`] (`if with_centering: X -= center_`,
/// `if with_scaling: X /= scale_`, `_data.py:1672-1675`), so the transform
/// OUTPUT matches sklearn exactly. The getters are intentionally NOT `Option`.
#[derive(Debug, Clone)]
pub struct FittedRobustScaler<F> {
    /// Per-column medians (`center_`) learned during fitting.
    ///
    /// Stored as `F` — the INPUT dtype. sklearn's `center_ = np.nanmedian(X, axis=0)`
    /// (`_data.py:1614`) returns the input dtype (float32 for f32 input, float64 for
    /// f64), UNLIKE `scale_`/`np.nanpercentile` which upcasts to float64 (`:1630`,
    /// `#2206`). The median is still COMPUTED in f64 (np.nanmedian upcasts internally
    /// for the q=50 interpolation), then ROUNDED to `F` here — so the stored value
    /// matches numpy's float32 `center_`. `transform` then does `X(F) -= center_(F)`
    /// (`:1672-1673`) with both operands in `F`, matching sklearn bit-for-bit (#2306).
    /// For `F=f64` `F::from(f64)` is exact → identical to the prior path.
    pub(crate) median: Array1<F>,
    /// Per-column interquartile ranges (Q75 − Q25) learned during fitting (f64).
    pub(crate) iqr: Array1<f64>,
    /// Per-column effective scale: `_handle_zeros_in_scale(iqr)`, i.e. the raw
    /// IQR on non-constant columns and `1.0` on zero-IQR/constant columns (f64).
    ///
    /// Mirrors sklearn `RobustScaler.scale_`
    /// (`_data.py:1634-1635`: `self.scale_ = quantiles[1] - quantiles[0];`
    /// `self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)`).
    pub(crate) scale_: Array1<f64>,
    /// Whether `transform` centers (subtracts the median).
    ///
    /// Copied from the unfitted [`RobustScaler::with_centering`] in [`Fit::fit`];
    /// governs `if with_centering: X -= center_` (`_data.py:1672-1673`).
    pub(crate) with_centering: bool,
    /// Whether `transform` scales (divides by the IQR).
    ///
    /// Copied from the unfitted [`RobustScaler::with_scaling`] in [`Fit::fit`];
    /// governs `if with_scaling: X /= scale_` (`_data.py:1674-1675`).
    pub(crate) with_scaling: bool,
    /// `center_` (`median`) is stored in `F` (sklearn `np.nanmedian` returns the
    /// input dtype, `_data.py:1614`; #2306); `scale_`/`iqr` are f64 (sklearn float64
    /// attributes, np.nanpercentile, #2206). `F` also governs the dtype of the
    /// `transform`/`inverse_transform` I/O arrays.
    pub(crate) _marker: std::marker::PhantomData<F>,
}

impl<F: Float + Send + Sync + 'static> FittedRobustScaler<F> {
    /// Inverse the robust scaling, reconstructing the original feature values.
    ///
    /// Per column `x_orig = x_scaled * scale_ + center_`, applied in sklearn's
    /// order — scaling FIRST, then centering: `if with_scaling: X *= scale_`
    /// then `if with_centering: X += center_` (`_data.py:1706-1708`), the
    /// reverse of [`transform`](Transform::transform)'s center-then-scale. Uses
    /// the stored `scale_` (= `_handle_zeros_in_scale(iqr)`, so a near-constant
    /// column's scale is 1 and its inverse leaves the centered value), matching
    /// sklearn's `X *= self.scale_`.
    ///
    /// `force_all_finite="allow-nan"` (`_data.py`): NaN passes through
    /// (`nan*scale + center = nan`); +/-inf is rejected (#2200 precedent).
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if the column count differs from
    /// the fitted feature count, or [`FerroError::InvalidParameter`] if the
    /// input contains +/-inf.
    pub fn inverse_transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        let n_features = self.median.len();
        if x.ncols() != n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![x.nrows(), n_features],
                actual: vec![x.nrows(), x.ncols()],
                context: "FittedRobustScaler::inverse_transform".into(),
            });
        }
        if x.iter().any(|v| v.is_infinite()) {
            return Err(FerroError::InvalidParameter {
                name: "X".into(),
                reason: "Input X contains infinity or a value too large for dtype.".into(),
            });
        }

        let mut out = x.to_owned();
        for (j, mut col) in out.columns_mut().into_iter().enumerate() {
            // `center_` (`median`) is `F` — the input dtype (np.nanmedian,
            // `_data.py:1614`; #2306). Promote to f64 for the per-op arithmetic.
            let med = self.median[j].to_f64().unwrap_or(f64::NAN);
            let scale = self.scale_[j];
            // sklearn `inverse_transform`: `if with_scaling: X *= scale_` then
            // `if with_centering: X += center_` (`_data.py:1706-1708`). `scale_` is
            // f64 (np.nanpercentile, #2206), `center_` is `F` (np.nanmedian, #2306);
            // numpy applies
            // each in-place op as a SEPARATE store back into the `F`-typed array,
            // so the per-element result of EACH op rounds to `F` — two roundings
            // in the REVERSE order of `transform` (multiply-then-round,
            // add-then-round, #2305). For `F=f64` `F::from(f64)` is exact →
            // identical to the prior path; for f32 each step rounds to f32. NaN
            // propagates.
            for v in &mut col {
                if self.with_scaling {
                    *v = F::from(v.to_f64().unwrap_or(f64::NAN) * scale).unwrap_or_else(F::nan);
                }
                if self.with_centering {
                    *v = F::from(v.to_f64().unwrap_or(f64::NAN) + med).unwrap_or_else(F::nan);
                }
            }
        }
        Ok(out)
    }

    /// Return the per-column medians learned during fitting.
    ///
    /// Stored as `F` — the input dtype, matching sklearn's
    /// `center_ = np.nanmedian(X, axis=0)` (`_data.py:1614`), which returns the
    /// input dtype (float32 for f32, float64 for f64), UNLIKE `scale_` /
    /// [`scale`](Self::scale) which stays f64 (#2306). Equal to the prior value for
    /// `F=f64`.
    #[must_use]
    pub fn median(&self) -> &Array1<F> {
        &self.median
    }

    /// Return the per-column IQR values learned during fitting (f64).
    #[must_use]
    pub fn iqr(&self) -> &Array1<f64> {
        &self.iqr
    }

    /// Return the per-column centers (the medians) learned during fitting.
    ///
    /// Mirrors sklearn `RobustScaler.center_` — "The median value for each
    /// feature in the training set" (`_data.py:1505-1514`). Equal to
    /// [`median`](Self::median). `F` — the input dtype, matching
    /// `np.nanmedian(X, axis=0)` (`_data.py:1614`; #2306).
    #[must_use]
    pub fn center(&self) -> &Array1<F> {
        &self.median
    }

    /// Return the per-column scales learned during fitting.
    ///
    /// Mirrors sklearn `RobustScaler.scale_` = `_handle_zeros_in_scale(iqr)`
    /// (`_data.py:1634-1635`, `:88`): equal to the raw [`iqr`](Self::iqr) on
    /// non-constant columns, but `1.0` on zero-IQR/constant columns (so it
    /// differs from [`iqr`](Self::iqr) there). f64 (sklearn float64 attribute,
    /// #2206).
    #[must_use]
    pub fn scale(&self) -> &Array1<f64> {
        &self.scale_
    }
}

// ---------------------------------------------------------------------------
// Trait implementations
// ---------------------------------------------------------------------------

impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, ()> for RobustScaler<F> {
    type Fitted = FittedRobustScaler<F>;
    type Error = FerroError;

    /// Fit the scaler by computing per-column medians and IQRs.
    ///
    /// NaN inputs are tolerated and IGNORED: the per-column median and the
    /// percentile pair are computed over the FINITE values only, mirroring
    /// sklearn's `np.nanmedian` / `np.nanpercentile` under
    /// `force_all_finite="allow-nan"` (`_data.py:1601`,`:1614`,`:1630`). An
    /// all-NaN column yields `center_`/`iqr`/`scale_` = NaN (no panic).
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::InsufficientSamples`] if the input has zero rows, or
    /// [`FerroError::InvalidParameter`] if any input element is +/-inf (sklearn
    /// `force_all_finite="allow-nan"` rejects infinity, `_data.py:1601`).
    fn fit(&self, x: &Array2<F>, _y: &()) -> Result<FittedRobustScaler<F>, FerroError> {
        let n_samples = x.nrows();
        if n_samples == 0 {
            return Err(FerroError::InsufficientSamples {
                required: 1,
                actual: 0,
                context: "RobustScaler::fit".into(),
            });
        }
        // sklearn validates X with `force_all_finite="allow-nan"`
        // (`_data.py:1601`): NaN is permitted (ignored by `np.nanmedian` /
        // `np.nanpercentile`), but +/-inf raises ValueError ("Input X contains
        // infinity or a value too large for dtype('...')"). Mirrors the
        // MinMaxScaler #2200 / StandardScaler #2205 precedent (allow-nan rejects
        // inf).
        if x.iter().any(|v| v.is_infinite()) {
            return Err(FerroError::InvalidParameter {
                name: "X".into(),
                reason: "Input X contains infinity or a value too large for dtype.".into(),
            });
        }

        let n_features = x.ncols();
        // `median_arr` holds the f64-computed medians; it is ROUNDED to `F` when
        // stored on the fitted scaler (see below) to match np.nanmedian returning
        // the input dtype (#2306).
        let mut median_arr = Array1::<f64>::zeros(n_features);
        let mut iqr_arr = Array1::zeros(n_features);

        // Convert the F percentiles (0..=100) to the f64 fractions (0..=1) that
        // `quantile_sorted` expects, mirroring sklearn's per-column
        // `np.nanpercentile(column_data, quantile_range)` (`_data.py:1630`).
        let q_min_frac = self.quantile_range.0.to_f64().unwrap_or(25.0) / 100.0;
        let q_max_frac = self.quantile_range.1.to_f64().unwrap_or(75.0) / 100.0;

        for j in 0..n_features {
            // NaN-ignoring median + percentiles over the FINITE values only,
            // mirroring sklearn's `np.nanmedian(X, axis=0)` (`_data.py:1614`) and
            // per-column `np.nanpercentile(column_data, quantile_range)`
            // (`_data.py:1630`) under `force_all_finite="allow-nan"` (`:1601`):
            // NaN entries are removed BEFORE sorting + interpolating. An ALL-NaN
            // column collects no finite values -> the sorted slice is empty ->
            // `quantile_sorted` returns NaN (no panic, no OOB index), matching
            // `np.nanmedian`/`np.nanpercentile` -> nan on an all-NaN column.
            // UPCAST the finite column values to f64 BEFORE sorting +
            // interpolating, mirroring `np.nanpercentile`/`np.nanmedian`, which
            // upcast float32 input to float64 for the interpolation and return a
            // float64 result (`_data.py:1614`,`:1630`,`:1634-1635`; #2206). For
            // `F=f64` `to_f64()` is a no-op → byte-identical to the prior path.
            let mut col: Vec<f64> = x
                .column(j)
                .iter()
                .filter(|v| !v.is_nan())
                .map(|v| v.to_f64().unwrap_or(f64::NAN))
                .collect();
            col.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

            let med = quantile_sorted(&col, 0.5);
            let q_lo = quantile_sorted(&col, q_min_frac);
            let q_hi = quantile_sorted(&col, q_max_frac);

            median_arr[j] = med;
            // scale_ = Q(q_max) − Q(q_min) (sklearn `_data.py:1634`). An all-NaN
            // column has med/q_lo/q_hi = NaN -> iqr = NaN -> scale_ = NaN below.
            iqr_arr[j] = q_hi - q_lo;
        }

        // scale_ = _handle_zeros_in_scale(iqr): the raw IQR on non-constant
        // columns, 1.0 on near-constant columns. sklearn `_handle_zeros_in_scale`
        // (`_data.py:114-119`, called `:1635`) uses `constant_mask = scale <
        // 10 * finfo(dtype).eps` — a tiny-but-nonzero IQR (NOT just exactly 0) is
        // replaced by 1.0 (#2204). `scale_` is FLOAT64 (np.nanpercentile upcasts,
        // #2206), so the eps is f64 eps regardless of `F`.
        let ten_eps = f64::EPSILON * 10.0;
        let scale_arr = iqr_arr.mapv(|v| if v < ten_eps { 1.0 } else { v });

        // ROUND the f64-computed median to `F` — sklearn's `center_ = np.nanmedian`
        // returns the INPUT dtype (float32 for f32, float64 for f64), so the stored
        // `center_` is `F`-precision; only `scale_` stays float64 (`_data.py:1614`
        // vs `:1630`; #2306). For `F=f64` `F::from(f64)` is exact (no-op); for f32 a
        // non-f32-representable median rounds to the same float32 bits as
        // np.nanmedian, so `transform`'s `X(f32) -= center_(f32)` matches sklearn.
        let median_f = median_arr.mapv(|v| F::from(v).unwrap_or_else(F::nan));

        Ok(FittedRobustScaler {
            median: median_f,
            iqr: iqr_arr,
            scale_: scale_arr,
            with_centering: self.with_centering,
            with_scaling: self.with_scaling,
            _marker: std::marker::PhantomData,
        })
    }
}

impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for FittedRobustScaler<F> {
    type Output = Array2<F>;
    type Error = FerroError;

    /// Transform data by subtracting the median and dividing by the IQR.
    ///
    /// A zero-IQR column uses an effective scale of 1 (matching scikit-learn
    /// `_handle_zeros_in_scale`, `_data.py:88`, `:1635`, `:1673-1675`), so it is
    /// still centered: a constant column maps to 0 and a non-constant zero-IQR
    /// column maps to `x - median`.
    ///
    /// NaN inputs pass through (`(nan - median)/scale = nan`), and an all-NaN
    /// fitted column (whose `median`/`scale_` are NaN) transforms to NaN,
    /// matching sklearn's `allow-nan` contract (`_data.py:1665`,`:1672-1675`).
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if the number of columns does not
    /// match the number of features seen during fitting, or
    /// [`FerroError::InvalidParameter`] if any input element is +/-inf (sklearn
    /// `force_all_finite="allow-nan"` rejects infinity, `_data.py:1665`).
    fn transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        let n_features = self.median.len();
        if x.ncols() != n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![x.nrows(), n_features],
                actual: vec![x.nrows(), x.ncols()],
                context: "FittedRobustScaler::transform".into(),
            });
        }
        // sklearn `transform` validates with `force_all_finite="allow-nan"`
        // (`_data.py:1665`): NaN passes through the conditional center/scale
        // (`(nan - center)/scale = nan`), but +/-inf raises ValueError
        // (MinMaxScaler #2200 precedent; allow-nan rejects inf).
        if x.iter().any(|v| v.is_infinite()) {
            return Err(FerroError::InvalidParameter {
                name: "X".into(),
                reason: "Input X contains infinity or a value too large for dtype.".into(),
            });
        }

        let mut out = x.to_owned();
        for (j, mut col) in out.columns_mut().into_iter().enumerate() {
            // `center_` (`median`) is `F` — the input dtype, matching np.nanmedian
            // (`_data.py:1614`; #2306). Promote to f64 for the per-op arithmetic.
            let med = self.median[j].to_f64().unwrap_or(f64::NAN);
            // Use the STORED `scale_` (= `_handle_zeros_in_scale(iqr)`, computed
            // in `fit`: the raw IQR on non-constant columns, `1.0` on near-constant
            // columns where `iqr < 10*f64::EPSILON`, #2204). `scale_` is f64
            // (np.nanpercentile upcasts f32 -> f64, #2206).
            let scale_eff = self.scale_[j];
            // Conditional center/scale mirroring sklearn `transform`
            // (`_data.py:1672-1675`): `if with_centering: X -= center_` then
            // `if with_scaling: X /= scale_`. sklearn does `X(F) -= center_(F)` —
            // BOTH operands the input dtype `F` (center_ is float32 for f32 input),
            // so the subtraction's f64 result rounds to `F` the SAME as sklearn's
            // f32 in-place subtract: because `med` is already the `F`-rounded
            // median, `v.to_f64() - med` followed by `F::from(..)` reproduces
            // sklearn's `X(f32) -= center_(f32)` bit-for-bit (#2306). `scale_` is
            // f64, applied as a SEPARATE in-place op, again rounding to `F` (#2305).
            // For `F=f64` `F::from(f64)` is exact → byte-identical to the prior
            // path. NaN propagates.
            for v in &mut col {
                if self.with_centering {
                    // X -= center_ (operand is the F-rounded median; round to F)
                    *v = F::from(v.to_f64().unwrap_or(f64::NAN) - med).unwrap_or_else(F::nan);
                }
                if self.with_scaling {
                    // X /= scale_ (round to F)
                    *v = F::from(v.to_f64().unwrap_or(f64::NAN) / scale_eff).unwrap_or_else(F::nan);
                }
            }
        }
        Ok(out)
    }
}

/// Implement `Transform` on the unfitted scaler to satisfy the `FitTransform: Transform`
/// supertrait bound. Calling `transform` on an unfitted scaler always returns an error.
impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for RobustScaler<F> {
    type Output = Array2<F>;
    type Error = FerroError;

    /// Always returns an error — the scaler must be fitted first.
    ///
    /// Use [`Fit::fit`] to produce a [`FittedRobustScaler`], then call
    /// [`Transform::transform`] on that.
    fn transform(&self, _x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        Err(FerroError::InvalidParameter {
            name: "RobustScaler".into(),
            reason: "scaler must be fitted before calling transform; use fit() first".into(),
        })
    }
}

impl<F: Float + Send + Sync + 'static> FitTransform<Array2<F>> for RobustScaler<F> {
    type FitError = FerroError;

    /// Fit the scaler on `x` and return the scaled output in one step.
    ///
    /// # Errors
    ///
    /// Returns an error if fitting fails (e.g., zero rows).
    fn fit_transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        let fitted = self.fit(x, &())?;
        fitted.transform(x)
    }
}

// ---------------------------------------------------------------------------
// robust_scale (free function)
// ---------------------------------------------------------------------------

/// Standardize a dataset along an axis by centering to the median and scaling
/// by the interquartile range — the functional form of [`RobustScaler`].
///
/// This is the translation of scikit-learn's `robust_scale`
/// (`sklearn/preprocessing/_data.py:1719`). It delegates to [`RobustScaler`]
/// rather than reimplementing the quantile math:
///
/// - `axis == 0` (default in sklearn): independently scale each FEATURE/column —
///   the estimator's native behavior, `s.fit_transform(X)` (`_data.py:1846`).
/// - `axis == 1`: independently scale each SAMPLE/row by transposing first,
///   running the column-wise scaler, then transposing back —
///   `s.fit_transform(X.T).T` (`_data.py:1848`).
///
/// sklearn's `unit_variance` (#1251) and `copy` (#1256) parameters are NOT
/// exposed here; they are tracked as separate REQs.
///
/// # Errors
///
/// Returns [`FerroError::InvalidParameter`] if `axis` is not `0` or `1`, if the
/// `quantile_range` is invalid (propagated from
/// [`RobustScaler::with_quantile_range`]), or [`FerroError::InsufficientSamples`]
/// if the input (or its transpose, for `axis == 1`) has zero rows.
#[must_use = "robust_scale returns the scaled array; use the returned value"]
pub fn robust_scale<F: Float + Send + Sync + 'static>(
    x: &Array2<F>,
    axis: usize,
    with_centering: bool,
    with_scaling: bool,
    quantile_range: (F, F),
) -> Result<Array2<F>, FerroError> {
    if axis != 0 && axis != 1 {
        return Err(FerroError::InvalidParameter {
            name: "axis".into(),
            reason: "must be 0 or 1".into(),
        });
    }

    let scaler = RobustScaler::new()
        .with_with_centering(with_centering)
        .with_with_scaling(with_scaling)
        .with_quantile_range(quantile_range.0, quantile_range.1)?;

    if axis == 0 {
        // Column-wise = the estimator's native behavior (`_data.py:1846`).
        let fitted = scaler.fit(x, &())?;
        fitted.transform(x)
    } else {
        // axis == 1: sklearn's `s.fit_transform(X.T).T` (`_data.py:1848`).
        let xt = x.t().to_owned();
        let fitted = scaler.fit(&xt, &())?;
        let result = fitted.transform(&xt)?;
        Ok(result.t().to_owned())
    }
}

// ---------------------------------------------------------------------------
// Pipeline integration (generic)
// ---------------------------------------------------------------------------

impl<F: Float + Send + Sync + 'static> PipelineTransformer<F> for RobustScaler<F> {
    /// Fit the scaler using the pipeline interface.
    ///
    /// The `y` argument is ignored; it exists only for API compatibility.
    ///
    /// # Errors
    ///
    /// Propagates errors from [`Fit::fit`].
    fn fit_pipeline(
        &self,
        x: &Array2<F>,
        _y: &Array1<F>,
    ) -> Result<Box<dyn FittedPipelineTransformer<F>>, FerroError> {
        let fitted = self.fit(x, &())?;
        Ok(Box::new(fitted))
    }
}

impl<F: Float + Send + Sync + 'static> FittedPipelineTransformer<F> for FittedRobustScaler<F> {
    /// Transform data using the pipeline interface.
    ///
    /// # Errors
    ///
    /// Propagates errors from [`Transform::transform`].
    fn transform_pipeline(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        self.transform(x)
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use ndarray::array;

    #[test]
    fn test_robust_scaler_basic() {
        let scaler = RobustScaler::<f64>::new();
        // Symmetric distribution: median = 3, Q25 = 2, Q75 = 4, IQR = 2
        let x = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
        let fitted = scaler.fit(&x, &()).unwrap();
        assert_abs_diff_eq!(fitted.median()[0], 3.0, epsilon = 1e-10);
        assert_abs_diff_eq!(fitted.iqr()[0], 2.0, epsilon = 1e-10);

        let scaled = fitted.transform(&x).unwrap();
        // Median should be 0 after scaling
        assert_abs_diff_eq!(scaled[[2, 0]], 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_zero_iqr_column_centered_to_zero() {
        // A constant (zero-IQR) column is centered then divided by an effective
        // scale of 1 (sklearn `_handle_zeros_in_scale`), so it maps to 0.
        //
        // LIVE ORACLE (sklearn 1.5.2, run from /tmp):
        //   RobustScaler().fit_transform([[7.,1.],[7.,2.],[7.,3.]]).tolist()
        //   == [[0.0, -1.0], [0.0, 0.0], [0.0, 1.0]]
        let scaler = RobustScaler::<f64>::new();
        // Column 0 is constant: IQR = 0
        let x = array![[7.0, 1.0], [7.0, 2.0], [7.0, 3.0]];
        let fitted = scaler.fit(&x, &()).unwrap();
        assert_abs_diff_eq!(fitted.iqr()[0], 0.0, epsilon = 1e-15);
        let scaled = fitted.transform(&x).unwrap();
        // Constant column maps to 0 (centered, effective scale 1).
        for i in 0..3 {
            assert_abs_diff_eq!(scaled[[i, 0]], 0.0, epsilon = 1e-10);
        }
    }

    #[test]
    fn test_outlier_robustness() {
        let scaler = RobustScaler::<f64>::new();
        // Add a large outlier; median should not shift much
        let x = array![[1.0], [2.0], [3.0], [4.0], [1000.0]];
        let fitted = scaler.fit(&x, &()).unwrap();
        // Median of sorted [1,2,3,4,1000] = 3.0
        assert_abs_diff_eq!(fitted.median()[0], 3.0, epsilon = 1e-10);
    }

    #[test]
    fn test_fit_transform_equivalence() {
        let scaler = RobustScaler::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let via_fit_transform = scaler.fit_transform(&x).unwrap();
        let fitted = scaler.fit(&x, &()).unwrap();
        let via_separate = fitted.transform(&x).unwrap();
        for (a, b) in via_fit_transform.iter().zip(via_separate.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-15);
        }
    }

    #[test]
    fn test_shape_mismatch_error() {
        let scaler = RobustScaler::<f64>::new();
        let x_train = array![[1.0, 2.0], [3.0, 4.0]];
        let fitted = scaler.fit(&x_train, &()).unwrap();
        let x_bad = array![[1.0, 2.0, 3.0]];
        assert!(fitted.transform(&x_bad).is_err());
    }

    #[test]
    fn test_insufficient_samples_error() {
        let scaler = RobustScaler::<f64>::new();
        let x: Array2<f64> = Array2::zeros((0, 3));
        assert!(scaler.fit(&x, &()).is_err());
    }

    // -----------------------------------------------------------------------
    // REQ-4: quantile_range constructor parameter (sklearn _data.py:1567,
    // :1604-1606, :1630-1634). Live sklearn 1.5.2 oracle values (R-CHAR-3),
    // computed from /tmp, never copied from the ferrolearn side.
    //
    // Oracle X: col0 = [1,2,...,10], col1 = [100,200,...,1000] (10 rows).
    //   RobustScaler().fit(X)
    //     -> center_ = [5.5, 550.0], scale_ = [4.5, 450.0]    (IQR 25-75)
    //   RobustScaler(quantile_range=(10.,90.)).fit(X)
    //     -> center_ = [5.5, 550.0], scale_ = [7.2, 720.0]
    //     transform([[1.,100.]]) -> [[-0.625, -0.625]]  (= (1-5.5)/7.2)
    // -----------------------------------------------------------------------

    /// Default scaler is byte-identical to the prior default (25/75) behavior:
    /// `center_ ≈ [5.5, 550]`, `scale_ ≈ [4.5, 450]`.
    #[test]
    fn robust_quantile_range_default_unchanged() -> Result<(), FerroError> {
        let mut x = Array2::<f64>::zeros((10, 2));
        for i in 0..10 {
            x[[i, 0]] = (i + 1) as f64;
            x[[i, 1]] = ((i + 1) * 100) as f64;
        }
        let fitted = RobustScaler::<f64>::new().fit(&x, &())?;
        assert_abs_diff_eq!(fitted.median()[0], 5.5, epsilon = 1e-9);
        assert_abs_diff_eq!(fitted.median()[1], 550.0, epsilon = 1e-9);
        assert_abs_diff_eq!(fitted.iqr()[0], 4.5, epsilon = 1e-9);
        assert_abs_diff_eq!(fitted.iqr()[1], 450.0, epsilon = 1e-9);
        Ok(())
    }

    /// `with_quantile_range(10, 90)` matches the live sklearn oracle:
    /// `scale_ ≈ [7.2, 720]`, `center_ ≈ [5.5, 550]`,
    /// `transform(X[:1]) ≈ [[-0.625, -0.625]]`.
    #[test]
    fn robust_quantile_range_10_90_matches_sklearn() -> Result<(), FerroError> {
        let mut x = Array2::<f64>::zeros((10, 2));
        for i in 0..10 {
            x[[i, 0]] = (i + 1) as f64;
            x[[i, 1]] = ((i + 1) * 100) as f64;
        }
        let fitted = RobustScaler::<f64>::new()
            .with_quantile_range(10.0, 90.0)?
            .fit(&x, &())?;
        assert_abs_diff_eq!(fitted.median()[0], 5.5, epsilon = 1e-9);
        assert_abs_diff_eq!(fitted.median()[1], 550.0, epsilon = 1e-9);
        assert_abs_diff_eq!(fitted.iqr()[0], 7.2, epsilon = 1e-9);
        assert_abs_diff_eq!(fitted.iqr()[1], 720.0, epsilon = 1e-9);

        let row0 = array![[1.0, 100.0]];
        let scaled = fitted.transform(&row0)?;
        assert_abs_diff_eq!(scaled[[0, 0]], -0.625, epsilon = 1e-9);
        assert_abs_diff_eq!(scaled[[0, 1]], -0.625, epsilon = 1e-9);
        Ok(())
    }

    /// Validation rejects ranges outside the non-strict `0 <= q_min <= q_max <= 100`
    /// (sklearn `_data.py:1604-1606`: raises only on `not 0 <= q_min <= q_max <= 100`).
    #[test]
    fn robust_quantile_range_validation_rejects() {
        // q_min > q_max (live sklearn: (75,25) -> ValueError "Invalid quantile range")
        assert!(matches!(
            RobustScaler::<f64>::new().with_quantile_range(75.0, 25.0),
            Err(FerroError::InvalidParameter { .. })
        ));
        // q_min < 0
        assert!(matches!(
            RobustScaler::<f64>::new().with_quantile_range(-1.0, 50.0),
            Err(FerroError::InvalidParameter { .. })
        ));
        // q_max > 100
        assert!(matches!(
            RobustScaler::<f64>::new().with_quantile_range(50.0, 110.0),
            Err(FerroError::InvalidParameter { .. })
        ));
    }

    /// A degenerate equal range (`q_min == q_max`) is accepted, matching live
    /// sklearn 1.5.2: `RobustScaler(quantile_range=(50.,50.))` -> OK with
    /// `scale_ = [1.0, 1.0]` (zero IQR -> `_handle_zeros_in_scale` -> 1) and
    /// `center_ = [5.5, 550.0]` on the oracle X (col0 `[1..10]`, col1 `[100..1000]`).
    #[test]
    fn robust_quantile_range_equal_bounds_accepted() -> Result<(), FerroError> {
        let mut x = Array2::<f64>::zeros((10, 2));
        for i in 0..10 {
            x[[i, 0]] = (i + 1) as f64;
            x[[i, 1]] = ((i + 1) * 100) as f64;
        }
        let fitted = RobustScaler::<f64>::new()
            .with_quantile_range(50.0, 50.0)?
            .fit(&x, &())?;
        // center_ == median == [5.5, 550.0] (unaffected by quantile_range).
        assert_abs_diff_eq!(fitted.median()[0], 5.5, epsilon = 1e-9);
        assert_abs_diff_eq!(fitted.median()[1], 550.0, epsilon = 1e-9);
        // Raw IQR (Q50 - Q50) is exactly 0 on every column.
        assert_abs_diff_eq!(fitted.iqr()[0], 0.0, epsilon = 1e-12);
        assert_abs_diff_eq!(fitted.iqr()[1], 0.0, epsilon = 1e-12);
        // Transform uses the effective scale 1 (_handle_zeros), so scale_ == [1, 1]:
        // the centered values are not rescaled.
        let scaled = fitted.transform(&x)?;
        // Row with col0 == 1 (i = 0): (1 - 5.5) / 1 = -4.5; (100 - 550) / 1 = -450.
        assert_abs_diff_eq!(scaled[[0, 0]], -4.5, epsilon = 1e-9);
        assert_abs_diff_eq!(scaled[[0, 1]], -450.0, epsilon = 1e-9);
        Ok(())
    }

    // -----------------------------------------------------------------------
    // REQ-5: with_centering / with_scaling constructor parameters
    // (sklearn _data.py:1616, :1640, :1672-1675). Live sklearn 1.5.2 oracle
    // (R-CHAR-3), X col0 = [1,3,5,7,9], col1 = [100,300,500,700,900] (5 rows):
    //   center_ = [5, 500], scale_ = [4, 400].
    //   RobustScaler(with_centering=True,  with_scaling=True ).transform(X[:2])
    //     -> [[-1, -1], [-0.5, -0.5]]
    //   RobustScaler(with_centering=True,  with_scaling=False).transform(X[:2])
    //     -> [[-4, -400], [-2, -200]]    (center only)
    //   RobustScaler(with_centering=False, with_scaling=True ).transform(X[:2])
    //     -> [[0.25, 0.25], [0.75, 0.75]] (scale only)
    //   RobustScaler(with_centering=False, with_scaling=False).transform(X[:2])
    //     -> [[1, 100], [3, 300]]         (identity)
    // -----------------------------------------------------------------------

    fn oracle_x() -> Array2<f64> {
        array![
            [1.0, 100.0],
            [3.0, 300.0],
            [5.0, 500.0],
            [7.0, 700.0],
            [9.0, 900.0]
        ]
    }

    /// Default (with_centering=true, with_scaling=true) matches the live sklearn
    /// oracle AND is byte-identical to a pre-existing default fit.
    #[test]
    fn robust_with_centering_scaling_default_matches_sklearn() -> Result<(), FerroError> {
        let x = oracle_x();
        let fitted = RobustScaler::<f64>::new().fit(&x, &())?;
        let head = x.slice(ndarray::s![0..2, ..]).to_owned();
        let scaled = fitted.transform(&head)?;

        let expected = array![[-1.0, -1.0], [-0.5, -0.5]];
        for (a, b) in scaled.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-9);
        }

        // Regression guard: default config must be BYTE-identical to a plain
        // default fit produced the same way.
        let default_fitted = RobustScaler::<f64>::new().fit(&x, &())?;
        let default_scaled = default_fitted.transform(&head)?;
        for (a, b) in scaled.iter().zip(default_scaled.iter()) {
            assert_eq!(a.to_bits(), b.to_bits());
        }
        Ok(())
    }

    /// `with_with_scaling(false)` (T,F) centers only.
    #[test]
    fn robust_with_scaling_false() -> Result<(), FerroError> {
        let x = oracle_x();
        let fitted = RobustScaler::<f64>::new()
            .with_with_scaling(false)
            .fit(&x, &())?;
        let head = x.slice(ndarray::s![0..2, ..]).to_owned();
        let scaled = fitted.transform(&head)?;

        let expected = array![[-4.0, -400.0], [-2.0, -200.0]];
        for (a, b) in scaled.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-9);
        }
        Ok(())
    }

    /// `with_with_centering(false)` (F,T) scales only.
    #[test]
    fn robust_with_centering_false() -> Result<(), FerroError> {
        let x = oracle_x();
        let fitted = RobustScaler::<f64>::new()
            .with_with_centering(false)
            .fit(&x, &())?;
        let head = x.slice(ndarray::s![0..2, ..]).to_owned();
        let scaled = fitted.transform(&head)?;

        let expected = array![[0.25, 0.25], [0.75, 0.75]];
        for (a, b) in scaled.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-9);
        }
        Ok(())
    }

    /// Both `false` (F,F) is the identity.
    #[test]
    fn robust_both_false_identity() -> Result<(), FerroError> {
        let x = oracle_x();
        let fitted = RobustScaler::<f64>::new()
            .with_with_centering(false)
            .with_with_scaling(false)
            .fit(&x, &())?;
        let head = x.slice(ndarray::s![0..2, ..]).to_owned();
        let scaled = fitted.transform(&head)?;

        let expected = array![[1.0, 100.0], [3.0, 300.0]];
        for (a, b) in scaled.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-12);
        }
        Ok(())
    }

    // -----------------------------------------------------------------------
    // REQ-8: robust_scale free function + axis (sklearn _data.py:1719,
    // :1845-1848). Live sklearn 1.5.2 oracle (R-CHAR-3), computed from /tmp,
    // never copied from the ferrolearn side.
    //
    // Oracle X: col0 = [1,3,5,7,9], col1 = [100,300,500,700,900] (5 rows).
    //   from sklearn.preprocessing import robust_scale
    //   robust_scale(X, axis=0).tolist()
    //     -> [[-1,-1],[-0.5,-0.5],[0,0],[0.5,0.5],[1,1]]
    //   robust_scale(X, axis=0, with_centering=False).tolist()
    //     -> [[0.25,0.25],[0.75,0.75],[1.25,1.25],[1.75,1.75],[2.25,2.25]]
    //   robust_scale([[1,2,3,4,5]], axis=1).tolist()
    //     -> [[-1,-0.5,0,0.5,1]]
    //   robust_scale(X, axis=0, quantile_range=(10.,90.)).tolist()
    //     -> [[-0.625,-0.625],[-0.3125,-0.3125],[0,0],[0.3125,0.3125],[0.625,0.625]]
    // -----------------------------------------------------------------------

    /// `robust_scale(X, axis=0)` (defaults) matches the live sklearn oracle.
    #[test]
    fn robust_scale_axis0_default_matches_sklearn() -> Result<(), FerroError> {
        let x = oracle_x();
        let scaled = robust_scale(&x, 0, true, true, (25.0, 75.0))?;
        let expected = array![
            [-1.0, -1.0],
            [-0.5, -0.5],
            [0.0, 0.0],
            [0.5, 0.5],
            [1.0, 1.0]
        ];
        for (a, b) in scaled.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-9);
        }
        Ok(())
    }

    /// `robust_scale(X, axis=0, with_centering=False)` scales only.
    #[test]
    fn robust_scale_no_centering_matches_sklearn() -> Result<(), FerroError> {
        let x = oracle_x();
        let scaled = robust_scale(&x, 0, false, true, (25.0, 75.0))?;
        let expected = array![
            [0.25, 0.25],
            [0.75, 0.75],
            [1.25, 1.25],
            [1.75, 1.75],
            [2.25, 2.25]
        ];
        for (a, b) in scaled.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-9);
        }
        Ok(())
    }

    /// `robust_scale(row, axis=1)` scales each sample/row (transpose path).
    #[test]
    fn robust_scale_axis1_matches_sklearn() -> Result<(), FerroError> {
        let row = array![[1.0, 2.0, 3.0, 4.0, 5.0]];
        let scaled = robust_scale(&row, 1, true, true, (25.0, 75.0))?;
        let expected = array![[-1.0, -0.5, 0.0, 0.5, 1.0]];
        for (a, b) in scaled.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-9);
        }
        Ok(())
    }

    /// `robust_scale(X, axis=0, quantile_range=(10,90))` matches the oracle.
    #[test]
    fn robust_scale_quantile_range_matches_sklearn() -> Result<(), FerroError> {
        let x = oracle_x();
        let scaled = robust_scale(&x, 0, true, true, (10.0, 90.0))?;
        let expected = array![
            [-0.625, -0.625],
            [-0.3125, -0.3125],
            [0.0, 0.0],
            [0.3125, 0.3125],
            [0.625, 0.625]
        ];
        for (a, b) in scaled.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-9);
        }
        Ok(())
    }

    /// An out-of-range `axis` returns [`FerroError::InvalidParameter`].
    #[test]
    fn robust_scale_invalid_axis_errors() {
        let x = oracle_x();
        assert!(matches!(
            robust_scale(&x, 2, true, true, (25.0, 75.0)),
            Err(FerroError::InvalidParameter { .. })
        ));
    }

    // -----------------------------------------------------------------------
    // REQ-10: center_ / scale_ attribute names + _handle_zeros scale_
    // (sklearn _data.py:1505-1514, :1634-1635). Live sklearn 1.5.2 oracle
    // (R-CHAR-3), computed from /tmp, never copied from the ferrolearn side.
    //
    // Oracle X: col0 = [1,3,5,7,9], col1 = [5,5,5,5,5] (col1 constant -> IQR 0).
    //   from sklearn.preprocessing import RobustScaler
    //   r = RobustScaler().fit(X)
    //   r.center_.tolist() -> [5.0, 5.0]
    //   r.scale_.tolist()  -> [4.0, 1.0]   (constant col -> _handle_zeros -> 1.0)
    // -----------------------------------------------------------------------

    fn req10_oracle_x() -> Array2<f64> {
        array![[1.0, 5.0], [3.0, 5.0], [5.0, 5.0], [7.0, 5.0], [9.0, 5.0]]
    }

    /// `center()` / `scale()` match the live sklearn `center_` / `scale_`:
    /// `center_ = [5.0, 5.0]`, `scale_ = [4.0, 1.0]` (the constant column's
    /// scale is EXACTLY `1.0` via `_handle_zeros_in_scale`).
    #[test]
    fn robust_center_scale_match_sklearn() -> Result<(), FerroError> {
        let x = req10_oracle_x();
        let fitted = RobustScaler::<f64>::new().fit(&x, &())?;

        assert_abs_diff_eq!(fitted.center()[0], 5.0, epsilon = 1e-9);
        assert_abs_diff_eq!(fitted.center()[1], 5.0, epsilon = 1e-9);

        assert_abs_diff_eq!(fitted.scale()[0], 4.0, epsilon = 1e-12);
        assert_abs_diff_eq!(fitted.scale()[1], 1.0, epsilon = 1e-12);
        // The constant column's scale is EXACTLY 1.0 (_handle_zeros).
        assert_eq!(fitted.scale()[1], 1.0);
        Ok(())
    }

    /// `scale_` differs from the raw `iqr` on a constant column (`iqr==0` but
    /// `scale_==1.0`), is unchanged on a non-constant column, and `center_`
    /// equals `median`.
    #[test]
    fn robust_scale_differs_from_iqr_on_constant_col() -> Result<(), FerroError> {
        let x = req10_oracle_x();
        let fitted = RobustScaler::<f64>::new().fit(&x, &())?;

        // Constant column: raw IQR is 0, but scale_ is handled to 1.0.
        assert_eq!(fitted.iqr()[1], 0.0);
        assert_eq!(fitted.scale()[1], 1.0);

        // Non-constant column: scale_ == raw IQR (unchanged).
        assert_eq!(fitted.scale()[0], fitted.iqr()[0]);

        // center_ == median on every column.
        assert_eq!(fitted.center()[0], fitted.median()[0]);
        assert_eq!(fitted.center()[1], fitted.median()[1]);
        Ok(())
    }
}