1use ghostflow_core::Tensor;
4
/// Generates polynomial and interaction features from an input matrix,
/// in the spirit of scikit-learn's `PolynomialFeatures`.
pub struct PolynomialFeatures {
    /// Maximum total degree of the generated monomials.
    pub degree: usize,
    /// When true, only interaction terms are produced (each feature appears
    /// with power at most 1 in any output column).
    pub interaction_only: bool,
    /// When true, a constant bias column (all ones) is included first.
    pub include_bias: bool,
    /// Number of input features seen during `fit` (0 before fitting).
    n_input_features_: usize,
    /// Number of output columns `transform` produces (0 before fitting).
    n_output_features_: usize,
    /// One exponent vector per output column: `powers_[j][k]` is the power
    /// of input feature `k` in output column `j`.
    powers_: Vec<Vec<usize>>,
}
14
15impl PolynomialFeatures {
16 pub fn new(degree: usize) -> Self {
17 PolynomialFeatures {
18 degree,
19 interaction_only: false,
20 include_bias: true,
21 n_input_features_: 0,
22 n_output_features_: 0,
23 powers_: Vec::new(),
24 }
25 }
26
27 pub fn interaction_only(mut self, io: bool) -> Self {
28 self.interaction_only = io;
29 self
30 }
31
32 pub fn include_bias(mut self, ib: bool) -> Self {
33 self.include_bias = ib;
34 self
35 }
36
37 fn generate_powers(&mut self, n_features: usize) {
38 self.powers_.clear();
39
40 fn generate_combinations(
42 n_features: usize,
43 degree: usize,
44 interaction_only: bool,
45 current: &mut Vec<usize>,
46 start: usize,
47 remaining_degree: usize,
48 result: &mut Vec<Vec<usize>>,
49 ) {
50 if remaining_degree == 0 {
51 result.push(current.clone());
52 return;
53 }
54
55 for i in start..n_features {
56 let max_power = if interaction_only { 1 } else { remaining_degree };
57 for p in 1..=max_power {
58 if current.iter().sum::<usize>() + p <= degree {
59 current[i] += p;
60 generate_combinations(
61 n_features,
62 degree,
63 interaction_only,
64 current,
65 if interaction_only { i + 1 } else { i },
66 remaining_degree - p,
67 result,
68 );
69 current[i] -= p;
70 }
71 }
72 }
73 }
74
75 if self.include_bias {
77 self.powers_.push(vec![0; n_features]);
78 }
79
80 for d in 1..=self.degree {
82 let mut current = vec![0usize; n_features];
83 generate_combinations(
84 n_features,
85 self.degree,
86 self.interaction_only,
87 &mut current,
88 0,
89 d,
90 &mut self.powers_,
91 );
92 }
93
94 self.n_input_features_ = n_features;
95 self.n_output_features_ = self.powers_.len();
96 }
97
98 pub fn fit(&mut self, x: &Tensor) {
99 let n_features = x.dims()[1];
100 self.generate_powers(n_features);
101 }
102
103 pub fn transform(&self, x: &Tensor) -> Tensor {
104 let x_data = x.data_f32();
105 let n_samples = x.dims()[0];
106 let n_features = x.dims()[1];
107
108 let mut result = vec![0.0f32; n_samples * self.n_output_features_];
109
110 for i in 0..n_samples {
111 let xi = &x_data[i * n_features..(i + 1) * n_features];
112
113 for (j, powers) in self.powers_.iter().enumerate() {
114 let mut val = 1.0f32;
115 for (k, &p) in powers.iter().enumerate() {
116 if p > 0 {
117 val *= xi[k].powi(p as i32);
118 }
119 }
120 result[i * self.n_output_features_ + j] = val;
121 }
122 }
123
124 Tensor::from_slice(&result, &[n_samples, self.n_output_features_]).unwrap()
125 }
126
127 pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
128 self.fit(x);
129 self.transform(x)
130 }
131
132 pub fn get_feature_names(&self, input_features: Option<&[String]>) -> Vec<String> {
133 let default_names: Vec<String> = (0..self.n_input_features_)
134 .map(|i| format!("x{}", i))
135 .collect();
136 let names = input_features.unwrap_or(&default_names);
137
138 self.powers_.iter()
139 .map(|powers| {
140 let terms: Vec<String> = powers.iter()
141 .enumerate()
142 .filter(|(_, &p)| p > 0)
143 .map(|(i, &p)| {
144 if p == 1 {
145 names[i].clone()
146 } else {
147 format!("{}^{}", names[i], p)
148 }
149 })
150 .collect();
151
152 if terms.is_empty() {
153 "1".to_string()
154 } else {
155 terms.join(" ")
156 }
157 })
158 .collect()
159 }
160}
161
/// Expands each feature into a B-spline basis, similar in spirit to
/// scikit-learn's `SplineTransformer`.
pub struct SplineTransformer {
    /// Number of interior knots per feature.
    /// NOTE(review): the knot-spacing math divides by `n_knots - 1`, so
    /// values below 2 misbehave — confirm callers enforce this.
    pub n_knots: usize,
    /// Polynomial degree of the spline basis (default 3 = cubic).
    pub degree: usize,
    /// Strategy used to place the interior knots.
    pub knots: KnotPositions,
    /// When false, the first basis function of each feature is dropped.
    pub include_bias: bool,
    /// Extended knot vector per feature, learned in `fit` (None before fitting).
    knots_: Option<Vec<Vec<f32>>>,
    /// Number of input features seen during `fit`.
    n_features_: usize,
    /// Basis functions generated per feature: `n_knots + degree - 1`.
    n_splines_per_feature_: usize,
}
172
/// How interior knots are positioned along each feature's observed range.
#[derive(Clone)]
pub enum KnotPositions {
    /// Evenly spaced between the feature's minimum and maximum.
    Uniform,
    /// Placed at evenly spaced quantiles of the observed values.
    Quantile,
}
178
impl SplineTransformer {
    /// Creates a transformer with `n_knots` interior knots, cubic degree,
    /// uniform knot placement, and the bias basis function included.
    pub fn new(n_knots: usize) -> Self {
        SplineTransformer {
            n_knots,
            degree: 3,
            knots: KnotPositions::Uniform,
            include_bias: true,
            knots_: None,
            n_features_: 0,
            n_splines_per_feature_: 0,
        }
    }

    /// Builder-style setter for the spline degree.
    pub fn degree(mut self, d: usize) -> Self {
        self.degree = d;
        self
    }

    /// Computes the `n_knots` interior knot positions for one feature whose
    /// values are in `x` (length `n_samples`).
    ///
    /// NOTE(review): assumes `n_knots >= 2` — `(self.n_knots - 1)` otherwise
    /// divides by zero (or underflows for 0). Panics on NaN input via the
    /// `partial_cmp` unwrap in the sort.
    fn compute_knots(&self, x: &[f32], n_samples: usize) -> Vec<f32> {
        // Sort a copy to obtain order statistics (min, max, quantiles).
        let mut sorted: Vec<f32> = x.to_vec();
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());

        let x_min = sorted[0];
        let x_max = sorted[n_samples - 1];

        match self.knots {
            KnotPositions::Uniform => {
                // Evenly spaced knots spanning [x_min, x_max].
                let step = (x_max - x_min) / (self.n_knots - 1) as f32;
                (0..self.n_knots).map(|i| x_min + i as f32 * step).collect()
            }
            KnotPositions::Quantile => {
                // Knots at evenly spaced quantiles of the sorted values.
                (0..self.n_knots)
                    .map(|i| {
                        let q = i as f32 / (self.n_knots - 1) as f32;
                        let idx = ((n_samples - 1) as f32 * q) as usize;
                        sorted[idx]
                    })
                    .collect()
            }
        }
    }

    /// Cox–de Boor recursion: value at `x` of the `i`-th B-spline basis
    /// function of degree `k` over the knot vector `knots`.
    fn bspline_basis(&self, x: f32, knots: &[f32], i: usize, k: usize) -> f32 {
        if k == 0 {
            // Degree-0 basis: indicator of the half-open span
            // [knots[i], knots[i+1]).
            if i < knots.len() - 1 && x >= knots[i] && x < knots[i + 1] {
                return 1.0;
            }
            // Close the very last span on the right so the upper endpoint
            // of the knot range is still covered.
            if i == knots.len() - 2 && x == knots[i + 1] {
                return 1.0;
            }
            return 0.0;
        }

        let mut result = 0.0f32;

        // Left term of the recursion; skipped when the knot span is
        // (near-)degenerate to avoid dividing by ~0.
        if i + k < knots.len() {
            let denom1 = knots[i + k] - knots[i];
            if denom1.abs() > 1e-10 {
                result += (x - knots[i]) / denom1 * self.bspline_basis(x, knots, i, k - 1);
            }
        }

        // Right term of the recursion, guarded the same way.
        if i + k + 1 < knots.len() {
            let denom2 = knots[i + k + 1] - knots[i + 1];
            if denom2.abs() > 1e-10 {
                result += (knots[i + k + 1] - x) / denom2 * self.bspline_basis(x, knots, i + 1, k - 1);
            }
        }

        result
    }

    /// Learns per-feature knot vectors from `x`, extending each side by
    /// `degree` extra knots so the basis is defined at the data boundaries.
    pub fn fit(&mut self, x: &Tensor) {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        self.n_features_ = n_features;
        // With n_knots + 2*degree total knots, a degree-k spline basis has
        // (total - k - 1) = n_knots + degree - 1 functions per feature.
        self.n_splines_per_feature_ = self.n_knots + self.degree - 1;

        let mut all_knots = Vec::with_capacity(n_features);
        for j in 0..n_features {
            // Gather column j of the row-major data.
            let feature_values: Vec<f32> = (0..n_samples)
                .map(|i| x_data[i * n_features + j])
                .collect();

            let mut knots = self.compute_knots(&feature_values, n_samples);

            // Boundary extension uses a uniform step derived from the overall
            // range — even when the interior knots were quantile-placed.
            let x_min = knots[0];
            let x_max = knots[knots.len() - 1];
            let step = (x_max - x_min) / (self.n_knots - 1) as f32;

            for i in 0..self.degree {
                knots.insert(0, x_min - (i + 1) as f32 * step);
                knots.push(x_max + (i + 1) as f32 * step);
            }

            all_knots.push(knots);
        }

        self.knots_ = Some(all_knots);
    }

    /// Evaluates the learned B-spline basis for every sample/feature pair.
    ///
    /// Output shape is `[n_samples, n_features * n_splines_per_feature_]`,
    /// with one basis function fewer per feature when `include_bias` is
    /// false. Panics with "Not fitted" if `fit` was never called.
    pub fn transform(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let knots = self.knots_.as_ref().expect("Not fitted");

        let n_output = if self.include_bias {
            n_features * self.n_splines_per_feature_
        } else {
            n_features * (self.n_splines_per_feature_ - 1)
        };

        let mut result = vec![0.0f32; n_samples * n_output];

        for i in 0..n_samples {
            let mut col = 0;
            for j in 0..n_features {
                let x_val = x_data[i * n_features + j];
                let feature_knots = &knots[j];

                // Without bias, the first basis function of each feature
                // is skipped.
                let start_basis = if self.include_bias { 0 } else { 1 };
                for b in start_basis..self.n_splines_per_feature_ {
                    result[i * n_output + col] = self.bspline_basis(x_val, feature_knots, b, self.degree);
                    col += 1;
                }
            }
        }

        Tensor::from_slice(&result, &[n_samples, n_output]).unwrap()
    }

    /// Convenience: `fit` followed by `transform` on the same data.
    pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
        self.fit(x);
        self.transform(x)
    }
}
321
/// Applies a monotonic power transform (Yeo-Johnson or Box-Cox) per feature
/// to make data more Gaussian-like, optionally standardizing the result.
pub struct PowerTransformer {
    /// Which power-transform family to apply.
    pub method: PowerMethod,
    /// When true (the default set in `new`), transformed features are scaled
    /// to zero mean / unit variance using statistics captured during `fit`.
    pub standardize: bool,
    /// Per-feature lambda selected during `fit` (None before fitting).
    lambdas_: Option<Vec<f32>>,
    /// Per-feature mean of the transformed training data (standardize only).
    mean_: Option<Vec<f32>>,
    /// Per-feature std-dev of the transformed training data (standardize only).
    std_: Option<Vec<f32>>,
}
330
/// Power-transform family.
#[derive(Clone, Copy)]
pub enum PowerMethod {
    /// Defined for all real inputs (positive, zero and negative).
    YeoJohnson,
    /// Defined only for strictly positive inputs; this implementation maps
    /// non-positive values to NaN.
    BoxCox,
}
336
337impl PowerTransformer {
338 pub fn new(method: PowerMethod) -> Self {
339 PowerTransformer {
340 method,
341 standardize: true,
342 lambdas_: None,
343 mean_: None,
344 std_: None,
345 }
346 }
347
348 fn yeo_johnson_transform(&self, x: f32, lambda: f32) -> f32 {
349 if x >= 0.0 {
350 if (lambda - 0.0).abs() < 1e-10 {
351 (x + 1.0).ln()
352 } else {
353 ((x + 1.0).powf(lambda) - 1.0) / lambda
354 }
355 } else {
356 if (lambda - 2.0).abs() < 1e-10 {
357 -((-x + 1.0).ln())
358 } else {
359 -((-x + 1.0).powf(2.0 - lambda) - 1.0) / (2.0 - lambda)
360 }
361 }
362 }
363
364 fn box_cox_transform(&self, x: f32, lambda: f32) -> f32 {
365 if x <= 0.0 {
366 return f32::NAN;
367 }
368 if (lambda - 0.0).abs() < 1e-10 {
369 x.ln()
370 } else {
371 (x.powf(lambda) - 1.0) / lambda
372 }
373 }
374
375 fn optimize_lambda(&self, x: &[f32]) -> f32 {
376 let mut best_lambda = 1.0f32;
378 let mut best_score = f32::NEG_INFINITY;
379
380 for lambda_int in -20..=20 {
381 let lambda = lambda_int as f32 * 0.1;
382
383 let transformed: Vec<f32> = x.iter()
384 .map(|&xi| match self.method {
385 PowerMethod::YeoJohnson => self.yeo_johnson_transform(xi, lambda),
386 PowerMethod::BoxCox => self.box_cox_transform(xi, lambda),
387 })
388 .collect();
389
390 if transformed.iter().any(|&t| t.is_nan() || t.is_infinite()) {
391 continue;
392 }
393
394 let mean: f32 = transformed.iter().sum::<f32>() / transformed.len() as f32;
396 let var: f32 = transformed.iter().map(|&t| (t - mean).powi(2)).sum::<f32>() / transformed.len() as f32;
397
398 let score = -(var - 1.0).abs();
400
401 if score > best_score {
402 best_score = score;
403 best_lambda = lambda;
404 }
405 }
406
407 best_lambda
408 }
409
410 pub fn fit(&mut self, x: &Tensor) {
411 let x_data = x.data_f32();
412 let n_samples = x.dims()[0];
413 let n_features = x.dims()[1];
414
415 let mut lambdas = Vec::with_capacity(n_features);
416
417 for j in 0..n_features {
418 let feature_values: Vec<f32> = (0..n_samples)
419 .map(|i| x_data[i * n_features + j])
420 .collect();
421
422 let lambda = self.optimize_lambda(&feature_values);
423 lambdas.push(lambda);
424 }
425
426 self.lambdas_ = Some(lambdas);
427
428 if self.standardize {
430 let transformed = self.transform_internal(x);
431 let t_data = transformed.data_f32();
432
433 let mean: Vec<f32> = (0..n_features)
434 .map(|j| (0..n_samples).map(|i| t_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
435 .collect();
436
437 let std: Vec<f32> = (0..n_features)
438 .map(|j| {
439 let m = mean[j];
440 ((0..n_samples).map(|i| (t_data[i * n_features + j] - m).powi(2)).sum::<f32>()
441 / n_samples as f32).sqrt().max(1e-10)
442 })
443 .collect();
444
445 self.mean_ = Some(mean);
446 self.std_ = Some(std);
447 }
448 }
449
450 fn transform_internal(&self, x: &Tensor) -> Tensor {
451 let x_data = x.data_f32();
452 let n_samples = x.dims()[0];
453 let n_features = x.dims()[1];
454 let lambdas = self.lambdas_.as_ref().expect("Not fitted");
455
456 let result: Vec<f32> = (0..n_samples)
457 .flat_map(|i| {
458 (0..n_features).map(|j| {
459 let xi = x_data[i * n_features + j];
460 match self.method {
461 PowerMethod::YeoJohnson => self.yeo_johnson_transform(xi, lambdas[j]),
462 PowerMethod::BoxCox => self.box_cox_transform(xi, lambdas[j]),
463 }
464 }).collect::<Vec<_>>()
465 })
466 .collect();
467
468 Tensor::from_slice(&result, &[n_samples, n_features]).unwrap()
469 }
470
471 pub fn transform(&self, x: &Tensor) -> Tensor {
472 let transformed = self.transform_internal(x);
473
474 if self.standardize {
475 let t_data = transformed.data_f32();
476 let n_samples = x.dims()[0];
477 let n_features = x.dims()[1];
478 let mean = self.mean_.as_ref().unwrap();
479 let std = self.std_.as_ref().unwrap();
480
481 let result: Vec<f32> = (0..n_samples)
482 .flat_map(|i| {
483 (0..n_features).map(|j| {
484 (t_data[i * n_features + j] - mean[j]) / std[j]
485 }).collect::<Vec<_>>()
486 })
487 .collect();
488
489 Tensor::from_slice(&result, &[n_samples, n_features]).unwrap()
490 } else {
491 transformed
492 }
493 }
494
495 pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
496 self.fit(x);
497 self.transform(x)
498 }
499}
500
/// Maps each feature through its empirical CDF onto a uniform or normal
/// target distribution, similar to scikit-learn's `QuantileTransformer`.
pub struct QuantileTransformer {
    /// Number of quantile landmarks per feature (capped at the sample count
    /// in `fit`).
    pub n_quantiles: usize,
    /// Target distribution of the transformed values.
    pub output_distribution: OutputDistribution,
    /// Per-feature sorted quantile values, learned in `fit`.
    quantiles_: Option<Vec<Vec<f32>>>,
    /// Reference values of the target distribution at the same quantile
    /// levels as `quantiles_`.
    references_: Option<Vec<f32>>,
}
508
/// Target distribution for the quantile transform.
#[derive(Clone, Copy)]
pub enum OutputDistribution {
    /// Map to [0, 1] via the empirical CDF.
    Uniform,
    /// Map to a standard normal via the inverse normal CDF.
    Normal,
}
514
515impl QuantileTransformer {
516 pub fn new() -> Self {
517 QuantileTransformer {
518 n_quantiles: 1000,
519 output_distribution: OutputDistribution::Uniform,
520 quantiles_: None,
521 references_: None,
522 }
523 }
524
525 pub fn n_quantiles(mut self, n: usize) -> Self {
526 self.n_quantiles = n;
527 self
528 }
529
530 pub fn output_distribution(mut self, od: OutputDistribution) -> Self {
531 self.output_distribution = od;
532 self
533 }
534
535 fn normal_ppf(&self, p: f32) -> f32 {
536 if p <= 0.0 { return f32::NEG_INFINITY; }
538 if p >= 1.0 { return f32::INFINITY; }
539
540 let a = [
541 -3.969683028665376e+01,
542 2.209460984245205e+02,
543 -2.759285104469687e+02,
544 1.383577518672690e+02,
545 -3.066479806614716e+01,
546 2.506628277459239e+00,
547 ];
548 let b = [
549 -5.447609879822406e+01,
550 1.615858368580409e+02,
551 -1.556989798598866e+02,
552 6.680131188771972e+01,
553 -1.328068155288572e+01,
554 ];
555 let c = [
556 -7.784894002430293e-03,
557 -3.223964580411365e-01,
558 -2.400758277161838e+00,
559 -2.549732539343734e+00,
560 4.374664141464968e+00,
561 2.938163982698783e+00,
562 ];
563 let d = [
564 7.784695709041462e-03,
565 3.224671290700398e-01,
566 2.445134137142996e+00,
567 3.754408661907416e+00,
568 ];
569
570 let p_low = 0.02425;
571 let p_high = 1.0 - p_low;
572
573 if p < p_low {
574 let q = (-2.0 * p.ln()).sqrt();
575 (((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
576 ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1.0)
577 } else if p <= p_high {
578 let q = p - 0.5;
579 let r = q * q;
580 (((((a[0]*r+a[1])*r+a[2])*r+a[3])*r+a[4])*r+a[5])*q /
581 (((((b[0]*r+b[1])*r+b[2])*r+b[3])*r+b[4])*r+1.0)
582 } else {
583 let q = (-2.0 * (1.0 - p).ln()).sqrt();
584 -(((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
585 ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1.0)
586 }
587 }
588
589 pub fn fit(&mut self, x: &Tensor) {
590 let x_data = x.data_f32();
591 let n_samples = x.dims()[0];
592 let n_features = x.dims()[1];
593
594 let n_quantiles = self.n_quantiles.min(n_samples);
595
596 let quantiles: Vec<Vec<f32>> = (0..n_features)
598 .map(|j| {
599 let mut values: Vec<f32> = (0..n_samples)
600 .map(|i| x_data[i * n_features + j])
601 .collect();
602 values.sort_by(|a, b| a.partial_cmp(b).unwrap());
603
604 (0..n_quantiles)
605 .map(|q| {
606 let idx = (q as f32 / (n_quantiles - 1) as f32 * (n_samples - 1) as f32) as usize;
607 values[idx]
608 })
609 .collect()
610 })
611 .collect();
612
613 let references: Vec<f32> = (0..n_quantiles)
615 .map(|q| {
616 let p = q as f32 / (n_quantiles - 1) as f32;
617 match self.output_distribution {
618 OutputDistribution::Uniform => p,
619 OutputDistribution::Normal => self.normal_ppf(p.clamp(0.001, 0.999)),
620 }
621 })
622 .collect();
623
624 self.quantiles_ = Some(quantiles);
625 self.references_ = Some(references);
626 }
627
628 pub fn transform(&self, x: &Tensor) -> Tensor {
629 let x_data = x.data_f32();
630 let n_samples = x.dims()[0];
631 let n_features = x.dims()[1];
632
633 let quantiles = self.quantiles_.as_ref().expect("Not fitted");
634 let references = self.references_.as_ref().unwrap();
635 let n_quantiles = references.len();
636
637 let result: Vec<f32> = (0..n_samples)
638 .flat_map(|i| {
639 (0..n_features).map(|j| {
640 let xi = x_data[i * n_features + j];
641 let q = &quantiles[j];
642
643 let pos = q.iter().position(|&qv| qv >= xi).unwrap_or(n_quantiles - 1);
645
646 if pos == 0 {
647 references[0]
648 } else if pos >= n_quantiles {
649 references[n_quantiles - 1]
650 } else {
651 let lower = q[pos - 1];
653 let upper = q[pos];
654 let t = if (upper - lower).abs() > 1e-10 {
655 (xi - lower) / (upper - lower)
656 } else {
657 0.5
658 };
659 references[pos - 1] + t * (references[pos] - references[pos - 1])
660 }
661 }).collect::<Vec<_>>()
662 })
663 .collect();
664
665 Tensor::from_slice(&result, &[n_samples, n_features]).unwrap()
666 }
667
668 pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
669 self.fit(x);
670 self.transform(x)
671 }
672}
673
674impl Default for QuantileTransformer {
675 fn default() -> Self { Self::new() }
676}
677
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: a degree-2 expansion of a 2-feature matrix must produce
    // more output columns than the 2 inputs.
    #[test]
    fn test_polynomial_features() {
        let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0], &[2, 2]).unwrap();
        let mut poly = PolynomialFeatures::new(2);
        let result = poly.fit_transform(&x);
        assert!(result.dims()[1] > 2);
    }

    // Shape check: Yeo-Johnson transform preserves the input shape.
    #[test]
    fn test_power_transformer() {
        let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0], &[3, 2]).unwrap();
        let mut pt = PowerTransformer::new(PowerMethod::YeoJohnson);
        let result = pt.fit_transform(&x);
        assert_eq!(result.dims(), &[3, 2]);
    }

    // Shape check: quantile transform preserves the input shape.
    #[test]
    fn test_quantile_transformer() {
        let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0], &[3, 2]).unwrap();
        let mut qt = QuantileTransformer::new();
        let result = qt.fit_transform(&x);
        assert_eq!(result.dims(), &[3, 2]);
    }
}
706
707