// sklears_kernel_approximation/feature_generation.rs

//! Extensible feature generation framework
//!
//! This module provides a flexible system for creating and composing feature
//! generation methods, making it easy to extend with new techniques.
5
6use scirs2_core::ndarray::{Array1, Array2};
7use scirs2_core::random::seeded_rng;
8use sklears_core::error::SklearsError;
9
10/// Feature generator trait
11pub trait FeatureGenerator: Send + Sync {
12    /// Generate features from input data
13    fn generate(&self, data: &Array2<f64>) -> Result<Array2<f64>, SklearsError>;
14
15    /// Get the output dimension
16    fn output_dim(&self) -> usize;
17
18    /// Get generator name
19    fn name(&self) -> &str;
20
21    /// Check if generator is stateful (needs fitting)
22    fn is_stateful(&self) -> bool {
23        false
24    }
25
26    /// Fit the generator if stateful
27    fn fit_generator(&mut self, _data: &Array2<f64>) -> Result<(), SklearsError> {
28        Ok(())
29    }
30}
31
32/// Random Fourier feature generator
33#[derive(Debug, Clone)]
34pub struct RandomFourierGenerator {
35    /// Number of components
36    pub n_components: usize,
37    /// Gamma parameter
38    pub gamma: f64,
39    /// Random weights (fitted)
40    weights: Option<Array2<f64>>,
41    /// Random offset (fitted)
42    offset: Option<Array1<f64>>,
43    /// Random seed
44    pub random_state: Option<u64>,
45}
46
47impl RandomFourierGenerator {
48    /// Create a new Random Fourier generator
49    pub fn new(n_components: usize, gamma: f64, random_state: Option<u64>) -> Self {
50        Self {
51            n_components,
52            gamma,
53            weights: None,
54            offset: None,
55            random_state,
56        }
57    }
58
59    /// Initialize weights and offsets
60    fn initialize(&mut self, n_features: usize) -> Result<(), SklearsError> {
61        use scirs2_core::random::StandardNormal;
62
63        let mut rng = seeded_rng(self.random_state.unwrap_or(42));
64
65        // Sample weights from N(0, gamma)
66        let mut weights = Array2::zeros((n_features, self.n_components));
67        for elem in weights.iter_mut() {
68            *elem = rng.sample::<f64, _>(StandardNormal) * self.gamma.sqrt();
69        }
70
71        // Sample offset from Uniform(0, 2π)
72        let mut offset = Array1::zeros(self.n_components);
73        for elem in offset.iter_mut() {
74            *elem = rng.gen_range(0.0..(2.0 * std::f64::consts::PI));
75        }
76
77        self.weights = Some(weights);
78        self.offset = Some(offset);
79
80        Ok(())
81    }
82}
83
84impl FeatureGenerator for RandomFourierGenerator {
85    fn generate(&self, data: &Array2<f64>) -> Result<Array2<f64>, SklearsError> {
86        let weights = self
87            .weights
88            .as_ref()
89            .ok_or_else(|| SklearsError::NotFitted {
90                operation: "RandomFourierGenerator must be fitted before generating features"
91                    .to_string(),
92            })?;
93
94        let offset = self.offset.as_ref().unwrap();
95
96        // Compute X @ W
97        let projection = data.dot(weights);
98
99        // Apply cos(X @ W + b)
100        let scale = (2.0 / self.n_components as f64).sqrt();
101        let features = projection.mapv(|x| x + offset[0]).mapv(|x| scale * x.cos());
102
103        Ok(features)
104    }
105
106    fn output_dim(&self) -> usize {
107        self.n_components
108    }
109
110    fn name(&self) -> &str {
111        "RandomFourierFeatures"
112    }
113
114    fn is_stateful(&self) -> bool {
115        true
116    }
117
118    fn fit_generator(&mut self, data: &Array2<f64>) -> Result<(), SklearsError> {
119        let (_, n_features) = data.dim();
120        self.initialize(n_features)
121    }
122}
123
/// Polynomial feature generator.
///
/// Pure configuration: the generator keeps no fitted state.
#[derive(Debug, Clone)]
pub struct PolynomialGenerator {
    /// Highest total degree of the generated terms.
    pub degree: usize,
    /// When `true`, a constant column of ones is emitted first.
    pub include_bias: bool,
    /// When `true`, only products of distinct features are generated (no powers).
    pub interaction_only: bool,
}
134
135impl PolynomialGenerator {
136    /// Create a new polynomial generator
137    pub fn new(degree: usize, include_bias: bool, interaction_only: bool) -> Self {
138        Self {
139            degree,
140            include_bias,
141            interaction_only,
142        }
143    }
144
145    /// Calculate number of output features
146    fn calculate_n_output_features(&self, n_input_features: usize) -> usize {
147        if self.interaction_only {
148            // Combinations with repetition
149            let mut count = if self.include_bias { 1 } else { 0 };
150            count += n_input_features; // degree 1
151
152            for d in 2..=self.degree {
153                // C(n + d - 1, d)
154                let mut comb = 1;
155                for i in 0..d {
156                    comb = comb * (n_input_features + d - 1 - i) / (i + 1);
157                }
158                count += comb;
159            }
160            count
161        } else {
162            // All monomials up to degree
163            let mut count = if self.include_bias { 1 } else { 0 };
164            for d in 1..=self.degree {
165                // Number of monomials of degree d in n variables
166                let mut monomials = 1;
167                for i in 0..d {
168                    monomials = monomials * (n_input_features + d - 1 - i) / (i + 1);
169                }
170                count += monomials;
171            }
172            count
173        }
174    }
175}
176
177impl FeatureGenerator for PolynomialGenerator {
178    fn generate(&self, data: &Array2<f64>) -> Result<Array2<f64>, SklearsError> {
179        let (n_samples, n_features) = data.dim();
180        let n_output = self.calculate_n_output_features(n_features);
181
182        let mut features = Array2::zeros((n_samples, n_output));
183        let mut col_idx = 0;
184
185        // Bias term
186        if self.include_bias {
187            for i in 0..n_samples {
188                features[[i, col_idx]] = 1.0;
189            }
190            col_idx += 1;
191        }
192
193        // Degree 1 (original features)
194        for j in 0..n_features {
195            for i in 0..n_samples {
196                features[[i, col_idx]] = data[[i, j]];
197            }
198            col_idx += 1;
199        }
200
201        // Higher degrees
202        if self.degree > 1 {
203            // Generate all combinations
204            for d in 2..=self.degree {
205                if col_idx >= n_output {
206                    break;
207                }
208                if self.interaction_only {
209                    // Only interactions, no powers
210                    self.generate_interactions(data, &mut features, &mut col_idx, d, n_output);
211                } else {
212                    // All terms including powers
213                    self.generate_all_terms(data, &mut features, &mut col_idx, d, n_output);
214                }
215            }
216        }
217
218        Ok(features)
219    }
220
221    fn output_dim(&self) -> usize {
222        // We need to know input dimension to compute this
223        // Return 0 as placeholder
224        0
225    }
226
227    fn name(&self) -> &str {
228        "PolynomialFeatures"
229    }
230}
231
232impl PolynomialGenerator {
233    fn generate_interactions(
234        &self,
235        data: &Array2<f64>,
236        features: &mut Array2<f64>,
237        col_idx: &mut usize,
238        degree: usize,
239        max_cols: usize,
240    ) {
241        let (n_samples, n_features) = data.dim();
242        let mut indices = vec![0; degree];
243
244        loop {
245            if *col_idx >= max_cols {
246                return;
247            }
248
249            // Check if this is a valid interaction (all different)
250            let mut is_valid = true;
251            for i in 0..degree - 1 {
252                if indices[i] == indices[i + 1] {
253                    is_valid = false;
254                    break;
255                }
256            }
257
258            if is_valid {
259                // Compute product
260                for sample in 0..n_samples {
261                    let mut product = 1.0;
262                    for &idx in &indices {
263                        product *= data[[sample, idx]];
264                    }
265                    features[[sample, *col_idx]] = product;
266                }
267                *col_idx += 1;
268            }
269
270            // Next combination
271            let mut pos = degree - 1;
272            loop {
273                indices[pos] += 1;
274                if indices[pos] < n_features {
275                    break;
276                }
277                if pos == 0 {
278                    return;
279                }
280                indices[pos] = indices[pos - 1];
281                pos -= 1;
282            }
283            for i in pos + 1..degree {
284                indices[i] = indices[pos];
285            }
286        }
287    }
288
289    fn generate_all_terms(
290        &self,
291        data: &Array2<f64>,
292        features: &mut Array2<f64>,
293        col_idx: &mut usize,
294        degree: usize,
295        max_cols: usize,
296    ) {
297        let (n_samples, n_features) = data.dim();
298        let mut indices = vec![0; degree];
299
300        loop {
301            if *col_idx >= max_cols {
302                return;
303            }
304
305            // Compute product
306            for sample in 0..n_samples {
307                let mut product = 1.0;
308                for &idx in &indices {
309                    product *= data[[sample, idx]];
310                }
311                features[[sample, *col_idx]] = product;
312            }
313            *col_idx += 1;
314
315            // Next combination with repetition
316            let mut pos = degree - 1;
317            loop {
318                indices[pos] += 1;
319                if indices[pos] < n_features {
320                    break;
321                }
322                if pos == 0 {
323                    return;
324                }
325                indices[pos] = 0;
326                pos -= 1;
327            }
328        }
329    }
330}
331
332/// Composable feature generator
333pub struct CompositeGenerator {
334    generators: Vec<Box<dyn FeatureGenerator>>,
335}
336
337impl std::fmt::Debug for CompositeGenerator {
338    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
339        f.debug_struct("CompositeGenerator")
340            .field("n_generators", &self.generators.len())
341            .finish()
342    }
343}
344
345impl CompositeGenerator {
346    /// Create a new composite generator
347    pub fn new() -> Self {
348        Self {
349            generators: Vec::new(),
350        }
351    }
352
353    /// Add a generator to the composition
354    pub fn add_generator(&mut self, generator: Box<dyn FeatureGenerator>) {
355        self.generators.push(generator);
356    }
357
358    /// Get number of generators
359    pub fn len(&self) -> usize {
360        self.generators.len()
361    }
362
363    /// Check if empty
364    pub fn is_empty(&self) -> bool {
365        self.generators.is_empty()
366    }
367}
368
369impl Default for CompositeGenerator {
370    fn default() -> Self {
371        Self::new()
372    }
373}
374
375impl FeatureGenerator for CompositeGenerator {
376    fn generate(&self, data: &Array2<f64>) -> Result<Array2<f64>, SklearsError> {
377        if self.generators.is_empty() {
378            return Ok(data.clone());
379        }
380
381        let mut all_features = Vec::new();
382
383        for generator in &self.generators {
384            let features = generator.generate(data)?;
385            all_features.push(features);
386        }
387
388        // Concatenate all features
389        let (n_samples, _) = data.dim();
390        let total_features: usize = all_features.iter().map(|f| f.ncols()).sum();
391
392        let mut result = Array2::zeros((n_samples, total_features));
393        let mut col_offset = 0;
394
395        for feature_matrix in all_features {
396            let n_cols = feature_matrix.ncols();
397            for i in 0..n_samples {
398                for j in 0..n_cols {
399                    result[[i, col_offset + j]] = feature_matrix[[i, j]];
400                }
401            }
402            col_offset += n_cols;
403        }
404
405        Ok(result)
406    }
407
408    fn output_dim(&self) -> usize {
409        self.generators.iter().map(|g| g.output_dim()).sum()
410    }
411
412    fn name(&self) -> &str {
413        "CompositeFeatureGenerator"
414    }
415
416    fn is_stateful(&self) -> bool {
417        self.generators.iter().any(|g| g.is_stateful())
418    }
419
420    fn fit_generator(&mut self, data: &Array2<f64>) -> Result<(), SklearsError> {
421        for generator in &mut self.generators {
422            if generator.is_stateful() {
423                generator.fit_generator(data)?;
424            }
425        }
426        Ok(())
427    }
428}
429
430/// Feature generator builder for fluent API
431pub struct FeatureGeneratorBuilder {
432    composite: CompositeGenerator,
433}
434
435impl FeatureGeneratorBuilder {
436    /// Create a new builder
437    pub fn new() -> Self {
438        Self {
439            composite: CompositeGenerator::new(),
440        }
441    }
442
443    /// Add Random Fourier features
444    pub fn with_random_fourier(
445        mut self,
446        n_components: usize,
447        gamma: f64,
448        random_state: Option<u64>,
449    ) -> Self {
450        self.composite
451            .add_generator(Box::new(RandomFourierGenerator::new(
452                n_components,
453                gamma,
454                random_state,
455            )));
456        self
457    }
458
459    /// Add polynomial features
460    pub fn with_polynomial(mut self, degree: usize, include_bias: bool) -> Self {
461        self.composite
462            .add_generator(Box::new(PolynomialGenerator::new(
463                degree,
464                include_bias,
465                false,
466            )));
467        self
468    }
469
470    /// Add a custom generator
471    pub fn with_custom(mut self, generator: Box<dyn FeatureGenerator>) -> Self {
472        self.composite.add_generator(generator);
473        self
474    }
475
476    /// Build the composite generator
477    pub fn build(self) -> CompositeGenerator {
478        self.composite
479    }
480}
481
482impl Default for FeatureGeneratorBuilder {
483    fn default() -> Self {
484        Self::new()
485    }
486}
487
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// A fitted Fourier generator yields one column per component.
    #[test]
    fn test_random_fourier_generator() {
        let data = array![[1.0, 2.0], [3.0, 4.0]];
        let mut rff = RandomFourierGenerator::new(50, 1.0, Some(42));

        rff.fit_generator(&data).unwrap();
        let features = rff.generate(&data).unwrap();

        assert_eq!(features.shape(), &[2, 50]);
        assert_eq!(rff.output_dim(), 50);
        assert!(rff.is_stateful());
    }

    /// Degree-2 expansion keeps at least the bias and the linear terms.
    #[test]
    fn test_polynomial_generator() {
        let data = array![[1.0, 2.0], [3.0, 4.0]];
        let poly = PolynomialGenerator::new(2, true, false);

        let expanded = poly.generate(&data).unwrap();
        assert!(expanded.ncols() >= 3); // bias + 2 features + interactions
    }

    /// Adding a generator updates `len` and `is_empty`.
    #[test]
    fn test_composite_generator() {
        let mut composite = CompositeGenerator::new();
        assert!(composite.is_empty());

        let rff = RandomFourierGenerator::new(10, 1.0, Some(42));
        composite.add_generator(Box::new(rff));
        assert_eq!(composite.len(), 1);
        assert!(!composite.is_empty());
    }

    /// Each builder step registers exactly one generator.
    #[test]
    fn test_feature_generator_builder() {
        let builder = FeatureGeneratorBuilder::new()
            .with_random_fourier(50, 1.0, Some(42))
            .with_polynomial(2, true);
        let composite = builder.build();

        assert_eq!(composite.len(), 2);
    }

    /// Interaction-only expansion keeps at least the original features.
    #[test]
    fn test_polynomial_interaction_only() {
        let data = array![[1.0, 2.0, 3.0]];
        let poly = PolynomialGenerator::new(2, false, true);

        let expanded = poly.generate(&data).unwrap();
        // Should have original 3 features + interactions
        assert!(expanded.ncols() >= 3);
    }
}