sklears_kernel_approximation/
feature_generation.rs1use scirs2_core::ndarray::{Array1, Array2};
7use scirs2_core::random::seeded_rng;
8use sklears_core::error::SklearsError;
9
10pub trait FeatureGenerator: Send + Sync {
12 fn generate(&self, data: &Array2<f64>) -> Result<Array2<f64>, SklearsError>;
14
15 fn output_dim(&self) -> usize;
17
18 fn name(&self) -> &str;
20
21 fn is_stateful(&self) -> bool {
23 false
24 }
25
26 fn fit_generator(&mut self, _data: &Array2<f64>) -> Result<(), SklearsError> {
28 Ok(())
29 }
30}
31
32#[derive(Debug, Clone)]
34pub struct RandomFourierGenerator {
35 pub n_components: usize,
37 pub gamma: f64,
39 weights: Option<Array2<f64>>,
41 offset: Option<Array1<f64>>,
43 pub random_state: Option<u64>,
45}
46
47impl RandomFourierGenerator {
48 pub fn new(n_components: usize, gamma: f64, random_state: Option<u64>) -> Self {
50 Self {
51 n_components,
52 gamma,
53 weights: None,
54 offset: None,
55 random_state,
56 }
57 }
58
59 fn initialize(&mut self, n_features: usize) -> Result<(), SklearsError> {
61 use scirs2_core::random::StandardNormal;
62
63 let mut rng = seeded_rng(self.random_state.unwrap_or(42));
64
65 let mut weights = Array2::zeros((n_features, self.n_components));
67 for elem in weights.iter_mut() {
68 *elem = rng.sample::<f64, _>(StandardNormal) * self.gamma.sqrt();
69 }
70
71 let mut offset = Array1::zeros(self.n_components);
73 for elem in offset.iter_mut() {
74 *elem = rng.gen_range(0.0..(2.0 * std::f64::consts::PI));
75 }
76
77 self.weights = Some(weights);
78 self.offset = Some(offset);
79
80 Ok(())
81 }
82}
83
84impl FeatureGenerator for RandomFourierGenerator {
85 fn generate(&self, data: &Array2<f64>) -> Result<Array2<f64>, SklearsError> {
86 let weights = self
87 .weights
88 .as_ref()
89 .ok_or_else(|| SklearsError::NotFitted {
90 operation: "RandomFourierGenerator must be fitted before generating features"
91 .to_string(),
92 })?;
93
94 let offset = self.offset.as_ref().unwrap();
95
96 let projection = data.dot(weights);
98
99 let scale = (2.0 / self.n_components as f64).sqrt();
101 let features = projection.mapv(|x| x + offset[0]).mapv(|x| scale * x.cos());
102
103 Ok(features)
104 }
105
106 fn output_dim(&self) -> usize {
107 self.n_components
108 }
109
110 fn name(&self) -> &str {
111 "RandomFourierFeatures"
112 }
113
114 fn is_stateful(&self) -> bool {
115 true
116 }
117
118 fn fit_generator(&mut self, data: &Array2<f64>) -> Result<(), SklearsError> {
119 let (_, n_features) = data.dim();
120 self.initialize(n_features)
121 }
122}
123
/// Configuration for polynomial feature expansion.
#[derive(Debug, Clone)]
pub struct PolynomialGenerator {
    /// Highest total degree of the generated terms.
    pub degree: usize,
    /// Whether a constant column of ones is emitted first.
    pub include_bias: bool,
    /// When `true`, higher-degree terms are products of distinct features only.
    pub interaction_only: bool,
}

impl PolynomialGenerator {
    /// Creates a generator with the given settings.
    pub fn new(degree: usize, include_bias: bool, interaction_only: bool) -> Self {
        Self {
            degree,
            include_bias,
            interaction_only,
        }
    }

    /// Number of output columns produced for `n_input_features` input columns.
    ///
    /// * Interaction-only: bias + linear terms + `C(n, d)` products of `d`
    ///   distinct features for each `d` in `2..=degree`. Linear terms are
    ///   counted regardless of `degree` in this mode.
    /// * Otherwise: bias + `C(n + d - 1, d)` monomials for each `d` in
    ///   `1..=degree`.
    fn calculate_n_output_features(&self, n_input_features: usize) -> usize {
        let bias = usize::from(self.include_bias);

        if self.interaction_only {
            // Products of distinct features are combinations WITHOUT repetition.
            let products: usize = (2..=self.degree)
                .map(|d| Self::binomial(n_input_features, d))
                .sum();
            bias + n_input_features + products
        } else {
            // Monomials of degree d are combinations WITH repetition.
            let monomials: usize = (1..=self.degree)
                .map(|d| Self::binomial(n_input_features + d - 1, d))
                .sum();
            bias + monomials
        }
    }

    /// Binomial coefficient `C(n, k)`; zero when `k > n`.
    ///
    /// Every intermediate value is itself a binomial coefficient, so the
    /// integer division is exact at each step.
    fn binomial(n: usize, k: usize) -> usize {
        if k > n {
            return 0;
        }
        (0..k).fold(1, |acc, i| acc * (n - i) / (i + 1))
    }
}
176
177impl FeatureGenerator for PolynomialGenerator {
178 fn generate(&self, data: &Array2<f64>) -> Result<Array2<f64>, SklearsError> {
179 let (n_samples, n_features) = data.dim();
180 let n_output = self.calculate_n_output_features(n_features);
181
182 let mut features = Array2::zeros((n_samples, n_output));
183 let mut col_idx = 0;
184
185 if self.include_bias {
187 for i in 0..n_samples {
188 features[[i, col_idx]] = 1.0;
189 }
190 col_idx += 1;
191 }
192
193 for j in 0..n_features {
195 for i in 0..n_samples {
196 features[[i, col_idx]] = data[[i, j]];
197 }
198 col_idx += 1;
199 }
200
201 if self.degree > 1 {
203 for d in 2..=self.degree {
205 if col_idx >= n_output {
206 break;
207 }
208 if self.interaction_only {
209 self.generate_interactions(data, &mut features, &mut col_idx, d, n_output);
211 } else {
212 self.generate_all_terms(data, &mut features, &mut col_idx, d, n_output);
214 }
215 }
216 }
217
218 Ok(features)
219 }
220
221 fn output_dim(&self) -> usize {
222 0
225 }
226
227 fn name(&self) -> &str {
228 "PolynomialFeatures"
229 }
230}
231
232impl PolynomialGenerator {
233 fn generate_interactions(
234 &self,
235 data: &Array2<f64>,
236 features: &mut Array2<f64>,
237 col_idx: &mut usize,
238 degree: usize,
239 max_cols: usize,
240 ) {
241 let (n_samples, n_features) = data.dim();
242 let mut indices = vec![0; degree];
243
244 loop {
245 if *col_idx >= max_cols {
246 return;
247 }
248
249 let mut is_valid = true;
251 for i in 0..degree - 1 {
252 if indices[i] == indices[i + 1] {
253 is_valid = false;
254 break;
255 }
256 }
257
258 if is_valid {
259 for sample in 0..n_samples {
261 let mut product = 1.0;
262 for &idx in &indices {
263 product *= data[[sample, idx]];
264 }
265 features[[sample, *col_idx]] = product;
266 }
267 *col_idx += 1;
268 }
269
270 let mut pos = degree - 1;
272 loop {
273 indices[pos] += 1;
274 if indices[pos] < n_features {
275 break;
276 }
277 if pos == 0 {
278 return;
279 }
280 indices[pos] = indices[pos - 1];
281 pos -= 1;
282 }
283 for i in pos + 1..degree {
284 indices[i] = indices[pos];
285 }
286 }
287 }
288
289 fn generate_all_terms(
290 &self,
291 data: &Array2<f64>,
292 features: &mut Array2<f64>,
293 col_idx: &mut usize,
294 degree: usize,
295 max_cols: usize,
296 ) {
297 let (n_samples, n_features) = data.dim();
298 let mut indices = vec![0; degree];
299
300 loop {
301 if *col_idx >= max_cols {
302 return;
303 }
304
305 for sample in 0..n_samples {
307 let mut product = 1.0;
308 for &idx in &indices {
309 product *= data[[sample, idx]];
310 }
311 features[[sample, *col_idx]] = product;
312 }
313 *col_idx += 1;
314
315 let mut pos = degree - 1;
317 loop {
318 indices[pos] += 1;
319 if indices[pos] < n_features {
320 break;
321 }
322 if pos == 0 {
323 return;
324 }
325 indices[pos] = 0;
326 pos -= 1;
327 }
328 }
329 }
330}
331
332pub struct CompositeGenerator {
334 generators: Vec<Box<dyn FeatureGenerator>>,
335}
336
337impl std::fmt::Debug for CompositeGenerator {
338 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
339 f.debug_struct("CompositeGenerator")
340 .field("n_generators", &self.generators.len())
341 .finish()
342 }
343}
344
345impl CompositeGenerator {
346 pub fn new() -> Self {
348 Self {
349 generators: Vec::new(),
350 }
351 }
352
353 pub fn add_generator(&mut self, generator: Box<dyn FeatureGenerator>) {
355 self.generators.push(generator);
356 }
357
358 pub fn len(&self) -> usize {
360 self.generators.len()
361 }
362
363 pub fn is_empty(&self) -> bool {
365 self.generators.is_empty()
366 }
367}
368
369impl Default for CompositeGenerator {
370 fn default() -> Self {
371 Self::new()
372 }
373}
374
375impl FeatureGenerator for CompositeGenerator {
376 fn generate(&self, data: &Array2<f64>) -> Result<Array2<f64>, SklearsError> {
377 if self.generators.is_empty() {
378 return Ok(data.clone());
379 }
380
381 let mut all_features = Vec::new();
382
383 for generator in &self.generators {
384 let features = generator.generate(data)?;
385 all_features.push(features);
386 }
387
388 let (n_samples, _) = data.dim();
390 let total_features: usize = all_features.iter().map(|f| f.ncols()).sum();
391
392 let mut result = Array2::zeros((n_samples, total_features));
393 let mut col_offset = 0;
394
395 for feature_matrix in all_features {
396 let n_cols = feature_matrix.ncols();
397 for i in 0..n_samples {
398 for j in 0..n_cols {
399 result[[i, col_offset + j]] = feature_matrix[[i, j]];
400 }
401 }
402 col_offset += n_cols;
403 }
404
405 Ok(result)
406 }
407
408 fn output_dim(&self) -> usize {
409 self.generators.iter().map(|g| g.output_dim()).sum()
410 }
411
412 fn name(&self) -> &str {
413 "CompositeFeatureGenerator"
414 }
415
416 fn is_stateful(&self) -> bool {
417 self.generators.iter().any(|g| g.is_stateful())
418 }
419
420 fn fit_generator(&mut self, data: &Array2<f64>) -> Result<(), SklearsError> {
421 for generator in &mut self.generators {
422 if generator.is_stateful() {
423 generator.fit_generator(data)?;
424 }
425 }
426 Ok(())
427 }
428}
429
430pub struct FeatureGeneratorBuilder {
432 composite: CompositeGenerator,
433}
434
435impl FeatureGeneratorBuilder {
436 pub fn new() -> Self {
438 Self {
439 composite: CompositeGenerator::new(),
440 }
441 }
442
443 pub fn with_random_fourier(
445 mut self,
446 n_components: usize,
447 gamma: f64,
448 random_state: Option<u64>,
449 ) -> Self {
450 self.composite
451 .add_generator(Box::new(RandomFourierGenerator::new(
452 n_components,
453 gamma,
454 random_state,
455 )));
456 self
457 }
458
459 pub fn with_polynomial(mut self, degree: usize, include_bias: bool) -> Self {
461 self.composite
462 .add_generator(Box::new(PolynomialGenerator::new(
463 degree,
464 include_bias,
465 false,
466 )));
467 self
468 }
469
470 pub fn with_custom(mut self, generator: Box<dyn FeatureGenerator>) -> Self {
472 self.composite.add_generator(generator);
473 self
474 }
475
476 pub fn build(self) -> CompositeGenerator {
478 self.composite
479 }
480}
481
482impl Default for FeatureGeneratorBuilder {
483 fn default() -> Self {
484 Self::new()
485 }
486}
487
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// Fit-then-generate yields one column per random component.
    #[test]
    fn test_random_fourier_generator() {
        let data = array![[1.0, 2.0], [3.0, 4.0]];
        let mut generator = RandomFourierGenerator::new(50, 1.0, Some(42));

        generator
            .fit_generator(&data)
            .expect("fitting should succeed");
        let features = generator
            .generate(&data)
            .expect("generation should succeed after fitting");

        assert_eq!(features.shape(), &[2, 50]);
        assert_eq!(generator.output_dim(), 50);
        assert!(generator.is_stateful());
    }

    /// Degree-2 expansion with bias produces at least three columns.
    #[test]
    fn test_polynomial_generator() {
        let data = array![[1.0, 2.0], [3.0, 4.0]];
        let generator = PolynomialGenerator::new(2, true, false);

        let features = generator
            .generate(&data)
            .expect("polynomial expansion should succeed");

        assert!(features.ncols() >= 3);
    }

    /// A new composite is empty, and adding a generator is reflected in its length.
    #[test]
    fn test_composite_generator() {
        let mut composite = CompositeGenerator::new();
        assert!(composite.is_empty());

        let fourier = RandomFourierGenerator::new(10, 1.0, Some(42));
        composite.add_generator(Box::new(fourier));

        assert_eq!(composite.len(), 1);
        assert!(!composite.is_empty());
    }

    /// Each `with_*` call on the builder contributes one generator.
    #[test]
    fn test_feature_generator_builder() {
        let builder = FeatureGeneratorBuilder::new()
            .with_random_fourier(50, 1.0, Some(42))
            .with_polynomial(2, true);

        let generator = builder.build();

        assert_eq!(generator.len(), 2);
    }

    /// Interaction-only expansion without bias produces at least three columns.
    #[test]
    fn test_polynomial_interaction_only() {
        let data = array![[1.0, 2.0, 3.0]];
        let generator = PolynomialGenerator::new(2, false, true);

        let features = generator
            .generate(&data)
            .expect("interaction expansion should succeed");

        assert!(features.ncols() >= 3);
    }
}