sklears_compose/ensemble/
voting.rs

1//! Voting ensemble implementations
2//!
3//! This module provides `VotingClassifier` and `VotingRegressor` implementations
4//! for combining multiple estimators through majority voting or averaging.
5
6use super::common::simd_fallback;
7use crate::PipelinePredictor;
8use scirs2_core::ndarray::{Array1, ArrayView1, ArrayView2};
9use sklears_core::{
10    error::Result as SklResult,
11    prelude::SklearsError,
12    traits::{Estimator, Fit, Untrained},
13    types::Float,
14};
15
16/// `VotingClassifier`
17///
18/// Ensemble classifier that combines multiple classifiers using majority voting
19/// or probability averaging.
20///
21/// # Type Parameters
22///
23/// * `S` - State type (Untrained or `VotingClassifierTrained`)
24///
25/// # Examples
26///
27/// ```ignore
28/// use sklears_compose::{VotingClassifier, MockPredictor};
29/// use scirs2_core::ndarray::array;
30///
31/// let voting_clf = VotingClassifier::builder()
32///     .estimator("clf1", Box::new(MockPredictor::new()))
33///     .estimator("clf2", Box::new(MockPredictor::new()))
34///     .voting("hard")
35///     .build();
36/// ```
37pub struct VotingClassifier<S = Untrained> {
38    state: S,
39    estimators: Vec<(String, Box<dyn PipelinePredictor>)>,
40    voting: String, // "hard" or "soft"
41    weights: Option<Vec<f64>>,
42    n_jobs: Option<i32>,
43    flatten_transform: bool,
44}
45
46/// `VotingRegressor`
47///
48/// Ensemble regressor that combines multiple regressors by averaging their predictions.
49///
50/// # Type Parameters
51///
52/// * `S` - State type (Untrained or `VotingRegressorTrained`)
53///
54/// # Examples
55///
56/// ```ignore
57/// use sklears_compose::{VotingRegressor, MockPredictor};
58/// use scirs2_core::ndarray::array;
59///
60/// let voting_reg = VotingRegressor::builder()
61///     .estimator("reg1", Box::new(MockPredictor::new()))
62///     .estimator("reg2", Box::new(MockPredictor::new()))
63///     .build();
64/// ```
65pub struct VotingRegressor<S = Untrained> {
66    state: S,
67    estimators: Vec<(String, Box<dyn PipelinePredictor>)>,
68    weights: Option<Vec<f64>>,
69    n_jobs: Option<i32>,
70}
71
72/// Trained state for `VotingClassifier`
73pub struct VotingClassifierTrained {
74    fitted_estimators: Vec<(String, Box<dyn PipelinePredictor>)>,
75    classes: Array1<f64>,
76    n_features_in: usize,
77    feature_names_in: Option<Vec<String>>,
78}
79
80/// Trained state for `VotingRegressor`
81pub struct VotingRegressorTrained {
82    fitted_estimators: Vec<(String, Box<dyn PipelinePredictor>)>,
83    n_features_in: usize,
84    feature_names_in: Option<Vec<String>>,
85}
86
87impl VotingClassifier<Untrained> {
88    /// Create a new `VotingClassifier` instance
89    #[must_use]
90    pub fn new() -> Self {
91        Self {
92            state: Untrained,
93            estimators: Vec::new(),
94            voting: "hard".to_string(),
95            weights: None,
96            n_jobs: None,
97            flatten_transform: true,
98        }
99    }
100
101    /// Create a voting classifier builder
102    #[must_use]
103    pub fn builder() -> VotingClassifierBuilder {
104        VotingClassifierBuilder::new()
105    }
106
107    /// Add an estimator
108    pub fn add_estimator(&mut self, name: String, estimator: Box<dyn PipelinePredictor>) {
109        self.estimators.push((name, estimator));
110    }
111
112    /// Set voting strategy
113    #[must_use]
114    pub fn voting(mut self, voting: &str) -> Self {
115        self.voting = voting.to_string();
116        self
117    }
118
119    /// Set weights
120    #[must_use]
121    pub fn weights(mut self, weights: Vec<f64>) -> Self {
122        self.weights = Some(weights);
123        self
124    }
125
126    /// Set number of jobs
127    #[must_use]
128    pub fn n_jobs(mut self, n_jobs: Option<i32>) -> Self {
129        self.n_jobs = n_jobs;
130        self
131    }
132
133    /// Set flatten transform
134    #[must_use]
135    pub fn flatten_transform(mut self, flatten: bool) -> Self {
136        self.flatten_transform = flatten;
137        self
138    }
139}
140
141impl Default for VotingClassifier<Untrained> {
142    fn default() -> Self {
143        Self::new()
144    }
145}
146
147impl Estimator for VotingClassifier<Untrained> {
148    type Config = ();
149    type Error = SklearsError;
150    type Float = Float;
151
152    fn config(&self) -> &Self::Config {
153        &()
154    }
155}
156
157impl Fit<ArrayView2<'_, Float>, Option<&ArrayView1<'_, Float>>> for VotingClassifier<Untrained> {
158    type Fitted = VotingClassifier<VotingClassifierTrained>;
159
160    fn fit(
161        self,
162        x: &ArrayView2<'_, Float>,
163        y: &Option<&ArrayView1<'_, Float>>,
164    ) -> SklResult<Self::Fitted> {
165        if let Some(y_values) = y.as_ref() {
166            let mut fitted_estimators = Vec::new();
167
168            for (name, mut estimator) in self.estimators {
169                estimator.fit(x, y_values)?;
170                fitted_estimators.push((name, estimator));
171            }
172
173            // Extract unique classes
174            let mut classes: Vec<f64> = y_values.to_vec();
175            classes.sort_by(|a, b| a.partial_cmp(b).unwrap());
176            classes.dedup();
177            let classes = Array1::from(classes);
178
179            Ok(VotingClassifier {
180                state: VotingClassifierTrained {
181                    fitted_estimators,
182                    classes,
183                    n_features_in: x.ncols(),
184                    feature_names_in: None,
185                },
186                estimators: Vec::new(),
187                voting: self.voting,
188                weights: self.weights,
189                n_jobs: self.n_jobs,
190                flatten_transform: self.flatten_transform,
191            })
192        } else {
193            Err(SklearsError::InvalidInput(
194                "Target values required for fitting".to_string(),
195            ))
196        }
197    }
198}
199
200impl VotingRegressor<Untrained> {
201    /// Create a new `VotingRegressor` instance
202    #[must_use]
203    pub fn new() -> Self {
204        Self {
205            state: Untrained,
206            estimators: Vec::new(),
207            weights: None,
208            n_jobs: None,
209        }
210    }
211
212    /// Create a voting regressor builder
213    #[must_use]
214    pub fn builder() -> VotingRegressorBuilder {
215        VotingRegressorBuilder::new()
216    }
217
218    /// Add an estimator
219    pub fn add_estimator(&mut self, name: String, estimator: Box<dyn PipelinePredictor>) {
220        self.estimators.push((name, estimator));
221    }
222
223    /// Set weights
224    #[must_use]
225    pub fn weights(mut self, weights: Vec<f64>) -> Self {
226        self.weights = Some(weights);
227        self
228    }
229
230    /// Set number of jobs
231    #[must_use]
232    pub fn n_jobs(mut self, n_jobs: Option<i32>) -> Self {
233        self.n_jobs = n_jobs;
234        self
235    }
236}
237
238impl Default for VotingRegressor<Untrained> {
239    fn default() -> Self {
240        Self::new()
241    }
242}
243
244impl Estimator for VotingRegressor<Untrained> {
245    type Config = ();
246    type Error = SklearsError;
247    type Float = Float;
248
249    fn config(&self) -> &Self::Config {
250        &()
251    }
252}
253
254impl Fit<ArrayView2<'_, Float>, Option<&ArrayView1<'_, Float>>> for VotingRegressor<Untrained> {
255    type Fitted = VotingRegressor<VotingRegressorTrained>;
256
257    fn fit(
258        self,
259        x: &ArrayView2<'_, Float>,
260        y: &Option<&ArrayView1<'_, Float>>,
261    ) -> SklResult<Self::Fitted> {
262        if let Some(y_values) = y.as_ref() {
263            let mut fitted_estimators = Vec::new();
264
265            for (name, mut estimator) in self.estimators {
266                estimator.fit(x, y_values)?;
267                fitted_estimators.push((name, estimator));
268            }
269
270            Ok(VotingRegressor {
271                state: VotingRegressorTrained {
272                    fitted_estimators,
273                    n_features_in: x.ncols(),
274                    feature_names_in: None,
275                },
276                estimators: Vec::new(),
277                weights: self.weights,
278                n_jobs: self.n_jobs,
279            })
280        } else {
281            Err(SklearsError::InvalidInput(
282                "Target values required for fitting".to_string(),
283            ))
284        }
285    }
286}
287
288/// `VotingClassifier` builder for fluent construction
289pub struct VotingClassifierBuilder {
290    estimators: Vec<(String, Box<dyn PipelinePredictor>)>,
291    voting: String,
292    weights: Option<Vec<f64>>,
293    n_jobs: Option<i32>,
294    flatten_transform: bool,
295}
296
297impl VotingClassifierBuilder {
298    /// Create a new builder
299    #[must_use]
300    pub fn new() -> Self {
301        Self {
302            estimators: Vec::new(),
303            voting: "hard".to_string(),
304            weights: None,
305            n_jobs: None,
306            flatten_transform: true,
307        }
308    }
309
310    /// Add an estimator
311    #[must_use]
312    pub fn estimator(mut self, name: &str, estimator: Box<dyn PipelinePredictor>) -> Self {
313        self.estimators.push((name.to_string(), estimator));
314        self
315    }
316
317    /// Set voting strategy
318    #[must_use]
319    pub fn voting(mut self, voting: &str) -> Self {
320        self.voting = voting.to_string();
321        self
322    }
323
324    /// Set weights
325    #[must_use]
326    pub fn weights(mut self, weights: Vec<f64>) -> Self {
327        self.weights = Some(weights);
328        self
329    }
330
331    /// Set number of jobs
332    #[must_use]
333    pub fn n_jobs(mut self, n_jobs: Option<i32>) -> Self {
334        self.n_jobs = n_jobs;
335        self
336    }
337
338    /// Set flatten transform
339    #[must_use]
340    pub fn flatten_transform(mut self, flatten: bool) -> Self {
341        self.flatten_transform = flatten;
342        self
343    }
344
345    /// Build the `VotingClassifier`
346    #[must_use]
347    pub fn build(self) -> VotingClassifier<Untrained> {
348        VotingClassifier {
349            state: Untrained,
350            estimators: self.estimators,
351            voting: self.voting,
352            weights: self.weights,
353            n_jobs: self.n_jobs,
354            flatten_transform: self.flatten_transform,
355        }
356    }
357}
358
359/// `VotingRegressor` builder for fluent construction
360pub struct VotingRegressorBuilder {
361    estimators: Vec<(String, Box<dyn PipelinePredictor>)>,
362    weights: Option<Vec<f64>>,
363    n_jobs: Option<i32>,
364}
365
366impl VotingRegressorBuilder {
367    /// Create a new builder
368    #[must_use]
369    pub fn new() -> Self {
370        Self {
371            estimators: Vec::new(),
372            weights: None,
373            n_jobs: None,
374        }
375    }
376
377    /// Add an estimator
378    #[must_use]
379    pub fn estimator(mut self, name: &str, estimator: Box<dyn PipelinePredictor>) -> Self {
380        self.estimators.push((name.to_string(), estimator));
381        self
382    }
383
384    /// Set weights
385    #[must_use]
386    pub fn weights(mut self, weights: Vec<f64>) -> Self {
387        self.weights = Some(weights);
388        self
389    }
390
391    /// Set number of jobs
392    #[must_use]
393    pub fn n_jobs(mut self, n_jobs: Option<i32>) -> Self {
394        self.n_jobs = n_jobs;
395        self
396    }
397
398    /// Build the `VotingRegressor`
399    #[must_use]
400    pub fn build(self) -> VotingRegressor<Untrained> {
401        VotingRegressor {
402            state: Untrained,
403            estimators: self.estimators,
404            weights: self.weights,
405            n_jobs: self.n_jobs,
406        }
407    }
408}
409
410impl Default for VotingClassifierBuilder {
411    fn default() -> Self {
412        Self::new()
413    }
414}
415
416impl Default for VotingRegressorBuilder {
417    fn default() -> Self {
418        Self::new()
419    }
420}
421
422/// SIMD-accelerated ensemble aggregation functions for voting ensembles
423pub mod simd_voting {
424    use super::{simd_fallback, Array1};
425
426    /// SIMD-accelerated weighted averaging of predictions
427    #[must_use]
428    pub fn simd_weighted_average_predictions(
429        all_predictions: &[Array1<f64>],
430        weights: &[f64],
431    ) -> Array1<f64> {
432        if all_predictions.is_empty() || weights.is_empty() {
433            return Array1::zeros(0);
434        }
435
436        let n_samples = all_predictions[0].len();
437        let n_estimators = all_predictions.len().min(weights.len());
438
439        // Convert to f32 for SIMD processing
440        let predictions_f32: Vec<Vec<f32>> = all_predictions[..n_estimators]
441            .iter()
442            .map(|pred| pred.iter().map(|&x| x as f32).collect())
443            .collect();
444
445        let weights_f32: Vec<f32> = weights[..n_estimators].iter().map(|&x| x as f32).collect();
446
447        // Initialize result vector
448        let mut result_f32 = vec![0.0f32; n_samples];
449
450        // SIMD-accelerated weighted sum across all estimators
451        for (i, (pred, &weight)) in predictions_f32.iter().zip(weights_f32.iter()).enumerate() {
452            if i == 0 {
453                // First prediction: multiply by weight and store
454                simd_fallback::scale_vec(pred, weight, &mut result_f32);
455            } else {
456                // Subsequent predictions: multiply by weight and accumulate
457                let mut weighted_pred = vec![0.0f32; n_samples];
458                simd_fallback::scale_vec(pred, weight, &mut weighted_pred);
459                for i in 0..result_f32.len() {
460                    result_f32[i] += weighted_pred[i];
461                }
462            }
463        }
464
465        // Convert back to f64 and return as Array1
466        let result_f64: Vec<f64> = result_f32.iter().map(|&x| f64::from(x)).collect();
467        Array1::from_vec(result_f64)
468    }
469}