sklears_core/
algorithm_markers.rs

1/// Marker traits for algorithm categorization and type safety
2///
3/// This module provides marker traits that categorize different types of
4/// machine learning algorithms. These traits enable type-safe algorithm
5/// composition, better API design, and compile-time verification of
6/// algorithm compatibility.
7use std::marker::PhantomData;
8
9/// Marker trait for supervised learning algorithms
10///
11/// Algorithms implementing this trait learn from input-output pairs
12/// and can make predictions on new data.
13pub trait SupervisedLearning {}
14
15/// Marker trait for unsupervised learning algorithms  
16///
17/// Algorithms implementing this trait learn patterns from input data
18/// without explicit output targets.
19pub trait UnsupervisedLearning {}
20
21/// Marker trait for semi-supervised learning algorithms
22///
23/// Algorithms implementing this trait can learn from both labeled
24/// and unlabeled data.
25pub trait SemiSupervisedLearning {}
26
27/// Marker trait for reinforcement learning algorithms
28///
29/// Algorithms implementing this trait learn through interaction
30/// with an environment via rewards and penalties.
31pub trait ReinforcementLearning {}
32
33/// Marker trait for online learning algorithms
34///
35/// Algorithms implementing this trait can incrementally update
36/// their model as new data arrives.
37pub trait OnlineLearning {}
38
39/// Marker trait for batch learning algorithms
40///
41/// Algorithms implementing this trait require the entire dataset
42/// to be available for training.
43pub trait BatchLearning {}
44
45/// Marker trait for streaming algorithms
46///
47/// Algorithms implementing this trait can process data streams
48/// in real-time with bounded memory usage.
49pub trait StreamingLearning {}
50
51// =============================================================================
52// Algorithm Category Markers
53// =============================================================================
54
55/// Marker trait for linear model algorithms
56///
57/// Linear models make predictions using linear combinations of input features.
58/// Examples: Linear Regression, Logistic Regression, SVM with linear kernel
59#[cfg(feature = "linear_models")]
60pub trait LinearModel: SupervisedLearning {}
61
62/// Marker trait for tree-based algorithms
63///
64/// Tree-based algorithms use decision trees for prediction.
65/// Examples: Decision Trees, Random Forest, Gradient Boosting
66#[cfg(feature = "tree_models")]
67pub trait TreeBased {}
68
69/// Marker trait for neural network algorithms
70///
71/// Neural network algorithms use artificial neural networks for learning.
72/// Examples: Multi-layer Perceptron, Convolutional Neural Networks
73#[cfg(feature = "neural_networks")]
74pub trait NeuralNetwork: SupervisedLearning {}
75
76/// Marker trait for clustering algorithms
77///
78/// Clustering algorithms group similar data points together.
79/// Examples: K-Means, DBSCAN, Hierarchical Clustering
80#[cfg(feature = "clustering")]
81pub trait Clustering: UnsupervisedLearning {}
82
83/// Marker trait for dimensionality reduction algorithms
84///
85/// These algorithms reduce the number of features while preserving
86/// important information.
87/// Examples: PCA, t-SNE, UMAP
88#[cfg(feature = "dimensionality_reduction")]
89pub trait DimensionalityReduction: UnsupervisedLearning {}
90
91/// Marker trait for ensemble methods
92///
93/// Ensemble methods combine multiple models to improve prediction accuracy.
94/// Examples: Random Forest, AdaBoost, Voting Classifier
95#[cfg(feature = "ensemble_methods")]
96pub trait EnsembleMethod {}
97
98// =============================================================================
99// Problem Type Markers
100// =============================================================================
101
102/// Marker trait for classification algorithms
103///
104/// Classification algorithms predict discrete class labels.
105pub trait Classification: SupervisedLearning {}
106
107/// Marker trait for regression algorithms
108///
109/// Regression algorithms predict continuous numerical values.
110pub trait Regression: SupervisedLearning {}
111
112/// Marker trait for ranking algorithms
113///
114/// Ranking algorithms order items based on relevance or preference.
115pub trait Ranking: SupervisedLearning {}
116
117/// Marker trait for anomaly detection algorithms
118///
119/// Anomaly detection algorithms identify outliers or unusual patterns.
120pub trait AnomalyDetection {}
121
122/// Marker trait for density estimation algorithms
123///
124/// Density estimation algorithms estimate probability distributions.
125pub trait DensityEstimation: UnsupervisedLearning {}
126
127/// Marker trait for feature selection algorithms
128///
129/// Feature selection algorithms choose the most relevant features.
130pub trait FeatureSelection {}
131
132// =============================================================================
133// Model Characteristics Markers
134// =============================================================================
135
136/// Marker trait for parametric models
137///
138/// Parametric models have a fixed number of parameters independent
139/// of the training set size.
140pub trait Parametric {}
141
142/// Marker trait for non-parametric models
143///
144/// Non-parametric models have a number of parameters that grows
145/// with the training set size.
146pub trait NonParametric {}
147
148/// Marker trait for probabilistic models
149///
150/// Probabilistic models output probability distributions rather
151/// than point predictions.
152pub trait Probabilistic {}
153
154/// Marker trait for deterministic models
155///
156/// Deterministic models produce the same output given the same input.
157pub trait Deterministic {}
158
159/// Marker trait for interpretable models
160///
161/// Interpretable models provide insights into their decision-making process.
162pub trait Interpretable {}
163
164/// Marker trait for black-box models
165///
166/// Black-box models provide predictions without explainable reasoning.
167pub trait BlackBox {}
168
169/// Marker trait for models that support incremental learning
170///
171/// These models can update their parameters incrementally as new data arrives.
172pub trait Incremental {}
173
174/// Marker trait for models that require feature scaling
175///
176/// These models are sensitive to the scale of input features.
177pub trait ScaleSensitive {}
178
179/// Marker trait for models that are robust to outliers
180///
181/// These models perform well even in the presence of outliers.
182pub trait OutlierRobust {}
183
184/// Marker trait for models that handle missing values natively
185///
186/// These models can work with incomplete data without preprocessing.
187pub trait MissingValueTolerant {}
188
189// =============================================================================
190// Computational Characteristics Markers
191// =============================================================================
192
193/// Marker trait for algorithms that support parallel training
194///
195/// These algorithms can utilize multiple CPU cores during training.
196pub trait ParallelTraining {}
197
198/// Marker trait for algorithms that support parallel prediction
199///
200/// These algorithms can make predictions in parallel.
201pub trait ParallelPrediction {}
202
203/// Marker trait for algorithms optimized for GPU computation
204///
205/// These algorithms can leverage GPU acceleration.
206#[cfg(feature = "gpu_support")]
207pub trait GpuAccelerated {}
208
209/// Marker trait for algorithms that support distributed computing
210///
211/// These algorithms can run across multiple machines.
212#[cfg(feature = "distributed")]
213pub trait Distributed {}
214
215/// Marker trait for memory-efficient algorithms
216///
217/// These algorithms have bounded memory usage regardless of dataset size.
218pub trait MemoryEfficient {}
219
220/// Marker trait for algorithms with fast training
221///
222/// These algorithms have sub-quadratic training time complexity.
223pub trait FastTraining {}
224
225/// Marker trait for algorithms with fast prediction
226///
227/// These algorithms have constant or logarithmic prediction time.
228pub trait FastPrediction {}
229
230// =============================================================================
231// Data Type Markers
232// =============================================================================
233
234/// Marker trait for algorithms that work with numerical data
235pub trait NumericalData {}
236
237/// Marker trait for algorithms that work with categorical data
238pub trait CategoricalData {}
239
240/// Marker trait for algorithms that work with text data
241pub trait TextData {}
242
243/// Marker trait for algorithms that work with image data
244pub trait ImageData {}
245
246/// Marker trait for algorithms that work with time series data
247pub trait TimeSeriesData {}
248
249/// Marker trait for algorithms that work with graph data
250pub trait GraphData {}
251
252/// Marker trait for algorithms that work with sparse data
253pub trait SparseData {}
254
255/// Marker trait for algorithms that work with high-dimensional data
256pub trait HighDimensionalData {}
257
258// =============================================================================
259// Combination and Utility Traits
260// =============================================================================
261
262/// Composite trait for algorithms that are both fast and memory-efficient
263pub trait FastAndEfficient: FastTraining + FastPrediction + MemoryEfficient {}
264
265/// Composite trait for algorithms suitable for real-time applications
266pub trait RealTime: FastPrediction + StreamingLearning + MemoryEfficient {}
267
268/// Composite trait for algorithms suitable for big data
269pub trait BigData: ParallelTraining + MemoryEfficient + StreamingLearning {}
270
271/// Composite trait for robust algorithms
272pub trait Robust: OutlierRobust + MissingValueTolerant {}
273
274/// Composite trait for explainable AI algorithms
275pub trait ExplainableAI: Interpretable + Probabilistic {}
276
277/// Algorithm complexity categorization
278#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
279pub enum ComplexityClass {
280    /// Constant time complexity O(1)
281    Constant,
282    /// Logarithmic time complexity O(log n)
283    Logarithmic,
284    /// Linear time complexity O(n)
285    Linear,
286    /// Linearithmic time complexity O(n log n)
287    Linearithmic,
288    /// Quadratic time complexity O(n²)
289    Quadratic,
290    /// Cubic time complexity O(n³)
291    Cubic,
292    /// Exponential time complexity O(2^n)
293    Exponential,
294}
295
296/// Trait for algorithms with known complexity characteristics
297pub trait ComplexityBounds {
298    /// Training time complexity
299    fn training_complexity() -> ComplexityClass;
300    /// Prediction time complexity  
301    fn prediction_complexity() -> ComplexityClass;
302    /// Space complexity
303    fn space_complexity() -> ComplexityClass;
304}
305
306/// Algorithm stability categorization
307#[derive(Debug, Clone, Copy, PartialEq, Eq)]
308pub enum StabilityClass {
309    /// Algorithm is stable to small changes in training data
310    Stable,
311    /// Algorithm is somewhat sensitive to training data changes
312    ModeratelySensitive,
313    /// Algorithm is highly sensitive to training data changes
314    Unstable,
315}
316
317/// Trait for algorithms with known stability characteristics
318pub trait StabilityBounds {
319    /// Stability with respect to training data perturbations
320    fn data_stability() -> StabilityClass;
321    /// Stability with respect to hyperparameter changes
322    fn hyperparameter_stability() -> StabilityClass;
323}
324
325/// Type-level algorithm category system
326pub mod category_system {
327    use super::*;
328
329    /// Type-level representation of algorithm categories
330    pub struct AlgorithmCategory<Learning, Problem, Model, Compute, Data> {
331        _phantom: PhantomData<(Learning, Problem, Model, Compute, Data)>,
332    }
333
334    // Learning paradigm types
335    pub struct Supervised;
336    pub struct Unsupervised;
337    pub struct SemiSupervised;
338    pub struct Reinforcement;
339
340    // Problem type types
341    pub struct ClassificationProblem;
342    pub struct RegressionProblem;
343    pub struct ClusteringProblem;
344    pub struct DimensionalityReductionProblem;
345
346    // Model characteristic types
347    pub struct ParametricModel;
348    pub struct NonParametricModel;
349    pub struct ProbabilisticModel;
350    pub struct DeterministicModel;
351
352    // Computational characteristic types
353    pub struct ParallelCompute;
354    pub struct SequentialCompute;
355    pub struct GpuCompute;
356    pub struct CpuCompute;
357
358    // Data type types
359    pub struct NumericalDataType;
360    pub struct CategoricalDataType;
361    pub struct MixedDataType;
362
363    impl<L, P, M, C, D> AlgorithmCategory<L, P, M, C, D> {
364        pub fn new() -> Self {
365            Self {
366                _phantom: PhantomData,
367            }
368        }
369    }
370
371    impl<L, P, M, C, D> Default for AlgorithmCategory<L, P, M, C, D> {
372        fn default() -> Self {
373            Self::new()
374        }
375    }
376
377    /// Type alias for common algorithm categories
378    pub type SupervisedClassifier = AlgorithmCategory<
379        Supervised,
380        ClassificationProblem,
381        ParametricModel,
382        ParallelCompute,
383        NumericalDataType,
384    >;
385
386    pub type SupervisedRegressor = AlgorithmCategory<
387        Supervised,
388        RegressionProblem,
389        ParametricModel,
390        ParallelCompute,
391        NumericalDataType,
392    >;
393
394    pub type UnsupervisedClusterer = AlgorithmCategory<
395        Unsupervised,
396        ClusteringProblem,
397        NonParametricModel,
398        ParallelCompute,
399        NumericalDataType,
400    >;
401}
402
403/// Macro for implementing multiple marker traits at once
404#[macro_export]
405macro_rules! impl_algorithm_markers {
406    ($algorithm:ty: $($trait:path),+ $(,)?) => {
407        $(
408            impl $trait for $algorithm {}
409        )+
410    };
411}
412
413/// Macro for defining algorithm categories with compile-time checking
414#[macro_export]
415macro_rules! define_algorithm_category {
416    (
417        $name:ident:
418        learning = $learning:ty,
419        problem = $problem:ty,
420        model = $model:ty,
421        compute = $compute:ty,
422        data = $data:ty,
423    ) => {
424        pub type $name = $crate::algorithm_markers::category_system::AlgorithmCategory<
425            $learning,
426            $problem,
427            $model,
428            $compute,
429            $data,
430        >;
431    };
432    (
433        $name:ident:
434        learning = $learning:ty,
435        problem = $problem:ty,
436        model = $model:ty,
437    ) => {
438        pub type $name = $crate::algorithm_markers::category_system::AlgorithmCategory<
439            $learning,
440            $problem,
441            $model,
442            $crate::algorithm_markers::category_system::CpuCompute,
443            $crate::algorithm_markers::category_system::NumericalDataType,
444        >;
445    };
446}
447
448#[allow(non_snake_case)]
449#[cfg(test)]
450mod tests {
451    use super::*;
452
453    // Example algorithm for testing
454    struct MockLinearRegression;
455
456    // Implement marker traits for the mock algorithm
457    impl_algorithm_markers!(
458        MockLinearRegression:
459        SupervisedLearning,
460        Regression,
461        Parametric,
462        Deterministic,
463        FastTraining,
464        FastPrediction,
465        ScaleSensitive,
466        NumericalData,
467        ParallelTraining,
468        ParallelPrediction
469    );
470
471    #[cfg(feature = "linear_models")]
472    impl LinearModel for MockLinearRegression {}
473
474    impl ComplexityBounds for MockLinearRegression {
475        fn training_complexity() -> ComplexityClass {
476            ComplexityClass::Cubic // For normal equation: O(n³)
477        }
478
479        fn prediction_complexity() -> ComplexityClass {
480            ComplexityClass::Linear // O(n) for matrix-vector multiplication
481        }
482
483        fn space_complexity() -> ComplexityClass {
484            ComplexityClass::Quadratic // O(n²) for storing the design matrix
485        }
486    }
487
488    impl StabilityBounds for MockLinearRegression {
489        fn data_stability() -> StabilityClass {
490            StabilityClass::Stable
491        }
492
493        fn hyperparameter_stability() -> StabilityClass {
494            StabilityClass::Stable
495        }
496    }
497
498    #[test]
499    fn test_marker_traits() {
500        fn is_supervised<T: SupervisedLearning>() -> bool {
501            true
502        }
503        fn is_regression<T: Regression>() -> bool {
504            true
505        }
506        fn is_parametric<T: Parametric>() -> bool {
507            true
508        }
509
510        assert!(is_supervised::<MockLinearRegression>());
511        assert!(is_regression::<MockLinearRegression>());
512        assert!(is_parametric::<MockLinearRegression>());
513    }
514
515    #[test]
516    fn test_complexity_bounds() {
517        assert_eq!(
518            MockLinearRegression::training_complexity(),
519            ComplexityClass::Cubic
520        );
521        assert_eq!(
522            MockLinearRegression::prediction_complexity(),
523            ComplexityClass::Linear
524        );
525        assert_eq!(
526            MockLinearRegression::space_complexity(),
527            ComplexityClass::Quadratic
528        );
529    }
530
531    #[test]
532    fn test_stability_bounds() {
533        assert_eq!(
534            MockLinearRegression::data_stability(),
535            StabilityClass::Stable
536        );
537        assert_eq!(
538            MockLinearRegression::hyperparameter_stability(),
539            StabilityClass::Stable
540        );
541    }
542
543    #[test]
544    fn test_complexity_ordering() {
545        assert!(ComplexityClass::Constant < ComplexityClass::Linear);
546        assert!(ComplexityClass::Linear < ComplexityClass::Quadratic);
547        assert!(ComplexityClass::Quadratic < ComplexityClass::Exponential);
548    }
549
550    #[test]
551    fn test_category_system() {
552        let _classifier = category_system::SupervisedClassifier::new();
553        let _regressor = category_system::SupervisedRegressor::new();
554        let _clusterer = category_system::UnsupervisedClusterer::new();
555    }
556
557    #[test]
558    fn test_define_algorithm_category_macro() {
559        // Test the macro for defining custom algorithm categories
560        define_algorithm_category!(
561            MyCustomAlgorithm:
562            learning = category_system::Supervised,
563            problem = category_system::ClassificationProblem,
564            model = category_system::ParametricModel,
565            compute = category_system::ParallelCompute,
566            data = category_system::NumericalDataType,
567        );
568
569        let _my_algorithm = MyCustomAlgorithm::new();
570    }
571
572    // Test composite traits
573    struct MockRandomForest;
574
575    impl_algorithm_markers!(
576        MockRandomForest:
577        SupervisedLearning,
578        Classification,
579        NonParametric,
580        OutlierRobust,
581        MissingValueTolerant,
582        ParallelTraining,
583        ParallelPrediction,
584        MemoryEfficient,
585        StreamingLearning
586    );
587
588    #[cfg(feature = "tree_models")]
589    impl TreeBased for MockRandomForest {}
590
591    #[cfg(feature = "ensemble_methods")]
592    impl EnsembleMethod for MockRandomForest {}
593
594    impl Robust for MockRandomForest {}
595    impl BigData for MockRandomForest {}
596
597    #[test]
598    fn test_composite_traits() {
599        fn is_robust<T: Robust>() -> bool {
600            true
601        }
602        fn is_big_data<T: BigData>() -> bool {
603            true
604        }
605
606        assert!(is_robust::<MockRandomForest>());
607        assert!(is_big_data::<MockRandomForest>());
608    }
609
610    // Test that algorithms can be categorized correctly
611    #[test]
612    fn test_algorithm_categorization() {
613        // Function that only accepts supervised learning algorithms
614        fn train_supervised<T: SupervisedLearning>(_algorithm: T) {
615            // Training logic would go here
616        }
617
618        // Function that only accepts ensemble methods
619        #[cfg(feature = "ensemble_methods")]
620        fn ensemble_predict<T: EnsembleMethod>(_algorithm: T) {
621            // Ensemble prediction logic would go here
622        }
623
624        // These should compile without issues
625        train_supervised(MockLinearRegression);
626        train_supervised(MockRandomForest);
627
628        #[cfg(feature = "ensemble_methods")]
629        ensemble_predict(MockRandomForest);
630
631        // This would not compile (MockLinearRegression is not an EnsembleMethod):
632        // ensemble_predict(MockLinearRegression);
633    }
634}