sklears_core/algorithm_markers.rs
1/// Marker traits for algorithm categorization and type safety
2///
3/// This module provides marker traits that categorize different types of
4/// machine learning algorithms. These traits enable type-safe algorithm
5/// composition, better API design, and compile-time verification of
6/// algorithm compatibility.
7use std::marker::PhantomData;
8
9/// Marker trait for supervised learning algorithms
10///
11/// Algorithms implementing this trait learn from input-output pairs
12/// and can make predictions on new data.
13pub trait SupervisedLearning {}
14
15/// Marker trait for unsupervised learning algorithms
16///
17/// Algorithms implementing this trait learn patterns from input data
18/// without explicit output targets.
19pub trait UnsupervisedLearning {}
20
21/// Marker trait for semi-supervised learning algorithms
22///
23/// Algorithms implementing this trait can learn from both labeled
24/// and unlabeled data.
25pub trait SemiSupervisedLearning {}
26
27/// Marker trait for reinforcement learning algorithms
28///
29/// Algorithms implementing this trait learn through interaction
30/// with an environment via rewards and penalties.
31pub trait ReinforcementLearning {}
32
33/// Marker trait for online learning algorithms
34///
35/// Algorithms implementing this trait can incrementally update
36/// their model as new data arrives.
37pub trait OnlineLearning {}
38
39/// Marker trait for batch learning algorithms
40///
41/// Algorithms implementing this trait require the entire dataset
42/// to be available for training.
43pub trait BatchLearning {}
44
45/// Marker trait for streaming algorithms
46///
47/// Algorithms implementing this trait can process data streams
48/// in real-time with bounded memory usage.
49pub trait StreamingLearning {}
50
51// =============================================================================
52// Algorithm Category Markers
53// =============================================================================
54
55/// Marker trait for linear model algorithms
56///
57/// Linear models make predictions using linear combinations of input features.
58/// Examples: Linear Regression, Logistic Regression, SVM with linear kernel
59#[cfg(feature = "linear_models")]
60pub trait LinearModel: SupervisedLearning {}
61
62/// Marker trait for tree-based algorithms
63///
64/// Tree-based algorithms use decision trees for prediction.
65/// Examples: Decision Trees, Random Forest, Gradient Boosting
66#[cfg(feature = "tree_models")]
67pub trait TreeBased {}
68
69/// Marker trait for neural network algorithms
70///
71/// Neural network algorithms use artificial neural networks for learning.
72/// Examples: Multi-layer Perceptron, Convolutional Neural Networks
73#[cfg(feature = "neural_networks")]
74pub trait NeuralNetwork: SupervisedLearning {}
75
76/// Marker trait for clustering algorithms
77///
78/// Clustering algorithms group similar data points together.
79/// Examples: K-Means, DBSCAN, Hierarchical Clustering
80#[cfg(feature = "clustering")]
81pub trait Clustering: UnsupervisedLearning {}
82
83/// Marker trait for dimensionality reduction algorithms
84///
85/// These algorithms reduce the number of features while preserving
86/// important information.
87/// Examples: PCA, t-SNE, UMAP
88#[cfg(feature = "dimensionality_reduction")]
89pub trait DimensionalityReduction: UnsupervisedLearning {}
90
91/// Marker trait for ensemble methods
92///
93/// Ensemble methods combine multiple models to improve prediction accuracy.
94/// Examples: Random Forest, AdaBoost, Voting Classifier
95#[cfg(feature = "ensemble_methods")]
96pub trait EnsembleMethod {}
97
98// =============================================================================
99// Problem Type Markers
100// =============================================================================
101
102/// Marker trait for classification algorithms
103///
104/// Classification algorithms predict discrete class labels.
105pub trait Classification: SupervisedLearning {}
106
107/// Marker trait for regression algorithms
108///
109/// Regression algorithms predict continuous numerical values.
110pub trait Regression: SupervisedLearning {}
111
112/// Marker trait for ranking algorithms
113///
114/// Ranking algorithms order items based on relevance or preference.
115pub trait Ranking: SupervisedLearning {}
116
117/// Marker trait for anomaly detection algorithms
118///
119/// Anomaly detection algorithms identify outliers or unusual patterns.
120pub trait AnomalyDetection {}
121
122/// Marker trait for density estimation algorithms
123///
124/// Density estimation algorithms estimate probability distributions.
125pub trait DensityEstimation: UnsupervisedLearning {}
126
127/// Marker trait for feature selection algorithms
128///
129/// Feature selection algorithms choose the most relevant features.
130pub trait FeatureSelection {}
131
132// =============================================================================
133// Model Characteristics Markers
134// =============================================================================
135
136/// Marker trait for parametric models
137///
138/// Parametric models have a fixed number of parameters independent
139/// of the training set size.
140pub trait Parametric {}
141
142/// Marker trait for non-parametric models
143///
144/// Non-parametric models have a number of parameters that grows
145/// with the training set size.
146pub trait NonParametric {}
147
148/// Marker trait for probabilistic models
149///
150/// Probabilistic models output probability distributions rather
151/// than point predictions.
152pub trait Probabilistic {}
153
154/// Marker trait for deterministic models
155///
156/// Deterministic models produce the same output given the same input.
157pub trait Deterministic {}
158
159/// Marker trait for interpretable models
160///
161/// Interpretable models provide insights into their decision-making process.
162pub trait Interpretable {}
163
164/// Marker trait for black-box models
165///
166/// Black-box models provide predictions without explainable reasoning.
167pub trait BlackBox {}
168
169/// Marker trait for models that support incremental learning
170///
171/// These models can update their parameters incrementally as new data arrives.
172pub trait Incremental {}
173
174/// Marker trait for models that require feature scaling
175///
176/// These models are sensitive to the scale of input features.
177pub trait ScaleSensitive {}
178
179/// Marker trait for models that are robust to outliers
180///
181/// These models perform well even in the presence of outliers.
182pub trait OutlierRobust {}
183
184/// Marker trait for models that handle missing values natively
185///
186/// These models can work with incomplete data without preprocessing.
187pub trait MissingValueTolerant {}
188
189// =============================================================================
190// Computational Characteristics Markers
191// =============================================================================
192
193/// Marker trait for algorithms that support parallel training
194///
195/// These algorithms can utilize multiple CPU cores during training.
196pub trait ParallelTraining {}
197
198/// Marker trait for algorithms that support parallel prediction
199///
200/// These algorithms can make predictions in parallel.
201pub trait ParallelPrediction {}
202
203/// Marker trait for algorithms optimized for GPU computation
204///
205/// These algorithms can leverage GPU acceleration.
206#[cfg(feature = "gpu_support")]
207pub trait GpuAccelerated {}
208
209/// Marker trait for algorithms that support distributed computing
210///
211/// These algorithms can run across multiple machines.
212#[cfg(feature = "distributed")]
213pub trait Distributed {}
214
215/// Marker trait for memory-efficient algorithms
216///
217/// These algorithms have bounded memory usage regardless of dataset size.
218pub trait MemoryEfficient {}
219
220/// Marker trait for algorithms with fast training
221///
222/// These algorithms have sub-quadratic training time complexity.
223pub trait FastTraining {}
224
225/// Marker trait for algorithms with fast prediction
226///
227/// These algorithms have constant or logarithmic prediction time.
228pub trait FastPrediction {}
229
230// =============================================================================
231// Data Type Markers
232// =============================================================================
233
234/// Marker trait for algorithms that work with numerical data
235pub trait NumericalData {}
236
237/// Marker trait for algorithms that work with categorical data
238pub trait CategoricalData {}
239
240/// Marker trait for algorithms that work with text data
241pub trait TextData {}
242
243/// Marker trait for algorithms that work with image data
244pub trait ImageData {}
245
246/// Marker trait for algorithms that work with time series data
247pub trait TimeSeriesData {}
248
249/// Marker trait for algorithms that work with graph data
250pub trait GraphData {}
251
252/// Marker trait for algorithms that work with sparse data
253pub trait SparseData {}
254
255/// Marker trait for algorithms that work with high-dimensional data
256pub trait HighDimensionalData {}
257
258// =============================================================================
259// Combination and Utility Traits
260// =============================================================================
261
262/// Composite trait for algorithms that are both fast and memory-efficient
263pub trait FastAndEfficient: FastTraining + FastPrediction + MemoryEfficient {}
264
265/// Composite trait for algorithms suitable for real-time applications
266pub trait RealTime: FastPrediction + StreamingLearning + MemoryEfficient {}
267
268/// Composite trait for algorithms suitable for big data
269pub trait BigData: ParallelTraining + MemoryEfficient + StreamingLearning {}
270
271/// Composite trait for robust algorithms
272pub trait Robust: OutlierRobust + MissingValueTolerant {}
273
274/// Composite trait for explainable AI algorithms
275pub trait ExplainableAI: Interpretable + Probabilistic {}
276
277/// Algorithm complexity categorization
278#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
279pub enum ComplexityClass {
280 /// Constant time complexity O(1)
281 Constant,
282 /// Logarithmic time complexity O(log n)
283 Logarithmic,
284 /// Linear time complexity O(n)
285 Linear,
286 /// Linearithmic time complexity O(n log n)
287 Linearithmic,
288 /// Quadratic time complexity O(n²)
289 Quadratic,
290 /// Cubic time complexity O(n³)
291 Cubic,
292 /// Exponential time complexity O(2^n)
293 Exponential,
294}
295
296/// Trait for algorithms with known complexity characteristics
297pub trait ComplexityBounds {
298 /// Training time complexity
299 fn training_complexity() -> ComplexityClass;
300 /// Prediction time complexity
301 fn prediction_complexity() -> ComplexityClass;
302 /// Space complexity
303 fn space_complexity() -> ComplexityClass;
304}
305
306/// Algorithm stability categorization
307#[derive(Debug, Clone, Copy, PartialEq, Eq)]
308pub enum StabilityClass {
309 /// Algorithm is stable to small changes in training data
310 Stable,
311 /// Algorithm is somewhat sensitive to training data changes
312 ModeratelySensitive,
313 /// Algorithm is highly sensitive to training data changes
314 Unstable,
315}
316
317/// Trait for algorithms with known stability characteristics
318pub trait StabilityBounds {
319 /// Stability with respect to training data perturbations
320 fn data_stability() -> StabilityClass;
321 /// Stability with respect to hyperparameter changes
322 fn hyperparameter_stability() -> StabilityClass;
323}
324
325/// Type-level algorithm category system
326pub mod category_system {
327 use super::*;
328
329 /// Type-level representation of algorithm categories
330 pub struct AlgorithmCategory<Learning, Problem, Model, Compute, Data> {
331 _phantom: PhantomData<(Learning, Problem, Model, Compute, Data)>,
332 }
333
334 // Learning paradigm types
335 pub struct Supervised;
336 pub struct Unsupervised;
337 pub struct SemiSupervised;
338 pub struct Reinforcement;
339
340 // Problem type types
341 pub struct ClassificationProblem;
342 pub struct RegressionProblem;
343 pub struct ClusteringProblem;
344 pub struct DimensionalityReductionProblem;
345
346 // Model characteristic types
347 pub struct ParametricModel;
348 pub struct NonParametricModel;
349 pub struct ProbabilisticModel;
350 pub struct DeterministicModel;
351
352 // Computational characteristic types
353 pub struct ParallelCompute;
354 pub struct SequentialCompute;
355 pub struct GpuCompute;
356 pub struct CpuCompute;
357
358 // Data type types
359 pub struct NumericalDataType;
360 pub struct CategoricalDataType;
361 pub struct MixedDataType;
362
363 impl<L, P, M, C, D> AlgorithmCategory<L, P, M, C, D> {
364 pub fn new() -> Self {
365 Self {
366 _phantom: PhantomData,
367 }
368 }
369 }
370
371 impl<L, P, M, C, D> Default for AlgorithmCategory<L, P, M, C, D> {
372 fn default() -> Self {
373 Self::new()
374 }
375 }
376
377 /// Type alias for common algorithm categories
378 pub type SupervisedClassifier = AlgorithmCategory<
379 Supervised,
380 ClassificationProblem,
381 ParametricModel,
382 ParallelCompute,
383 NumericalDataType,
384 >;
385
386 pub type SupervisedRegressor = AlgorithmCategory<
387 Supervised,
388 RegressionProblem,
389 ParametricModel,
390 ParallelCompute,
391 NumericalDataType,
392 >;
393
394 pub type UnsupervisedClusterer = AlgorithmCategory<
395 Unsupervised,
396 ClusteringProblem,
397 NonParametricModel,
398 ParallelCompute,
399 NumericalDataType,
400 >;
401}
402
403/// Macro for implementing multiple marker traits at once
404#[macro_export]
405macro_rules! impl_algorithm_markers {
406 ($algorithm:ty: $($trait:path),+ $(,)?) => {
407 $(
408 impl $trait for $algorithm {}
409 )+
410 };
411}
412
413/// Macro for defining algorithm categories with compile-time checking
414#[macro_export]
415macro_rules! define_algorithm_category {
416 (
417 $name:ident:
418 learning = $learning:ty,
419 problem = $problem:ty,
420 model = $model:ty,
421 compute = $compute:ty,
422 data = $data:ty,
423 ) => {
424 pub type $name = $crate::algorithm_markers::category_system::AlgorithmCategory<
425 $learning,
426 $problem,
427 $model,
428 $compute,
429 $data,
430 >;
431 };
432 (
433 $name:ident:
434 learning = $learning:ty,
435 problem = $problem:ty,
436 model = $model:ty,
437 ) => {
438 pub type $name = $crate::algorithm_markers::category_system::AlgorithmCategory<
439 $learning,
440 $problem,
441 $model,
442 $crate::algorithm_markers::category_system::CpuCompute,
443 $crate::algorithm_markers::category_system::NumericalDataType,
444 >;
445 };
446}
447
448#[allow(non_snake_case)]
449#[cfg(test)]
450mod tests {
451 use super::*;
452
453 // Example algorithm for testing
454 struct MockLinearRegression;
455
456 // Implement marker traits for the mock algorithm
457 impl_algorithm_markers!(
458 MockLinearRegression:
459 SupervisedLearning,
460 Regression,
461 Parametric,
462 Deterministic,
463 FastTraining,
464 FastPrediction,
465 ScaleSensitive,
466 NumericalData,
467 ParallelTraining,
468 ParallelPrediction
469 );
470
471 #[cfg(feature = "linear_models")]
472 impl LinearModel for MockLinearRegression {}
473
474 impl ComplexityBounds for MockLinearRegression {
475 fn training_complexity() -> ComplexityClass {
476 ComplexityClass::Cubic // For normal equation: O(n³)
477 }
478
479 fn prediction_complexity() -> ComplexityClass {
480 ComplexityClass::Linear // O(n) for matrix-vector multiplication
481 }
482
483 fn space_complexity() -> ComplexityClass {
484 ComplexityClass::Quadratic // O(n²) for storing the design matrix
485 }
486 }
487
488 impl StabilityBounds for MockLinearRegression {
489 fn data_stability() -> StabilityClass {
490 StabilityClass::Stable
491 }
492
493 fn hyperparameter_stability() -> StabilityClass {
494 StabilityClass::Stable
495 }
496 }
497
498 #[test]
499 fn test_marker_traits() {
500 fn is_supervised<T: SupervisedLearning>() -> bool {
501 true
502 }
503 fn is_regression<T: Regression>() -> bool {
504 true
505 }
506 fn is_parametric<T: Parametric>() -> bool {
507 true
508 }
509
510 assert!(is_supervised::<MockLinearRegression>());
511 assert!(is_regression::<MockLinearRegression>());
512 assert!(is_parametric::<MockLinearRegression>());
513 }
514
515 #[test]
516 fn test_complexity_bounds() {
517 assert_eq!(
518 MockLinearRegression::training_complexity(),
519 ComplexityClass::Cubic
520 );
521 assert_eq!(
522 MockLinearRegression::prediction_complexity(),
523 ComplexityClass::Linear
524 );
525 assert_eq!(
526 MockLinearRegression::space_complexity(),
527 ComplexityClass::Quadratic
528 );
529 }
530
531 #[test]
532 fn test_stability_bounds() {
533 assert_eq!(
534 MockLinearRegression::data_stability(),
535 StabilityClass::Stable
536 );
537 assert_eq!(
538 MockLinearRegression::hyperparameter_stability(),
539 StabilityClass::Stable
540 );
541 }
542
543 #[test]
544 fn test_complexity_ordering() {
545 assert!(ComplexityClass::Constant < ComplexityClass::Linear);
546 assert!(ComplexityClass::Linear < ComplexityClass::Quadratic);
547 assert!(ComplexityClass::Quadratic < ComplexityClass::Exponential);
548 }
549
550 #[test]
551 fn test_category_system() {
552 let _classifier = category_system::SupervisedClassifier::new();
553 let _regressor = category_system::SupervisedRegressor::new();
554 let _clusterer = category_system::UnsupervisedClusterer::new();
555 }
556
557 #[test]
558 fn test_define_algorithm_category_macro() {
559 // Test the macro for defining custom algorithm categories
560 define_algorithm_category!(
561 MyCustomAlgorithm:
562 learning = category_system::Supervised,
563 problem = category_system::ClassificationProblem,
564 model = category_system::ParametricModel,
565 compute = category_system::ParallelCompute,
566 data = category_system::NumericalDataType,
567 );
568
569 let _my_algorithm = MyCustomAlgorithm::new();
570 }
571
572 // Test composite traits
573 struct MockRandomForest;
574
575 impl_algorithm_markers!(
576 MockRandomForest:
577 SupervisedLearning,
578 Classification,
579 NonParametric,
580 OutlierRobust,
581 MissingValueTolerant,
582 ParallelTraining,
583 ParallelPrediction,
584 MemoryEfficient,
585 StreamingLearning
586 );
587
588 #[cfg(feature = "tree_models")]
589 impl TreeBased for MockRandomForest {}
590
591 #[cfg(feature = "ensemble_methods")]
592 impl EnsembleMethod for MockRandomForest {}
593
594 impl Robust for MockRandomForest {}
595 impl BigData for MockRandomForest {}
596
597 #[test]
598 fn test_composite_traits() {
599 fn is_robust<T: Robust>() -> bool {
600 true
601 }
602 fn is_big_data<T: BigData>() -> bool {
603 true
604 }
605
606 assert!(is_robust::<MockRandomForest>());
607 assert!(is_big_data::<MockRandomForest>());
608 }
609
610 // Test that algorithms can be categorized correctly
611 #[test]
612 fn test_algorithm_categorization() {
613 // Function that only accepts supervised learning algorithms
614 fn train_supervised<T: SupervisedLearning>(_algorithm: T) {
615 // Training logic would go here
616 }
617
618 // Function that only accepts ensemble methods
619 #[cfg(feature = "ensemble_methods")]
620 fn ensemble_predict<T: EnsembleMethod>(_algorithm: T) {
621 // Ensemble prediction logic would go here
622 }
623
624 // These should compile without issues
625 train_supervised(MockLinearRegression);
626 train_supervised(MockRandomForest);
627
628 #[cfg(feature = "ensemble_methods")]
629 ensemble_predict(MockRandomForest);
630
631 // This would not compile (MockLinearRegression is not an EnsembleMethod):
632 // ensemble_predict(MockLinearRegression);
633 }
634}