sklears_dummy/
lib.rs

1#![allow(dead_code)]
2#![allow(non_snake_case)]
3#![allow(missing_docs)]
4#![allow(deprecated)]
5#![allow(clippy::all)]
6#![allow(clippy::pedantic)]
7#![allow(clippy::nursery)]
8#![allow(unused_imports)]
9#![allow(unused_variables)]
10#![allow(unused_mut)]
11#![allow(unused_assignments)]
12#![allow(unused_doc_comments)]
13#![allow(unused_parens)]
14#![allow(unused_comparisons)]
15#![allow(ambiguous_glob_reexports)]
16//! Dummy estimators for baseline comparisons
17//!
//! This crate provides simple baseline estimators that, for the most part, ignore the
//! input features and generate predictions based on simple rules. They are useful for
//! establishing baselines against which more sophisticated models can be compared.
21//!
//! The crate includes, among others:
//! - [`DummyClassifier`] - Simple rule-based classifier
//! - [`DummyRegressor`] - Simple rule-based regressor
25//! - [`ContextAwareDummyRegressor`] - Context-aware baselines using feature information
26//! - [`ContextAwareDummyClassifier`] - Context-aware classifier baselines
27//! - [`RobustDummyRegressor`] - Robust baselines resistant to outliers
28//! - [`RobustDummyClassifier`] - Robust classifier baselines
29//! - [`OnlineDummyRegressor`] - Online learning regressor for streaming data
30//! - [`OnlineDummyClassifier`] - Online learning classifier for streaming data
31//! - [`BenchmarkClassifier`] - Standard benchmark baselines for classification
32//! - [`BenchmarkRegressor`] - Standard benchmark baselines for regression
33
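// NOTE: `missing_docs` is allowed crate-wide by the `#![allow(missing_docs)]` attribute at the
// top of this file, so the stricter lint below is intentionally left disabled for now.
// #![warn(missing_docs)]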
35
36pub mod advanced_bayesian;
37pub mod benchmark;
38pub mod causal_inference;
39pub mod comparative_analysis;
40#[allow(non_snake_case)]
41#[cfg(test)]
42pub mod comparison_tests;
43pub mod context_aware;
44pub mod domain_specific;
45pub mod dummy_classifier;
46pub mod dummy_multioutput_regressor;
47pub mod dummy_regressor;
48pub mod ensemble_dummy;
49pub mod extensibility;
50pub mod fairness_ethics;
51pub mod fluent_api;
52pub mod game_theoretic;
53pub mod information_theoretic;
54pub mod integration_utilities;
55pub mod memory_management;
56pub mod meta_learning;
57pub mod modular_design;
58pub mod online;
59pub mod performance;
60pub mod performance_enhancements;
61pub mod robust;
62pub mod scalability;
63pub mod sklearn_benchmarks;
64pub mod sklearn_comparison;
65pub mod type_safe;
66pub mod validation;
67
68pub use advanced_bayesian::{
69    AdvancedBayesianStrategy, EmpiricalBayesEstimator, HierarchicalBayesEstimator,
70    MCMCBayesEstimator, VariationalBayesEstimator,
71};
72pub use benchmark::{
73    BenchmarkClassifier, BenchmarkRegressor, BenchmarkStrategy, CompetitionBaseline,
74    DomainBenchmarkClassifier, DomainStrategy as BenchmarkDomainStrategy, TheoreticalBound,
75    TheoreticalBounds,
76};
77pub use causal_inference::{
78    CausalDiscoveryBaseline, CausalDiscoveryStrategy, CounterfactualBaseline,
79    CounterfactualStrategy, DoCalculusBaseline, DoCalculusStrategy, FittedCausalDiscoveryBaseline,
80    FittedCounterfactualBaseline, FittedDoCalculusBaseline, FittedInstrumentalVariableBaseline,
81    FittedMediationAnalysisBaseline, InstrumentalVariableBaseline, InstrumentalVariableStrategy,
82    MediationAnalysisBaseline, MediationStrategy,
83};
84pub use comparative_analysis::{
85    ComparativeAnalyzer, ComparisonReporter, ConfidenceIntervalType, EffectSizeInterpretation,
86    EffectSizeMeasure, EffectSizeResult, ModelComparisonResult, MultipleComparisonCorrection,
87    PairwiseComparison, SignificanceTest, SignificanceTestResult, StatisticalSummary,
88};
89pub use context_aware::{
90    ContextAwareDummyClassifier, ContextAwareDummyRegressor, ContextAwareStrategy, FeatureWeighting,
91};
92pub use domain_specific::{
93    AnomalyFeatures, AnomalyStrategy, CVFeatures, CVStrategy, ColorSpace, DomainClassifier,
94    DomainFeatures, DomainPreprocessor, DomainStrategy, FrequencyMethod, NLPFeatures, NLPStrategy,
95    PixelStatistic, RecFeatures, RecStrategy, TSFeatures, TextureMethod, ThresholdMethod,
96    TimeSeriesStrategy,
97};
98pub use dummy_classifier::{DummyClassifier, Strategy as ClassifierStrategy};
99pub use dummy_multioutput_regressor::{
100    MultiOutputDummyRegressor, MultiOutputStrategy, SingleOutputStrategy,
101};
102pub use dummy_regressor::{
103    CyclicalMethod, DecompositionMethod, DummyRegressor, PredictConfidenceInterval,
104    ProbabilisticRegression, SeasonalAdjustmentMethod, SeasonalType, Strategy as RegressorStrategy,
105};
106pub use ensemble_dummy::{EnsembleDummyClassifier, EnsembleDummyRegressor, EnsembleStrategy};
107pub use extensibility::{
108    BaselinePlugin, DataInfo, ErrorContext, ErrorHook, EvaluationFramework, EvaluationIntegration,
109    EvaluationResult, FeatureType, FitContext, FitResult, HookSystem, LogLevel, LoggingConfig,
110    MetricComputer, MetricResult, MetricType, MiddlewareContext, MiddlewareParameter,
111    MiddlewarePipeline, MiddlewareResult, PipelineMiddleware, PluginConfig, PluginMetadata,
112    PluginParameter, PluginRegistry, PostFitHook, PostPredictHook, PreFitHook, PrePredictHook,
113    PredictContext, ResourceConfig, TargetType, TaskType, TestData,
114};
115pub use fairness_ethics::{
116    BiasDetectionBaseline, BiasDetectionStrategy, BiasMetric, BiasMetricResult,
117    DemographicParityBaseline, DemographicParityStrategy, EqualizedOddsBaseline,
118    EqualizedOddsStrategy, FairnessAwareBaseline, FairnessConstraint, FairnessStrategy,
119    FittedBiasDetectionBaseline, FittedDemographicParityBaseline, FittedEqualizedOddsBaseline,
120    FittedFairnessAwareBaseline, FittedIndividualFairnessBaseline, GroupStatistics,
121    IndividualFairnessBaseline, IndividualFairnessStrategy, SimilarityMetric, StatisticalTest,
122    StatisticalTestResult,
123};
124pub use fluent_api::{
125    ClassifierConfig, ClassifierFluentExt, ConfigPresets, PreprocessingChain, RegressorConfig,
126    RegressorFluentExt,
127};
128pub use game_theoretic::{
129    ExplorationStrategy, GameTheoreticClassifier, GameTheoreticRegressor, GameTheoreticResult,
130    GameTheoreticStrategy, LpNorm, OpponentStrategy,
131};
132pub use information_theoretic::{
133    EntropySamplingEstimator, InformationGainEstimator, InformationTheoreticStrategy, MDLEstimator,
134    MaximumEntropyEstimator, MutualInformationEstimator,
135};
136pub use integration_utilities::{
137    AutoBaselineGenerator, BaselineEstimator, BaselinePipeline, BaselineRecommendation,
138    BaselineRecommendationEngine, BaselineType, ConfigurationHelper, DataCharacteristics,
139    OptimizationHint, ParameterDefault, PerformanceMetrics as IntegrationPerformanceMetrics,
140    PipelineConfig, PreprocessingStep, RecommendationRule, SmartDefaultSelector,
141    ValidationStrategy,
142};
143pub use memory_management::{advanced_pooling, reference_counting, streaming_algorithms};
144pub use meta_learning::{
145    ContinualLearningBaseline, ContinualStrategy, DomainAdaptationBaseline,
146    DomainAdaptationStrategy, FewShotBaselineClassifier, FewShotBaselineRegressor, FewShotStrategy,
147    FittedContinualLearningBaseline, FittedDomainAdaptationBaseline, FittedFewShotClassifier,
148    FittedFewShotRegressor, FittedTransferBaseline, SourceDomainStats, TransferLearningBaseline,
149    TransferStrategy,
150};
151pub use modular_design::{
152    statistical_methods, BaselineStrategy, BaselineStrategyFactory, ClassificationStrategy,
153    ClippingPostprocessor, FittedPipeline, MeanConfig, MeanFittedData, MeanStrategy,
154    MostFrequentConfig, MostFrequentFittedData, MostFrequentStrategy, Postprocessor,
155    PredictionPipeline, Preprocessor, RegressionStrategy, StandardScaler, StrategyRegistry,
156};
157pub use online::{
158    DriftDetectionMethod, OnlineClassificationStrategy, OnlineDummyClassifier,
159    OnlineDummyRegressor, OnlineStrategy, WindowStrategy,
160};
161pub use performance::{benchmarks, cache_friendly, memory_efficient, parallel, simd_stats};
162pub use performance_enhancements::{branch_optimization, cpu_optimization, dummy_optimization};
163pub use robust::{
164    LocationEstimator, OutlierDetectionMethod, RobustDummyClassifier, RobustDummyRegressor,
165    RobustStrategy, ScaleEstimator,
166};
167pub use scalability::{
168    ApproximateBaseline, ApproximateMethod, ApproximateStats, LargeScaleConfig,
169    LargeScaleDummyEstimator, LargeScaleStrategy, ProcessingStats, SampledBaselineResult,
170    SamplingBasedBaseline, StreamingBaselineUpdater,
171};
172pub use sklearn_benchmarks::{
173    AccuracyComparison, BenchmarkConfig, BenchmarkResult, DatasetConfig, DatasetInfo,
174    DatasetProperties, DatasetSize, DatasetType, NumericalAccuracy, PerformanceMetrics,
175    SklearnBenchmarkFramework, TargetStatistics,
176};
177pub use sklearn_comparison::{
178    generate_comparison_report, ComparisonResult, SklearnComparisonFramework,
179};
180pub use type_safe::{
181    BoundedParameter, Classification, ClassificationFittedData, EstimatorConfig, EstimatorState,
182    ParameterValidation, PositiveInt, Probability, RandomSeed, Regression, RegressionFittedData,
183    StrategyValid, TaskType as TypeSafeTaskType, Trained, TypeSafeDummyEstimator,
184    TypeSafeEstimator, TypeSafeFittedClassifier, TypeSafeFittedRegressor, TypeSafeParameters,
185    Untrained, ValidatedStrategy,
186};
187pub use validation::{
188    analyze_classification_dataset, analyze_regression_dataset, bootstrap_validate_classifier,
189    bootstrap_validate_regressor, compare_dummy_strategies, comprehensive_validation_classifier,
190    cross_validate_dummy, get_adaptive_classification_strategy, get_adaptive_regression_strategy,
191    get_best_strategy, get_ranking_summary, get_strategies_in_tier, permutation_test_classifier,
192    permutation_test_vs_random_classifier, rank_dummy_strategies_classifier,
193    rank_dummy_strategies_regressor, recommend_classification_strategy,
194    recommend_regression_strategy, validate_reproducibility, BootstrapValidationResult,
195    ClassDistribution, DataType, DatasetCharacteristics, DummyValidationResult,
196    PermutationTestResult, StatisticalValidationResult, StrategyRanking, StrategyRecommendation,
197    TargetDistribution, ValidationSummary,
198};
199
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::{array, Array2};
    use sklears_core::traits::{Fit, Predict};

    /// Number of rows in the toy datasets used by the smoke tests below.
    const N_SAMPLES: usize = 4;

    /// Smoke test: a `DummyClassifier` using the `MostFrequent` strategy can be
    /// fitted and produces one prediction per input row.
    #[test]
    fn test_dummy_classifier_integration() {
        // 4 samples x 2 features, written out row by row.
        let features: Array2<_> = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]];
        let labels = array![0, 0, 1, 1];

        let predicted = DummyClassifier::new(ClassifierStrategy::MostFrequent)
            .fit(&features, &labels)
            .unwrap()
            .predict(&features)
            .unwrap();

        assert_eq!(predicted.len(), N_SAMPLES);
    }

    /// Smoke test: a `DummyRegressor` using the `Mean` strategy can be fitted
    /// and produces one prediction per input row.
    #[test]
    fn test_dummy_regressor_integration() {
        // 4 samples x 2 features, written out row by row.
        let features: Array2<_> = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]];
        let targets = array![1.0, 2.0, 3.0, 4.0];

        let predicted = DummyRegressor::new(RegressorStrategy::Mean)
            .fit(&features, &targets)
            .unwrap()
            .predict(&features)
            .unwrap();

        assert_eq!(predicted.len(), N_SAMPLES);
    }
}