// sklears_core/lib.rs

// Crate-wide lint relaxations. Each attribute silences the named rustc lint for
// every item in the crate:
//   - `dead_code`: items that are never used
//   - `non_snake_case`: identifiers that are not snake_case
//   - `missing_docs`: public items without documentation
//   - `deprecated`: uses of items marked `#[deprecated]`
// NOTE(review): the reason for relaxing these at crate scope is not recorded
// here; consider narrowing each `allow` to the modules that need it.
#![allow(dead_code)]
#![allow(non_snake_case)]
#![allow(missing_docs)]
#![allow(deprecated)]
//! # sklears-core - Core Traits and Utilities
//!
//! This crate provides the foundational traits, types, and utilities that power
//! the entire sklears machine learning ecosystem.
//!
//! ## Overview
//!
//! `sklears-core` defines the essential building blocks for machine learning in Rust:
//!
//! - **Core Traits**: `Estimator`, `Fit`, `Predict`, `Transform`, `Score`
//! - **Type System**: Type-safe state machines (Untrained/Trained)
//! - **Error Handling**: Comprehensive error types with context
//! - **Validation**: Input validation and consistency checks
//! - **Utilities**: Common helper functions and types
//! - **Parallel Processing**: Abstractions for parallel algorithms
//! - **Dataset Handling**: Data loading, splitting, and manipulation
//!
//! ## Core Traits
//!
//! ### Estimator
//!
//! The base trait for all machine learning models:
//!
//! ```rust,ignore
//! pub trait Estimator {
//!     type Config;
//!     type Error;
//! }
//! ```
//!
//! ### Fit
//!
//! Training an estimator on data:
//!
//! ```rust,ignore
//! pub trait Fit<X, Y> {
//!     type Fitted;
//!     fn fit(self, x: &X, y: &Y) -> Result<Self::Fitted, Self::Error>;
//! }
//! ```
//!
//! ### Predict
//!
//! Making predictions with a trained model:
//!
//! ```rust,ignore
//! pub trait Predict<X, Y> {
//!     fn predict(&self, x: &X) -> Result<Y, Self::Error>;
//! }
//! ```
//!
//! ### Transform
//!
//! Transforming data (for preprocessing and dimensionality reduction):
//!
//! ```rust,ignore
//! pub trait Transform<X> {
//!     fn transform(&self, x: &X) -> Result<X, Self::Error>;
//! }
//! ```
//!
//! ## Type-Safe State Machines
//!
//! Models use phantom types to track training state at compile time:
//!
//! ```rust,ignore
//! pub struct Untrained;
//! pub struct Trained;
//!
//! pub struct Model<State = Untrained> {
//!     config: ModelConfig,
//!     state: PhantomData<State>,
//!     weights: Option<Weights>, // Only Some in Trained state
//! }
//! ```
//!
//! This ensures:
//! - ✅ Can't predict with an untrained model (compile error)
//! - ✅ Can't accidentally re-train a trained model
//! - ✅ Type system enforces correct usage patterns
//!
//! ## Error Handling
//!
//! Comprehensive error types with rich context:
//!
//! ```rust,ignore
//! pub enum SklearsError {
//!     InvalidInput(String),
//!     ShapeMismatch { expected: Shape, got: Shape },
//!     NotFitted,
//!     ConvergenceError { iterations: usize },
//!     // ... and many more
//! }
//! ```
//!
//! ## Validation
//!
//! Input validation utilities ensure data consistency:
//!
//! ```rust,ignore
//! use sklears_core::validation;
//!
//! // Check that X and y have compatible shapes
//! validation::check_consistent_length(x, y)?;
//!
//! // Check for NaN/Inf values
//! validation::check_array(x)?;
//!
//! // Validate classification targets
//! validation::check_classification_targets(y)?;
//! ```
//!
//! ## Parallel Processing
//!
//! Abstractions for parallel algorithm execution:
//!
//! ```rust,ignore
//! use sklears_core::parallel::ParallelConfig;
//! use rayon::prelude::*;
//!
//! let config = ParallelConfig::new().n_jobs(-1); // Use all cores
//!
//! data.par_iter()
//!     .map(|sample| process(sample))
//!     .collect()
//! ```
//!
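//! ## Feature Flags
//!
//! - `simd` - Enable SIMD optimizations
//! - `gpu_support` - GPU acceleration support
//! - `arrow` - Apache Arrow interoperability
//! - `binary` - Binary serialization support
//!
//! The crate root also checks the following feature names in
//! `#[cfg(feature = "...")]` attributes (confirm against `Cargo.toml` that each
//! one is declared):
//!
//! - `custom_lints` - Custom lints for ML-specific patterns (`lints` module)
//! - `experimental` - Re-exports `public::experimental` from the prelude
//! - `mmap` - Memory-mapped datasets (`MmapDataset` prelude re-export)
//! - `async_support` - Async trait re-exports in the prelude
//! - `plugins` - Plugin system re-exports in the prelude
//! - `exotic_hardware` - Exotic hardware (TPU, FPGA, Quantum) re-exports in the prelude
//! - `effect_types` - Effect type system re-exports in the prelude
//! - `autodiff` - Automatic differentiation re-exports in the prelude
//! - `auto_benchmarks` - Automatic benchmark generation re-exports in the prelude
//!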
//! ## Examples
//!
//! See individual module documentation for detailed examples.
//!
//! ## Integration
//!
//! This crate is re-exported by the main `sklears` crate, so you typically don't
//! need to depend on it directly unless you're building custom estimators.

148pub mod dataset;
149pub mod distributed;
150pub mod distributed_algorithms;
151pub mod error;
152pub mod parallel;
153pub mod traits;
154pub mod types;
155pub mod utils;
156pub mod validation;
157pub mod validation_examples;
158
159#[cfg(feature = "simd")]
160pub mod simd;
161
162#[cfg(feature = "gpu_support")]
163pub mod gpu;
164
165#[cfg(feature = "arrow")]
166pub mod arrow;
167
168#[cfg(feature = "binary")]
169pub mod binary;
170
171pub mod advanced_array_ops;
172pub mod advanced_benchmarking;
173pub mod algorithm_markers;
174pub mod async_traits;
175pub mod auto_benchmark_generation;
176pub mod autodiff;
177pub mod benchmarking;
178pub mod compatibility;
179pub mod compile_time_macros;
180pub mod compile_time_validation;
181// TODO: Temporarily disabled until ndarray 0.17 migration is complete
182// Contract testing framework needs trait bounds updated for new ArrayBase<S, D, T> signature
183// pub mod contract_testing;
184pub mod contribution;
185pub mod dependent_types;
186pub mod derive_macros;
187pub mod dsl_impl;
188pub mod effect_types;
189pub mod ensemble_improvements;
190pub mod exhaustive_error_handling;
191pub mod exotic_hardware;
192pub mod exotic_hardware_impls;
193pub mod fallback_strategies;
194pub mod features;
195pub mod formal_verification;
196pub mod format_io;
197pub mod formatting;
198pub mod memory_safety;
199pub mod mock_objects;
200pub mod performance_profiling;
201pub mod performance_reporting;
202pub mod plugin;
203pub mod plugin_marketplace_impl;
204pub mod refinement_types;
205pub mod streaming_lifetimes;
206pub mod unsafe_audit;
207
208// Export the procedural macros for DSL support
209pub mod macros;
210
211// Modularized API reference system (refactored from api_reference_generator.rs)
212pub mod api_analyzers;
213pub mod api_data_structures;
214pub mod api_formatters;
215pub mod api_generator_config;
216pub mod interactive_api_reference;
217pub mod interactive_playground;
218pub mod search_engines;
219pub mod tutorial_examples;
220pub mod tutorial_system;
221pub mod wasm_playground_impl;
222
223// Trait explorer tool for interactive API navigation
224pub mod trait_explorer;
225
226// Public/private API boundaries
227mod private;
228pub mod public;
229
230// Custom lints for ML-specific patterns
231#[cfg(feature = "custom_lints")]
232pub mod lints;
233
234// Dependency audit and optimization
235pub mod dependency_audit;
236
237// Code coverage reporting and enforcement
238pub mod code_coverage;
239
240// Input sanitization for untrusted data
241pub mod input_sanitization;
242
243// TODO: Temporarily disabled until ndarray 0.17 HRTB trait bound issues are resolved
244// #[allow(non_snake_case)]
245// #[cfg(test)]
246// pub mod property_tests;
247
248// TODO: Temporarily disabled until ndarray 0.17 HRTB trait bound issues are resolved
249// #[allow(non_snake_case)]
250// #[cfg(test)]
251// pub mod test_utilities;
252
253pub mod prelude {
254    /// Convenient re-exports of the most commonly used types and traits
255    ///
256    /// This prelude is organized by stability guarantees:
257    /// - Stable APIs are always available
258    /// - Experimental APIs require explicit opt-in
259    /// - Deprecated APIs emit warnings
260    // === Stable Public APIs (Always Available) ===
261    // Core traits - guaranteed stable
262    pub use crate::public::stable::{
263        Estimator, Fit, FitPredict, FitTransform, PartialFit, Predict, Transform,
264    };
265
266    // Core types - guaranteed stable
267    pub use crate::public::stable::{
268        Array1, Array2, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2, FeatureCount,
269        Features, Float, FloatBounds, Int, IntBounds, Labels, Numeric, Predictions, Probabilities,
270        Probability, SampleCount, Target,
271    };
272
273    // Error handling - guaranteed stable
274    pub use crate::public::stable::{ErrorChain, ErrorContext, Result, SklearsError};
275
276    // Validation - guaranteed stable
277    pub use crate::public::stable::{Validate, ValidationContext, ValidationRule};
278
279    // Dataset utilities - guaranteed stable
280    pub use crate::public::stable::{load_iris, make_blobs, make_regression, Dataset};
281
282    // === Experimental APIs (Require Opt-in) ===
283
284    #[cfg(feature = "experimental")]
285    pub use crate::public::experimental::*;
286
287    // === Additional Stable Exports ===
288
289    // Zero-copy utilities - stable
290    pub use crate::types::zero_copy::{
291        array_views, dataset_ops, ArrayPool, ZeroCopyArray, ZeroCopyDataset,
292    };
293    pub use crate::types::{
294        CowDataset, CowFeatures, CowLabels, CowPredictions, CowProbabilities, CowSampleWeight,
295        CowTarget, Distances, SampleWeight, Similarities, ZeroCopy, ZeroCopyFeatures,
296        ZeroCopyTarget,
297    };
298
299    // Validation utilities - stable
300    pub use crate::validation::{ml as validation_ml, ConfigValidation, ValidationRules};
301
302    // Compile-time validation - stable
303    pub use crate::compile_time_validation::{
304        CompileTimeValidated, DimensionValidator, LinearRegressionConfig,
305        LinearRegressionConfigBuilder, ParameterValidator, PositiveValidator, ProbabilityValidator,
306        RangeValidator, SolverCompatibility, ValidatedConfig,
307    };
308
309    // Memory-mapped datasets - stable when available
310    #[cfg(feature = "mmap")]
311    pub use crate::dataset::MmapDataset;
312
313    // Arrow integration - stable when available
314    #[cfg(feature = "arrow")]
315    pub use crate::arrow::{ArrowDataset, ColumnStats};
316
317    // Binary format support - stable when available
318    #[cfg(feature = "binary")]
319    pub use crate::binary::{
320        convenience, ArrayBinaryFormat, BinaryConfig, BinaryDeserialize, BinaryFileStorage,
321        BinaryFormat, BinaryMetadata, BinarySerialize, BinarySerializer, CompressionType,
322        StreamingBinaryReader, StreamingBinaryWriter,
323    };
324
325    // SIMD operations - experimental, requires feature flag
326    #[cfg(feature = "simd")]
327    pub use crate::simd::{SimdArrayOps, SimdOps};
328
329    // GPU acceleration - experimental, requires feature flag and CUDA
330    #[cfg(feature = "gpu_support")]
331    pub use crate::gpu::{
332        GpuArray, GpuContext, GpuDeviceProperties, GpuMatrixOps, GpuMemoryInfo, GpuUtils,
333        MemoryTransferOpts, TransferStrategy,
334    };
335
336    // Parallel processing - stable
337    pub use crate::parallel::{
338        ParallelConfig, ParallelCrossValidation, ParallelCrossValidator, ParallelEnsemble,
339        ParallelEnsembleOps, ParallelFit, ParallelMatrixOps, ParallelPredict, ParallelTransform,
340    };
341
342    // Async traits - experimental
343    #[cfg(feature = "async_support")]
344    pub use crate::async_traits::{
345        AsyncConfig, AsyncCrossValidation, AsyncEnsemble, AsyncFitAdvanced,
346        AsyncHyperparameterOptimization, AsyncModelPersistence, AsyncPartialFit,
347        AsyncPredictAdvanced, AsyncTransformAdvanced, CancellationToken, ConfidenceInterval,
348        ProgressInfo,
349    };
350
351    // Plugin system - experimental
352    #[cfg(feature = "plugins")]
353    pub use crate::plugin::{
354        AlgorithmPlugin, ClusteringPlugin, LogLevel, Plugin, PluginCapability, PluginCategory,
355        PluginConfig, PluginConfigBuilder, PluginFactory, PluginLoader, PluginMetadata,
356        PluginParameter, PluginRegistry, RuntimeSettings, TransformerPlugin,
357    };
358
359    // API stability utilities
360    pub use crate::public::{
361        api_version_info, is_api_experimental, is_api_stable, ApiStability, ApiVersionInfo,
362        ExperimentalApi, PublicApiConfig, PublicApiConfigBuilder, StableApi,
363    };
364
365    // Custom lints for ML-specific patterns
366    #[cfg(feature = "custom_lints")]
367    pub use crate::lints::{
368        ApiUsageLint, ArrayPerformanceLint, DataValidationLint, LintCategory, LintConfig,
369        LintRegistry, LintRule, LintSeverity, MemoryLeakLint, ModelValidationLint,
370        NumericalStabilityLint,
371    };
372
373    // Dependency audit and optimization
374    pub use crate::dependency_audit::{
375        calculate_metrics, generate_dependency_graph, BinarySizeImpact, CompileTimeImpact,
376        DependencyAudit, DependencyCategory, DependencyInfo, DependencyRecommendation,
377        DependencyReport, RecommendationAction,
378    };
379
380    // Code coverage reporting and enforcement
381    pub use crate::code_coverage::{
382        CICoverageResult, CIDConfig, CoverageCI, CoverageCollector, CoverageConfig, CoverageReport,
383        CoverageTool, QualityGatesResult, RecommendationPriority,
384    };
385
386    // Input sanitization for untrusted data
387    pub use crate::input_sanitization::{
388        is_ml_data_safe, sanitize_ml_data, InputSanitizer, SafetyIssue, SanitizationConfig,
389        Sanitize,
390    };
391
392    // Advanced array operations for high-performance computing
393    pub use crate::advanced_array_ops::{ArrayStats, MatrixOps, MemoryOps};
394
395    // Re-export the error_context macro
396    pub use crate::error_context;
397
398    // Code quality and safety tools - stable
399    pub use crate::formatting::{
400        CodeFormatter, FormattingConfig, FormattingConfigBuilder, FormattingIssue,
401        FormattingReport, IssueSeverity, MLFormattingRules,
402    };
403
404    pub use crate::unsafe_audit::{
405        SafetyRecommendation, SafetySeverity, UnsafeAuditConfig, UnsafeAuditReport, UnsafeAuditor,
406        UnsafeFinding, UnsafePattern, UnsafeType,
407    };
408
409    // Memory safety guarantees and utilities - stable
410    pub use crate::memory_safety::{
411        MemoryPoolStats, MemorySafety, MemorySafetyGuarantee, SafeArrayOps, SafeMemoryPool,
412        SafePooledBuffer, SafePtr, SafeSharedModel, UnsafeValidationResult,
413    };
414
415    // Benchmarking utilities - stable
416    pub use crate::benchmarking::{
417        AccuracyComparison, AlgorithmBenchmark, AlgorithmType, AutomatedBenchmarkRunner,
418        BenchmarkConfig, BenchmarkDataset, BenchmarkResults, BenchmarkRunResult, BenchmarkSuite,
419        MemoryStatistics, TimingStatistics,
420    };
421
422    // Mock objects for testing - now enabled and working
423    pub use crate::mock_objects::{
424        MockBehavior, MockConfig, MockEnsemble, MockErrorType, MockEstimator, MockEstimatorBuilder,
425        MockStateSnapshot, MockTransformConfig, MockTransformType, MockTransformer,
426        MockTransformerBuilder, TrainedMockEstimator, VotingStrategy,
427    };
428
429    // Contract testing framework - temporarily disabled until ndarray 0.17 migration is complete
430    // pub use crate::contract_testing::{
431    //     ContractTestConfig, ContractTestResult, ContractTestSummary, ContractTester,
432    //     PropertyTestStats, TestCase, TraitLaws,
433    // };
434
435    // Compatibility layers for popular ML libraries - stable
436    pub use crate::compatibility::{
437        numpy::NumpyArray,
438        pandas::{DataFrame, DataValue},
439        pytorch::{ndarray_to_pytorch_tensor, TensorMetadata},
440        serialization::{CrossPlatformModel, ModelFormat, ModelSerialization},
441        sklearn::{FittedScikitLearnModel, ParamValue, ScikitLearnModel, SklearnCompatible},
442    };
443
444    // Standard format readers and writers - stable
445    pub use crate::format_io::{
446        CsvOptions, DataFormat, FormatDetector, FormatOptions, FormatReader, FormatWriter,
447        Hdf5Options, JsonOptions, NumpyOptions, ParquetOptions, StreamingReader,
448    };
449
450    // Contribution guidelines and review process - stable
451    pub use crate::contribution::{
452        AlgorithmicCriteria, ClippyLevel, CodeQualityCriteria, ContributionChecker,
453        ContributionConfig, ContributionResult, ContributionWorkflow, DocumentationCriteria,
454        GateResult, PerformanceCriteria, QualityGate, QualityGateType, ReviewCriteria,
455        TestingCriteria, WorkflowStep,
456    };
457
458    // Automated performance reporting system - stable
459    pub use crate::performance_reporting::{
460        AlertConfig, AnalysisResult, AnalysisType, HealthStatus, OutputFormat, PerformanceAnalyzer,
461        PerformanceReport, PerformanceReporter, RegressionThreshold, ReportConfig, TimeRange,
462        TrendDirection,
463    };
464
465    // Modularized API reference system - stable
466    pub use crate::api_analyzers::{
467        CrossReferenceBuilder as ModularCrossReferenceBuilder, ExampleValidator,
468        TraitAnalyzer as ModularTraitAnalyzer, TypeExtractor as ModularTypeExtractor,
469    };
470    pub use crate::api_data_structures::{
471        ApiReference as ModularApiReference, CodeExample as ModularCodeExample,
472        TraitInfo as ModularTraitInfo, TypeInfo as ModularTypeInfo,
473    };
474    pub use crate::api_formatters::{
475        ApiReferenceGenerator as ModularApiReferenceGenerator, DocumentFormatter,
476    };
477    pub use crate::api_generator_config::{
478        GeneratorConfig as ModularGeneratorConfig, OutputFormat as ModularOutputFormat,
479        ValidationConfig,
480    };
481    pub use crate::interactive_playground::{
482        LiveCodeRunner, UIComponentBuilder, WasmPlaygroundManager,
483    };
484    pub use crate::search_engines::{
485        AutocompleteTrie, SearchQuery, SearchResult, SemanticSearchEngine,
486    };
487    pub use crate::tutorial_system::{
488        LearningPath, ProgressTracker, Tutorial, TutorialBuilder, TutorialSystem,
489    };
490
491    // Trait explorer tool for interactive API navigation - stable
492    pub use crate::trait_explorer::{
493        CompilationImpact, DependencyAnalysis, DependencyAnalyzer, EdgeType, ExampleCategory,
494        ExampleDifficulty, ExampleGenerator, ExplorerConfig, GraphExportFormat, MemoryFootprint,
495        PerformanceAnalysis, RuntimeOverhead, SimilarTrait, TraitExplorationResult, TraitExplorer,
496        TraitGraph, TraitGraphEdge, TraitGraphGenerator, TraitGraphMetadata, TraitGraphNode,
497        TraitNodeType, TraitPerformanceAnalyzer, TraitRegistry, UsageExample,
498    };
499
500    // Exotic hardware support - experimental (TPU, FPGA, Quantum)
501    #[cfg(feature = "exotic_hardware")]
502    pub use crate::exotic_hardware::{
503        ActivationType, ComputationGraph, ComputationMetadata, ComputationNode, ComputationResult,
504        ExoticHardware, ExoticHardwareManager, FpgaDevice, FpgaVendor, HardwareCapabilities,
505        HardwareCompiler, HardwareComputation, HardwareId, HardwareMemoryManager, HardwareStatus,
506        HardwareType, MemoryHandle, MemoryStats, Operation, PerformanceEstimate, Precision,
507        QuantumBackend, QuantumDevice, TensorSpec, TpuDevice, TpuVersion, ValidationReport,
508    };
509
510    // Effect type system - experimental (compile-time effect tracking)
511    #[cfg(feature = "effect_types")]
512    pub use crate::effect_types::{
513        AsyncEffect, Capability, Combined, Effect, EffectAnalyzer, EffectBuilder, EffectMetadata,
514        EffectType, Fallible, FallibleIOEffect, GPUMemoryEffect, IORandomEffect, Linear, Memory,
515        MemoryIOEffect, Pure, Random, GPU, IO,
516    };
517
518    // Automatic differentiation - experimental (forward/reverse mode AD)
519    #[cfg(feature = "autodiff")]
520    pub use crate::autodiff::{
521        ADMode, AutodiffConfig, ComputationNode as ADNode, Dual, SymbolicExpression, Variable,
522        VariableId,
523    };
524
525    // Distributed computing support - experimental (cluster-aware ML) - TEMPORARILY DISABLED
526    // #[cfg(feature = "distributed")]
527    // pub use crate::distributed::{
528    //     ClusterInfo, ClusterNode, DistributedCluster, DistributedDataset, DistributedEstimator,
529    //     DistributedMessage, DistributedMetrics, DistributedOptimizer, DistributedTraining,
530    //     FaultTolerance, GradientAggregation, MessagePassing, NodeId, ParameterServer,
531    // };
532
533    // Compile-time macros and verification - experimental (model verification) - TEMPORARILY DISABLED
534    // #[cfg(feature = "compile_time_macros")]
535    // pub use crate::compile_time_macros::{
536    //     validate_performance, verify_dimensions, verify_model, BenchmarkConfig as CompileTimeBenchmarkConfig,
537    //     CompileTimeVerifiable, ComplexityAnalysis, DimensionVerifiable, MathematicallyVerifiable,
538    //     OptimizationSuggestion, PerformanceTargets, ScalingBehavior, VerificationConfig,
539    //     VerificationEngine, VerificationResult,
540    // };
541
542    // Automatic benchmark generation - experimental (performance testing)
543    #[cfg(feature = "auto_benchmarks")]
544    pub use crate::auto_benchmark_generation::{
545        generate_benchmarks_for_type, AutoBenchmarkConfig, BenchmarkExecutor, BenchmarkGenerator,
546        BenchmarkResult, BenchmarkType, ComplexityClass, GeneratedBenchmark,
547        PerformanceEstimate as AutoBenchmarkPerformanceEstimate, RegressionDetector,
548        ScalingDimension,
549    };
550
551    // Advanced ensemble method improvements - now enabled and working
552    pub use crate::ensemble_improvements::{
553        AggregationMethod, BaseEstimator, BaseEstimatorConfig, BaseEstimatorType,
554        DistributedConfig, DistributedEnsemble, EnsembleConfig, EnsembleType,
555        LoadBalancingStrategy, NodeRole, ParallelConfig as EnsembleParallelConfig,
556        ParallelEnsemble as AdvancedParallelEnsemble, SamplingStrategy, TrainedBaseModel,
557        TrainedParallelEnsemble, TrainingState,
558    };
559}