Skip to main content

sklears_core/
lib.rs

1//! # sklears-core - Core Traits and Utilities
2//!
3//! This crate provides the foundational traits, types, and utilities that power
4//! the entire sklears machine learning ecosystem.
5//!
6//! ## Overview
7//!
8//! `sklears-core` defines the essential building blocks for machine learning in Rust:
9//!
10//! - **Core Traits**: `Estimator`, `Fit`, `Predict`, `Transform`, `Score`
11//! - **Type System**: Type-safe state machines (Untrained/Trained)
12//! - **Error Handling**: Comprehensive error types with context
13//! - **Validation**: Input validation and consistency checks
14//! - **Utilities**: Common helper functions and types
15//! - **Parallel Processing**: Abstractions for parallel algorithms
16//! - **Dataset Handling**: Data loading, splitting, and manipulation
17//!
18//! ## Core Traits
19//!
20//! ### Estimator
21//!
22//! The base trait for all machine learning models:
23//!
24//! ```rust,ignore
25//! pub trait Estimator {
26//!     type Config;
27//!     type Error;
28//! }
29//! ```
30//!
31//! ### Fit
32//!
33//! Training an estimator on data:
34//!
35//! ```rust,ignore
36//! pub trait Fit<X, Y> {
37//!     type Fitted;
38//!     fn fit(self, x: &X, y: &Y) -> Result<Self::Fitted, Self::Error>;
39//! }
40//! ```
41//!
42//! ### Predict
43//!
44//! Making predictions with a trained model:
45//!
46//! ```rust,ignore
47//! pub trait Predict<X, Y> {
48//!     fn predict(&self, x: &X) -> Result<Y, Self::Error>;
49//! }
50//! ```
51//!
52//! ### Transform
53//!
54//! Transforming data (for preprocessing and dimensionality reduction):
55//!
56//! ```rust,ignore
57//! pub trait Transform<X> {
58//!     fn transform(&self, x: &X) -> Result<X, Self::Error>;
59//! }
60//! ```
61//!
62//! ## Type-Safe State Machines
63//!
64//! Models use phantom types to track training state at compile time:
65//!
66//! ```rust,ignore
67//! pub struct Untrained;
68//! pub struct Trained;
69//!
70//! pub struct Model<State = Untrained> {
71//!     config: ModelConfig,
72//!     state: PhantomData<State>,
73//!     weights: Option<Weights>, // Only Some in Trained state
74//! }
75//! ```
76//!
77//! This ensures:
78//! - ✅ Can't predict with an untrained model (compile error)
79//! - ✅ Can't accidentally re-train a trained model
80//! - ✅ Type system enforces correct usage patterns
81//!
82//! ## Error Handling
83//!
84//! Comprehensive error types with rich context:
85//!
86//! ```rust,ignore
87//! pub enum SklearsError {
88//!     InvalidInput(String),
89//!     ShapeMismatch { expected: Shape, got: Shape },
90//!     NotFitted,
91//!     ConvergenceError { iterations: usize },
92//!     // ... and many more
93//! }
94//! ```
95//!
96//! ## Validation
97//!
98//! Input validation utilities ensure data consistency:
99//!
100//! ```rust,ignore
101//! use sklears_core::validation;
102//!
103//! // Check that X and y have compatible shapes
104//! validation::check_consistent_length(x, y)?;
105//!
106//! // Check for NaN/Inf values
107//! validation::check_array(x)?;
108//!
109//! // Validate classification targets
110//! validation::check_classification_targets(y)?;
111//! ```
112//!
113//! ## Parallel Processing
114//!
115//! Abstractions for parallel algorithm execution:
116//!
117//! ```rust,ignore
118//! use sklears_core::parallel::ParallelConfig;
119//! use rayon::prelude::*;
120//!
121//! let config = ParallelConfig::new().n_jobs(-1); // Use all cores
122//!
123//! let results: Vec<_> = data.par_iter()
124//!     .map(|sample| process(sample))
125//!     .collect();
126//! ```
127//!
128//! ## Feature Flags
129//!
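130//! - `simd` - Enable SIMD optimizations
131//! - `gpu_support` - GPU acceleration support
132//! - `arrow` - Apache Arrow interoperability
133//! - `binary` - Binary serialization support
//!
//! The following features are also referenced by `cfg` attributes in the crate root:
//!
//! - `custom_lints` - Custom lints for ML-specific patterns
//! - `mmap` - Prelude re-export of the memory-mapped dataset type
//! - `experimental` - Prelude re-exports of the experimental public APIs
//! - `async_support` - Prelude re-exports of the async traits
//! - `plugins` - Prelude re-exports of the plugin system
//! - `exotic_hardware` - Prelude re-exports of exotic hardware support (TPU, FPGA, Quantum)
//! - `effect_types` - Prelude re-exports of the effect type system
//! - `autodiff` - Prelude re-exports of automatic differentiation
//! - `auto_benchmarks` - Prelude re-exports of automatic benchmark generation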
134//!
135//! ## Examples
136//!
137//! See individual module documentation for detailed examples.
138//!
139//! ## Known Limitations
140//!
141//! The following test modules are disabled due to ndarray HRTB (Higher-Ranked Trait Bound)
142//! lifetime constraints introduced in ndarray 0.17; they are planned to be re-enabled in v0.2.0:
143//! - `property_tests` - Property-based tests requiring trait bound simplification
144//! - `test_utilities` - Test utilities requiring trait bound simplification
145//!
146//! ## Integration
147//!
148//! This crate is re-exported by the main `sklears` crate, so you typically don't
149//! need to depend on it directly unless you're building custom estimators.
150
// Core modules: declared without a `cfg` gate, so they are always compiled.
151pub mod dataset;
152pub mod distributed;
153pub mod distributed_algorithms;
154pub mod error;
155pub mod parallel;
156pub mod system_info;
157pub mod traits;
158pub mod types;
159pub mod utils;
160pub mod validation;
161pub mod validation_examples;
162
// Optional modules: each one is compiled only when the Cargo feature named in its
// `cfg` attribute is enabled.
163#[cfg(feature = "simd")]
164pub mod simd;
165
166#[cfg(feature = "gpu_support")]
167pub mod gpu;
168
169#[cfg(feature = "arrow")]
170pub mod arrow;
171
172#[cfg(feature = "binary")]
173pub mod binary;
174
// Additional modules: none of these carries a `cfg` gate, so all of them are always
// compiled, even where the prelude only re-exports their items behind a feature flag
// (e.g. `async_traits`, `plugin`, `autodiff`).
175pub mod advanced_array_ops;
176pub mod advanced_benchmarking;
177pub mod algorithm_markers;
178pub mod async_traits;
179pub mod auto_benchmark_generation;
180pub mod autodiff;
181pub mod benchmarking;
182pub mod compatibility;
183pub mod compile_time_macros;
184pub mod compile_time_validation;
185// TODO: `contract_testing` stays disabled - its complex generic testing needs blanket
// trait implementations. The matching prelude re-exports are commented out as well.
186// pub mod contract_testing;
187pub mod contribution;
188pub mod dependent_types;
189pub mod derive_macros;
190pub mod dsl_impl;
191pub mod effect_types;
192pub mod ensemble_improvements;
193pub mod exhaustive_error_handling;
194pub mod exotic_hardware;
195pub mod exotic_hardware_impls;
196pub mod fallback_strategies;
197pub mod features;
198pub mod formal_verification;
199pub mod format_io;
200pub mod formatting;
201pub mod memory_safety;
202pub mod mock_objects;
203pub mod performance_profiling;
204pub mod performance_reporting;
205pub mod plugin;
206pub mod plugin_marketplace_impl;
207pub mod refinement_types;
208pub mod streaming_lifetimes;
209pub mod unsafe_audit;
210
211// Export the procedural macros for DSL support
212pub mod macros;
213
214// Modularized API reference system (refactored from api_reference_generator.rs)
// Of these, `interactive_api_reference`, `tutorial_examples` and `wasm_playground_impl`
// have no prelude re-exports in this file and must be imported by their module path.
215pub mod api_analyzers;
216pub mod api_data_structures;
217pub mod api_formatters;
218pub mod api_generator_config;
219pub mod interactive_api_reference;
220pub mod interactive_playground;
221pub mod search_engines;
222pub mod tutorial_examples;
223pub mod tutorial_system;
224pub mod wasm_playground_impl;
225
226// Trait explorer tool for interactive API navigation
227pub mod trait_explorer;
228
229// Public/private API boundaries: `private` has no `pub`, so it is visible only inside
// this crate; `public` provides the `stable` and `experimental` namespaces that the
// prelude re-exports.
230mod private;
231pub mod public;
232
233// Custom lints for ML-specific patterns (compiled only with the `custom_lints` feature)
234#[cfg(feature = "custom_lints")]
235pub mod lints;
236
237// Dependency audit and optimization
238pub mod dependency_audit;
239
240// Code coverage reporting and enforcement
241pub mod code_coverage;
242
243// Input sanitization for untrusted data
244pub mod input_sanitization;
245
246// KNOWN ISSUE (v0.1.0): Module disabled due to ndarray HRTB lifetime constraints. Planned for v0.2.0.
247// #[allow(non_snake_case)]
248// #[cfg(test)]
249// pub mod property_tests;
250
251// KNOWN ISSUE (v0.1.0): Module disabled due to ndarray HRTB lifetime constraints. Planned for v0.2.0.
252// #[allow(non_snake_case)]
253// #[cfg(test)]
254// pub mod test_utilities;
255
256pub mod prelude {
257    //! Convenient re-exports of the most commonly used types and traits.
258    //!
259    //! This prelude is organized by stability guarantees:
260    //! - Stable APIs are always available
261    //! - Experimental APIs require explicit opt-in
262    //! - Deprecated APIs emit warnings
263    // === Stable Public APIs (Always Available) ===
264    // Core traits - guaranteed stable
265    pub use crate::public::stable::{
266        Estimator, Fit, FitPredict, FitTransform, PartialFit, Predict, Transform,
267    };
268
269    // Core types - guaranteed stable
270    pub use crate::public::stable::{
271        Array1, Array2, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2, FeatureCount,
272        Features, Float, FloatBounds, Int, IntBounds, Labels, Numeric, Predictions, Probabilities,
273        Probability, SampleCount, Target,
274    };
275
276    // Error handling - guaranteed stable
277    pub use crate::public::stable::{ErrorChain, ErrorContext, Result, SklearsError};
278
279    // Validation - guaranteed stable
280    pub use crate::public::stable::{Validate, ValidationContext, ValidationRule};
281
282    // Dataset utilities - guaranteed stable
283    pub use crate::public::stable::{load_iris, make_blobs, make_regression, Dataset};
284
285    // === Experimental APIs (Require Opt-in) ===
286
    // Glob re-export: everything in `public::experimental` enters the prelude, but only
    // when the `experimental` feature is enabled.
287    #[cfg(feature = "experimental")]
288    pub use crate::public::experimental::*;
289
290    // === Additional Stable Exports ===
291
292    // Zero-copy utilities - stable
293    pub use crate::types::zero_copy::{
294        array_views, dataset_ops, ArrayPool, ZeroCopyArray, ZeroCopyDataset,
295    };
296    pub use crate::types::{
297        CowDataset, CowFeatures, CowLabels, CowPredictions, CowProbabilities, CowSampleWeight,
298        CowTarget, Distances, SampleWeight, Similarities, ZeroCopy, ZeroCopyFeatures,
299        ZeroCopyTarget,
300    };
301
302    // Validation utilities - stable
    // The `validation::ml` module is re-exported under the name `validation_ml`.
303    pub use crate::validation::{ml as validation_ml, ConfigValidation, ValidationRules};
304
305    // Compile-time validation - stable
306    pub use crate::compile_time_validation::{
307        CompileTimeValidated, DimensionValidator, LinearRegressionConfig,
308        LinearRegressionConfigBuilder, ParameterValidator, PositiveValidator, ProbabilityValidator,
309        RangeValidator, SolverCompatibility, ValidatedConfig,
310    };
311
312    // Memory-mapped datasets - stable when available
313    #[cfg(feature = "mmap")]
314    pub use crate::dataset::MmapDataset;
315
316    // Arrow integration - stable when available
317    #[cfg(feature = "arrow")]
318    pub use crate::arrow::{ArrowDataset, ColumnStats};
319
320    // Binary format support - stable when available
321    #[cfg(feature = "binary")]
322    pub use crate::binary::{
323        convenience, ArrayBinaryFormat, BinaryConfig, BinaryDeserialize, BinaryFileStorage,
324        BinaryFormat, BinaryMetadata, BinarySerialize, BinarySerializer, CompressionType,
325        StreamingBinaryReader, StreamingBinaryWriter,
326    };
327
328    // SIMD operations - experimental, requires feature flag
329    #[cfg(feature = "simd")]
330    pub use crate::simd::{SimdArrayOps, SimdOps};
331
332    // GPU acceleration - experimental, requires feature flag and CUDA
333    #[cfg(feature = "gpu_support")]
334    pub use crate::gpu::{
335        GpuArray, GpuContext, GpuDeviceProperties, GpuMatrixOps, GpuMemoryInfo, GpuUtils,
336        MemoryTransferOpts, TransferStrategy,
337    };
338
339    // Parallel processing - stable
340    pub use crate::parallel::{
341        ParallelConfig, ParallelCrossValidation, ParallelCrossValidator, ParallelEnsemble,
342        ParallelEnsembleOps, ParallelFit, ParallelMatrixOps, ParallelPredict, ParallelTransform,
343    };
344
345    // Async traits - experimental
    // (the `async_traits` module itself is always compiled; only these re-exports
    // require the `async_support` feature)
346    #[cfg(feature = "async_support")]
347    pub use crate::async_traits::{
348        AsyncConfig, AsyncCrossValidation, AsyncEnsemble, AsyncFitAdvanced,
349        AsyncHyperparameterOptimization, AsyncModelPersistence, AsyncPartialFit,
350        AsyncPredictAdvanced, AsyncTransformAdvanced, CancellationToken, ConfidenceInterval,
351        ProgressInfo,
352    };
353
354    // Plugin system - experimental
    // (the `plugin` module itself is always compiled; only these re-exports require
    // the `plugins` feature)
355    #[cfg(feature = "plugins")]
356    pub use crate::plugin::{
357        AlgorithmPlugin, ClusteringPlugin, LogLevel, Plugin, PluginCapability, PluginCategory,
358        PluginConfig, PluginConfigBuilder, PluginFactory, PluginLoader, PluginMetadata,
359        PluginParameter, PluginRegistry, RuntimeSettings, TransformerPlugin,
360    };
361
362    // API stability utilities
363    pub use crate::public::{
364        api_version_info, is_api_experimental, is_api_stable, ApiStability, ApiVersionInfo,
365        ExperimentalApi, PublicApiConfig, PublicApiConfigBuilder, StableApi,
366    };
367
368    // Custom lints for ML-specific patterns
369    #[cfg(feature = "custom_lints")]
370    pub use crate::lints::{
371        ApiUsageLint, ArrayPerformanceLint, DataValidationLint, LintCategory, LintConfig,
372        LintRegistry, LintRule, LintSeverity, MemoryLeakLint, ModelValidationLint,
373        NumericalStabilityLint,
374    };
375
376    // Dependency audit and optimization
377    pub use crate::dependency_audit::{
378        calculate_metrics, generate_dependency_graph, BinarySizeImpact, CompileTimeImpact,
379        DependencyAudit, DependencyCategory, DependencyInfo, DependencyRecommendation,
380        DependencyReport, RecommendationAction,
381    };
382
383    // Code coverage reporting and enforcement
384    pub use crate::code_coverage::{
385        CICoverageResult, CIDConfig, CoverageCI, CoverageCollector, CoverageConfig, CoverageReport,
386        CoverageTool, QualityGatesResult, RecommendationPriority,
387    };
388
389    // Input sanitization for untrusted data
390    pub use crate::input_sanitization::{
391        is_ml_data_safe, sanitize_ml_data, InputSanitizer, SafetyIssue, SanitizationConfig,
392        Sanitize,
393    };
394
395    // System memory statistics — real OS values, never fabricated
396    pub use crate::system_info::{process_rss_bytes, system_memory, SystemMemory};
397
398    // Advanced array operations for high-performance computing
399    pub use crate::advanced_array_ops::{ArrayStats, MatrixOps, MemoryOps};
400
401    // Re-export the error_context macro
402    pub use crate::error_context;
403
404    // Code quality and safety tools - stable
405    pub use crate::formatting::{
406        CodeFormatter, FormattingConfig, FormattingConfigBuilder, FormattingIssue,
407        FormattingReport, IssueSeverity, MLFormattingRules,
408    };
409
410    pub use crate::unsafe_audit::{
411        SafetyRecommendation, SafetySeverity, UnsafeAuditConfig, UnsafeAuditReport, UnsafeAuditor,
412        UnsafeFinding, UnsafePattern, UnsafeType,
413    };
414
415    // Memory safety guarantees and utilities - stable
416    pub use crate::memory_safety::{
417        MemoryPoolStats, MemorySafety, MemorySafetyGuarantee, SafeArrayOps, SafeMemoryPool,
418        SafePooledBuffer, SafePtr, SafeSharedModel, UnsafeValidationResult,
419    };
420
421    // Benchmarking utilities - stable
422    pub use crate::benchmarking::{
423        AccuracyComparison, AlgorithmBenchmark, AlgorithmType, AutomatedBenchmarkRunner,
424        BenchmarkConfig, BenchmarkDataset, BenchmarkResults, BenchmarkRunResult, BenchmarkSuite,
425        MemoryStatistics, TimingStatistics,
426    };
427
428    // Mock objects for testing - always re-exported (no feature gate)
429    pub use crate::mock_objects::{
430        MockBehavior, MockConfig, MockEnsemble, MockErrorType, MockEstimator, MockEstimatorBuilder,
431        MockStateSnapshot, MockTransformConfig, MockTransformType, MockTransformer,
432        MockTransformerBuilder, TrainedMockEstimator, VotingStrategy,
433    };
434
435    // Contract testing framework - temporarily disabled until ndarray 0.17 migration is complete
    // (the crate-root `pub mod contract_testing;` declaration is commented out too, so
    // these re-exports cannot be restored on their own)
436    // pub use crate::contract_testing::{
437    //     ContractTestConfig, ContractTestResult, ContractTestSummary, ContractTester,
438    //     PropertyTestStats, TestCase, TraitLaws,
439    // };
440
441    // Compatibility layers for popular ML libraries - stable
442    pub use crate::compatibility::{
443        numpy::NumpyArray,
444        pandas::{DataFrame, DataValue},
445        pytorch::{ndarray_to_pytorch_tensor, TensorMetadata},
446        serialization::{CrossPlatformModel, ModelFormat, ModelSerialization},
447        sklearn::{FittedScikitLearnModel, ParamValue, ScikitLearnModel, SklearnCompatible},
448    };
449
450    // Standard format readers and writers - stable
451    pub use crate::format_io::{
452        CsvOptions, DataFormat, FormatDetector, FormatOptions, FormatReader, FormatWriter,
453        Hdf5Options, JsonOptions, NumpyOptions, ParquetOptions, StreamingReader,
454    };
455
456    // Contribution guidelines and review process - stable
457    pub use crate::contribution::{
458        AlgorithmicCriteria, ClippyLevel, CodeQualityCriteria, ContributionChecker,
459        ContributionConfig, ContributionResult, ContributionWorkflow, DocumentationCriteria,
460        GateResult, PerformanceCriteria, QualityGate, QualityGateType, ReviewCriteria,
461        TestingCriteria, WorkflowStep,
462    };
463
464    // Automated performance reporting system - stable
465    pub use crate::performance_reporting::{
466        AlertConfig, AnalysisResult, AnalysisType, HealthStatus, OutputFormat, PerformanceAnalyzer,
467        PerformanceReport, PerformanceReporter, RegressionThreshold, ReportConfig, TimeRange,
468        TrendDirection,
469    };
470
471    // Modularized API reference system - stable
    // Several items are renamed with a `Modular` prefix. `OutputFormat` must be renamed
    // because `performance_reporting::OutputFormat` is already re-exported above; the other
    // renames presumably avoid clashes with names outside this file - TODO confirm.
472    pub use crate::api_analyzers::{
473        CrossReferenceBuilder as ModularCrossReferenceBuilder, ExampleValidator,
474        TraitAnalyzer as ModularTraitAnalyzer, TypeExtractor as ModularTypeExtractor,
475    };
476    pub use crate::api_data_structures::{
477        ApiReference as ModularApiReference, CodeExample as ModularCodeExample,
478        TraitInfo as ModularTraitInfo, TypeInfo as ModularTypeInfo,
479    };
480    pub use crate::api_formatters::{
481        ApiReferenceGenerator as ModularApiReferenceGenerator, DocumentFormatter,
482    };
483    pub use crate::api_generator_config::{
484        GeneratorConfig as ModularGeneratorConfig, OutputFormat as ModularOutputFormat,
485        ValidationConfig,
486    };
487    pub use crate::interactive_playground::{
488        LiveCodeRunner, UIComponentBuilder, WasmPlaygroundManager,
489    };
490    pub use crate::search_engines::{
491        AutocompleteTrie, SearchQuery, SearchResult, SemanticSearchEngine,
492    };
493    pub use crate::tutorial_system::{
494        LearningPath, ProgressTracker, Tutorial, TutorialBuilder, TutorialSystem,
495    };
496
497    // Trait explorer tool for interactive API navigation - stable
498    pub use crate::trait_explorer::{
499        CompilationImpact, DependencyAnalysis, DependencyAnalyzer, EdgeType, ExampleCategory,
500        ExampleDifficulty, ExampleGenerator, ExplorerConfig, GraphExportFormat, MemoryFootprint,
501        PerformanceAnalysis, RuntimeOverhead, SimilarTrait, TraitExplorationResult, TraitExplorer,
502        TraitGraph, TraitGraphEdge, TraitGraphGenerator, TraitGraphMetadata, TraitGraphNode,
503        TraitNodeType, TraitPerformanceAnalyzer, TraitRegistry, UsageExample,
504    };
505
506    // Exotic hardware support - experimental (TPU, FPGA, Quantum)
507    #[cfg(feature = "exotic_hardware")]
508    pub use crate::exotic_hardware::{
509        ActivationType, ComputationGraph, ComputationMetadata, ComputationNode, ComputationResult,
510        ExoticHardware, ExoticHardwareManager, FpgaDevice, FpgaVendor, HardwareCapabilities,
511        HardwareCompiler, HardwareComputation, HardwareId, HardwareMemoryManager, HardwareStatus,
512        HardwareType, MemoryHandle, MemoryStats, Operation, PerformanceEstimate, Precision,
513        QuantumBackend, QuantumDevice, TensorSpec, TpuDevice, TpuVersion, ValidationReport,
514    };
515
516    // Effect type system - experimental (compile-time effect tracking)
517    #[cfg(feature = "effect_types")]
518    pub use crate::effect_types::{
519        AsyncEffect, Capability, Combined, Effect, EffectAnalyzer, EffectBuilder, EffectMetadata,
520        EffectType, Fallible, FallibleIOEffect, GPUMemoryEffect, IORandomEffect, Linear, Memory,
521        MemoryIOEffect, Pure, Random, GPU, IO,
522    };
523
524    // Automatic differentiation - experimental (forward/reverse mode AD)
    // `ComputationNode` is renamed to `ADNode` so it cannot clash with
    // `exotic_hardware::ComputationNode` when both features are enabled.
525    #[cfg(feature = "autodiff")]
526    pub use crate::autodiff::{
527        ADMode, AutodiffConfig, ComputationNode as ADNode, Dual, SymbolicExpression, Variable,
528        VariableId,
529    };
530
531    // Distributed computing support - experimental (cluster-aware ML) - TEMPORARILY DISABLED
    // (only these prelude re-exports are disabled; the `distributed` module itself is
    // still declared at the crate root)
532    // #[cfg(feature = "distributed")]
533    // pub use crate::distributed::{
534    //     ClusterInfo, ClusterNode, DistributedCluster, DistributedDataset, DistributedEstimator,
535    //     DistributedMessage, DistributedMetrics, DistributedOptimizer, DistributedTraining,
536    //     FaultTolerance, GradientAggregation, MessagePassing, NodeId, ParameterServer,
537    // };
538
539    // Compile-time macros and verification - experimental (model verification) - TEMPORARILY DISABLED
    // (only these prelude re-exports are disabled; the `compile_time_macros` module itself
    // is still declared at the crate root)
540    // #[cfg(feature = "compile_time_macros")]
541    // pub use crate::compile_time_macros::{
542    //     validate_performance, verify_dimensions, verify_model, BenchmarkConfig as CompileTimeBenchmarkConfig,
543    //     CompileTimeVerifiable, ComplexityAnalysis, DimensionVerifiable, MathematicallyVerifiable,
544    //     OptimizationSuggestion, PerformanceTargets, ScalingBehavior, VerificationConfig,
545    //     VerificationEngine, VerificationResult,
546    // };
547
548    // Automatic benchmark generation - experimental (performance testing)
    // `PerformanceEstimate` is renamed so it cannot clash with
    // `exotic_hardware::PerformanceEstimate` when both features are enabled.
549    #[cfg(feature = "auto_benchmarks")]
550    pub use crate::auto_benchmark_generation::{
551        generate_benchmarks_for_type, AutoBenchmarkConfig, BenchmarkExecutor, BenchmarkGenerator,
552        BenchmarkResult, BenchmarkType, ComplexityClass, GeneratedBenchmark,
553        PerformanceEstimate as AutoBenchmarkPerformanceEstimate, RegressionDetector,
554        ScalingDimension,
555    };
556
557    // Advanced ensemble method improvements - always re-exported (no feature gate)
    // `ParallelConfig` and `ParallelEnsemble` are renamed here because the same names are
    // already re-exported from `crate::parallel` earlier in this prelude.
558    pub use crate::ensemble_improvements::{
559        AggregationMethod, BaseEstimator, BaseEstimatorConfig, BaseEstimatorType,
560        DistributedConfig, DistributedEnsemble, EnsembleConfig, EnsembleType,
561        LoadBalancingStrategy, NodeRole, ParallelConfig as EnsembleParallelConfig,
562        ParallelEnsemble as AdvancedParallelEnsemble, SamplingStrategy, TrainedBaseModel,
563        TrainedParallelEnsemble, TrainingState,
564    };
565}