Skip to main content

sklears_core/
lib.rs

1#![allow(dead_code)]
2#![allow(non_snake_case)]
3#![allow(missing_docs)]
4#![allow(deprecated)]
5//! # sklears-core - Core Traits and Utilities
6//!
7//! This crate provides the foundational traits, types, and utilities that power
8//! the entire sklears machine learning ecosystem.
9//!
10//! ## Overview
11//!
12//! `sklears-core` defines the essential building blocks for machine learning in Rust:
13//!
14//! - **Core Traits**: `Estimator`, `Fit`, `Predict`, `Transform`, `Score`
15//! - **Type System**: Type-safe state machines (Untrained/Trained)
16//! - **Error Handling**: Comprehensive error types with context
17//! - **Validation**: Input validation and consistency checks
18//! - **Utilities**: Common helper functions and types
19//! - **Parallel Processing**: Abstractions for parallel algorithms
20//! - **Dataset Handling**: Data loading, splitting, and manipulation
21//!
22//! ## Core Traits
23//!
24//! ### Estimator
25//!
26//! The base trait for all machine learning models:
27//!
28//! ```rust,ignore
29//! pub trait Estimator {
30//!     type Config;
31//!     type Error;
32//! }
33//! ```
34//!
35//! ### Fit
36//!
37//! Training an estimator on data:
38//!
39//! ```rust,ignore
40//! pub trait Fit<X, Y> {
41//!     type Fitted;
42//!     fn fit(self, x: &X, y: &Y) -> Result<Self::Fitted, Self::Error>;
43//! }
44//! ```
45//!
46//! ### Predict
47//!
48//! Making predictions with a trained model:
49//!
50//! ```rust,ignore
51//! pub trait Predict<X, Y> {
52//!     fn predict(&self, x: &X) -> Result<Y, Self::Error>;
53//! }
54//! ```
55//!
56//! ### Transform
57//!
58//! Transforming data (for preprocessing and dimensionality reduction):
59//!
60//! ```rust,ignore
61//! pub trait Transform<X> {
62//!     fn transform(&self, x: &X) -> Result<X, Self::Error>;
63//! }
64//! ```
65//!
66//! ## Type-Safe State Machines
67//!
68//! Models use phantom types to track training state at compile time:
69//!
70//! ```rust,ignore
71//! pub struct Untrained;
72//! pub struct Trained;
73//!
74//! pub struct Model<State = Untrained> {
75//!     config: ModelConfig,
76//!     state: PhantomData<State>,
77//!     weights: Option<Weights>, // Only Some in Trained state
78//! }
79//! ```
80//!
81//! This ensures:
82//! - ✅ Can't predict with an untrained model (compile error)
83//! - ✅ Can't accidentally re-train a trained model
84//! - ✅ Type system enforces correct usage patterns
85//!
86//! ## Error Handling
87//!
88//! Comprehensive error types with rich context:
89//!
90//! ```rust,ignore
91//! pub enum SklearsError {
92//!     InvalidInput(String),
93//!     ShapeMismatch { expected: Shape, got: Shape },
94//!     NotFitted,
95//!     ConvergenceError { iterations: usize },
96//!     // ... and many more
97//! }
98//! ```
99//!
100//! ## Validation
101//!
102//! Input validation utilities ensure data consistency:
103//!
104//! ```rust,ignore
105//! use sklears_core::validation;
106//!
107//! // Check that X and y have compatible shapes
108//! validation::check_consistent_length(x, y)?;
109//!
110//! // Check for NaN/Inf values
111//! validation::check_array(x)?;
112//!
113//! // Validate classification targets
114//! validation::check_classification_targets(y)?;
115//! ```
116//!
117//! ## Parallel Processing
118//!
119//! Abstractions for parallel algorithm execution:
120//!
121//! ```rust,ignore
122//! use sklears_core::parallel::ParallelConfig;
123//! use rayon::prelude::*;
124//!
125//! let config = ParallelConfig::new().n_jobs(-1); // Use all cores
126//!
127//! data.par_iter()
128//!     .map(|sample| process(sample))
129//!     .collect()
130//! ```
131//!
132//! ## Feature Flags
133//!
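//! - `simd` - Enable SIMD optimizations
//! - `gpu_support` - GPU acceleration support
//! - `arrow` - Apache Arrow interoperability
//! - `binary` - Binary serialization support
//!
//! Further `#[cfg(feature = "...")]` gates appear in this file:
//!
//! - `custom_lints` - Compiles the `lints` module and re-exports it from the prelude
//! - `experimental` - Re-exports `public::experimental` from the prelude
//! - `mmap` - Re-exports `dataset::MmapDataset` from the prelude
//! - `async_support`, `plugins`, `exotic_hardware`, `effect_types`, `autodiff`,
//!   `auto_benchmarks` - Add the matching experimental re-exports to the prelude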
138//!
139//! ## Examples
140//!
141//! See individual module documentation for detailed examples.
142//!
143//! ## Known Limitations
144//!
145//! The following test modules are disabled due to ndarray HRTB (Higher-Ranked Trait Bound)
146//! lifetime constraints introduced in ndarray 0.17. Planned for re-enabling in v0.2.0:
147//! - `property_tests` - Property-based tests requiring trait bound simplification
148//! - `test_utilities` - Test utilities requiring trait bound simplification
149//!
150//! ## Integration
151//!
152//! This crate is re-exported by the main `sklears` crate, so you typically don't
153//! need to depend on it directly unless you're building custom estimators.
154
155pub mod dataset;
156pub mod distributed;
157pub mod distributed_algorithms;
158pub mod error;
159pub mod parallel;
160pub mod traits;
161pub mod types;
162pub mod utils;
163pub mod validation;
164pub mod validation_examples;
165
166#[cfg(feature = "simd")]
167pub mod simd;
168
169#[cfg(feature = "gpu_support")]
170pub mod gpu;
171
172#[cfg(feature = "arrow")]
173pub mod arrow;
174
175#[cfg(feature = "binary")]
176pub mod binary;
177
178pub mod advanced_array_ops;
179pub mod advanced_benchmarking;
180pub mod algorithm_markers;
181pub mod async_traits;
182pub mod auto_benchmark_generation;
183pub mod autodiff;
184pub mod benchmarking;
185pub mod compatibility;
186pub mod compile_time_macros;
187pub mod compile_time_validation;
188// TODO: Complex generic testing - needs blanket trait implementations
189// pub mod contract_testing;
190pub mod contribution;
191pub mod dependent_types;
192pub mod derive_macros;
193pub mod dsl_impl;
194pub mod effect_types;
195pub mod ensemble_improvements;
196pub mod exhaustive_error_handling;
197pub mod exotic_hardware;
198pub mod exotic_hardware_impls;
199pub mod fallback_strategies;
200pub mod features;
201pub mod formal_verification;
202pub mod format_io;
203pub mod formatting;
204pub mod memory_safety;
205pub mod mock_objects;
206pub mod performance_profiling;
207pub mod performance_reporting;
208pub mod plugin;
209pub mod plugin_marketplace_impl;
210pub mod refinement_types;
211pub mod streaming_lifetimes;
212pub mod unsafe_audit;
213
214// Export the procedural macros for DSL support
215pub mod macros;
216
217// Modularized API reference system (refactored from api_reference_generator.rs)
218pub mod api_analyzers;
219pub mod api_data_structures;
220pub mod api_formatters;
221pub mod api_generator_config;
222pub mod interactive_api_reference;
223pub mod interactive_playground;
224pub mod search_engines;
225pub mod tutorial_examples;
226pub mod tutorial_system;
227pub mod wasm_playground_impl;
228
229// Trait explorer tool for interactive API navigation
230pub mod trait_explorer;
231
232// Public/private API boundaries
233mod private;
234pub mod public;
235
236// Custom lints for ML-specific patterns
237#[cfg(feature = "custom_lints")]
238pub mod lints;
239
240// Dependency audit and optimization
241pub mod dependency_audit;
242
243// Code coverage reporting and enforcement
244pub mod code_coverage;
245
246// Input sanitization for untrusted data
247pub mod input_sanitization;
248
249// KNOWN ISSUE (v0.1.0): Module disabled due to ndarray HRTB lifetime constraints. Planned for v0.2.0.
250// #[allow(non_snake_case)]
251// #[cfg(test)]
252// pub mod property_tests;
253
254// KNOWN ISSUE (v0.1.0): Module disabled due to ndarray HRTB lifetime constraints. Planned for v0.2.0.
255// #[allow(non_snake_case)]
256// #[cfg(test)]
257// pub mod test_utilities;
258
259pub mod prelude {
260    /// Convenient re-exports of the most commonly used types and traits
261    ///
262    /// This prelude is organized by stability guarantees:
263    /// - Stable APIs are always available
264    /// - Experimental APIs require explicit opt-in
265    /// - Deprecated APIs emit warnings
266    // === Stable Public APIs (Always Available) ===
267    // Core traits - guaranteed stable
268    pub use crate::public::stable::{
269        Estimator, Fit, FitPredict, FitTransform, PartialFit, Predict, Transform,
270    };
271
272    // Core types - guaranteed stable
273    pub use crate::public::stable::{
274        Array1, Array2, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2, FeatureCount,
275        Features, Float, FloatBounds, Int, IntBounds, Labels, Numeric, Predictions, Probabilities,
276        Probability, SampleCount, Target,
277    };
278
279    // Error handling - guaranteed stable
280    pub use crate::public::stable::{ErrorChain, ErrorContext, Result, SklearsError};
281
282    // Validation - guaranteed stable
283    pub use crate::public::stable::{Validate, ValidationContext, ValidationRule};
284
285    // Dataset utilities - guaranteed stable
286    pub use crate::public::stable::{load_iris, make_blobs, make_regression, Dataset};
287
288    // === Experimental APIs (Require Opt-in) ===
289
290    #[cfg(feature = "experimental")]
291    pub use crate::public::experimental::*;
292
293    // === Additional Stable Exports ===
294
295    // Zero-copy utilities - stable
296    pub use crate::types::zero_copy::{
297        array_views, dataset_ops, ArrayPool, ZeroCopyArray, ZeroCopyDataset,
298    };
299    pub use crate::types::{
300        CowDataset, CowFeatures, CowLabels, CowPredictions, CowProbabilities, CowSampleWeight,
301        CowTarget, Distances, SampleWeight, Similarities, ZeroCopy, ZeroCopyFeatures,
302        ZeroCopyTarget,
303    };
304
305    // Validation utilities - stable
306    pub use crate::validation::{ml as validation_ml, ConfigValidation, ValidationRules};
307
308    // Compile-time validation - stable
309    pub use crate::compile_time_validation::{
310        CompileTimeValidated, DimensionValidator, LinearRegressionConfig,
311        LinearRegressionConfigBuilder, ParameterValidator, PositiveValidator, ProbabilityValidator,
312        RangeValidator, SolverCompatibility, ValidatedConfig,
313    };
314
315    // Memory-mapped datasets - stable when available
316    #[cfg(feature = "mmap")]
317    pub use crate::dataset::MmapDataset;
318
319    // Arrow integration - stable when available
320    #[cfg(feature = "arrow")]
321    pub use crate::arrow::{ArrowDataset, ColumnStats};
322
323    // Binary format support - stable when available
324    #[cfg(feature = "binary")]
325    pub use crate::binary::{
326        convenience, ArrayBinaryFormat, BinaryConfig, BinaryDeserialize, BinaryFileStorage,
327        BinaryFormat, BinaryMetadata, BinarySerialize, BinarySerializer, CompressionType,
328        StreamingBinaryReader, StreamingBinaryWriter,
329    };
330
331    // SIMD operations - experimental, requires feature flag
332    #[cfg(feature = "simd")]
333    pub use crate::simd::{SimdArrayOps, SimdOps};
334
335    // GPU acceleration - experimental, requires feature flag and CUDA
336    #[cfg(feature = "gpu_support")]
337    pub use crate::gpu::{
338        GpuArray, GpuContext, GpuDeviceProperties, GpuMatrixOps, GpuMemoryInfo, GpuUtils,
339        MemoryTransferOpts, TransferStrategy,
340    };
341
342    // Parallel processing - stable
343    pub use crate::parallel::{
344        ParallelConfig, ParallelCrossValidation, ParallelCrossValidator, ParallelEnsemble,
345        ParallelEnsembleOps, ParallelFit, ParallelMatrixOps, ParallelPredict, ParallelTransform,
346    };
347
348    // Async traits - experimental
349    #[cfg(feature = "async_support")]
350    pub use crate::async_traits::{
351        AsyncConfig, AsyncCrossValidation, AsyncEnsemble, AsyncFitAdvanced,
352        AsyncHyperparameterOptimization, AsyncModelPersistence, AsyncPartialFit,
353        AsyncPredictAdvanced, AsyncTransformAdvanced, CancellationToken, ConfidenceInterval,
354        ProgressInfo,
355    };
356
357    // Plugin system - experimental
358    #[cfg(feature = "plugins")]
359    pub use crate::plugin::{
360        AlgorithmPlugin, ClusteringPlugin, LogLevel, Plugin, PluginCapability, PluginCategory,
361        PluginConfig, PluginConfigBuilder, PluginFactory, PluginLoader, PluginMetadata,
362        PluginParameter, PluginRegistry, RuntimeSettings, TransformerPlugin,
363    };
364
365    // API stability utilities
366    pub use crate::public::{
367        api_version_info, is_api_experimental, is_api_stable, ApiStability, ApiVersionInfo,
368        ExperimentalApi, PublicApiConfig, PublicApiConfigBuilder, StableApi,
369    };
370
371    // Custom lints for ML-specific patterns
372    #[cfg(feature = "custom_lints")]
373    pub use crate::lints::{
374        ApiUsageLint, ArrayPerformanceLint, DataValidationLint, LintCategory, LintConfig,
375        LintRegistry, LintRule, LintSeverity, MemoryLeakLint, ModelValidationLint,
376        NumericalStabilityLint,
377    };
378
379    // Dependency audit and optimization
380    pub use crate::dependency_audit::{
381        calculate_metrics, generate_dependency_graph, BinarySizeImpact, CompileTimeImpact,
382        DependencyAudit, DependencyCategory, DependencyInfo, DependencyRecommendation,
383        DependencyReport, RecommendationAction,
384    };
385
386    // Code coverage reporting and enforcement
387    pub use crate::code_coverage::{
388        CICoverageResult, CIDConfig, CoverageCI, CoverageCollector, CoverageConfig, CoverageReport,
389        CoverageTool, QualityGatesResult, RecommendationPriority,
390    };
391
392    // Input sanitization for untrusted data
393    pub use crate::input_sanitization::{
394        is_ml_data_safe, sanitize_ml_data, InputSanitizer, SafetyIssue, SanitizationConfig,
395        Sanitize,
396    };
397
398    // Advanced array operations for high-performance computing
399    pub use crate::advanced_array_ops::{ArrayStats, MatrixOps, MemoryOps};
400
401    // Re-export the error_context macro
402    pub use crate::error_context;
403
404    // Code quality and safety tools - stable
405    pub use crate::formatting::{
406        CodeFormatter, FormattingConfig, FormattingConfigBuilder, FormattingIssue,
407        FormattingReport, IssueSeverity, MLFormattingRules,
408    };
409
410    pub use crate::unsafe_audit::{
411        SafetyRecommendation, SafetySeverity, UnsafeAuditConfig, UnsafeAuditReport, UnsafeAuditor,
412        UnsafeFinding, UnsafePattern, UnsafeType,
413    };
414
415    // Memory safety guarantees and utilities - stable
416    pub use crate::memory_safety::{
417        MemoryPoolStats, MemorySafety, MemorySafetyGuarantee, SafeArrayOps, SafeMemoryPool,
418        SafePooledBuffer, SafePtr, SafeSharedModel, UnsafeValidationResult,
419    };
420
421    // Benchmarking utilities - stable
422    pub use crate::benchmarking::{
423        AccuracyComparison, AlgorithmBenchmark, AlgorithmType, AutomatedBenchmarkRunner,
424        BenchmarkConfig, BenchmarkDataset, BenchmarkResults, BenchmarkRunResult, BenchmarkSuite,
425        MemoryStatistics, TimingStatistics,
426    };
427
428    // Mock objects for testing - now enabled and working
429    pub use crate::mock_objects::{
430        MockBehavior, MockConfig, MockEnsemble, MockErrorType, MockEstimator, MockEstimatorBuilder,
431        MockStateSnapshot, MockTransformConfig, MockTransformType, MockTransformer,
432        MockTransformerBuilder, TrainedMockEstimator, VotingStrategy,
433    };
434
435    // Contract testing framework - temporarily disabled until ndarray 0.17 migration is complete
436    // pub use crate::contract_testing::{
437    //     ContractTestConfig, ContractTestResult, ContractTestSummary, ContractTester,
438    //     PropertyTestStats, TestCase, TraitLaws,
439    // };
440
441    // Compatibility layers for popular ML libraries - stable
442    pub use crate::compatibility::{
443        numpy::NumpyArray,
444        pandas::{DataFrame, DataValue},
445        pytorch::{ndarray_to_pytorch_tensor, TensorMetadata},
446        serialization::{CrossPlatformModel, ModelFormat, ModelSerialization},
447        sklearn::{FittedScikitLearnModel, ParamValue, ScikitLearnModel, SklearnCompatible},
448    };
449
450    // Standard format readers and writers - stable
451    pub use crate::format_io::{
452        CsvOptions, DataFormat, FormatDetector, FormatOptions, FormatReader, FormatWriter,
453        Hdf5Options, JsonOptions, NumpyOptions, ParquetOptions, StreamingReader,
454    };
455
456    // Contribution guidelines and review process - stable
457    pub use crate::contribution::{
458        AlgorithmicCriteria, ClippyLevel, CodeQualityCriteria, ContributionChecker,
459        ContributionConfig, ContributionResult, ContributionWorkflow, DocumentationCriteria,
460        GateResult, PerformanceCriteria, QualityGate, QualityGateType, ReviewCriteria,
461        TestingCriteria, WorkflowStep,
462    };
463
464    // Automated performance reporting system - stable
465    pub use crate::performance_reporting::{
466        AlertConfig, AnalysisResult, AnalysisType, HealthStatus, OutputFormat, PerformanceAnalyzer,
467        PerformanceReport, PerformanceReporter, RegressionThreshold, ReportConfig, TimeRange,
468        TrendDirection,
469    };
470
471    // Modularized API reference system - stable
472    pub use crate::api_analyzers::{
473        CrossReferenceBuilder as ModularCrossReferenceBuilder, ExampleValidator,
474        TraitAnalyzer as ModularTraitAnalyzer, TypeExtractor as ModularTypeExtractor,
475    };
476    pub use crate::api_data_structures::{
477        ApiReference as ModularApiReference, CodeExample as ModularCodeExample,
478        TraitInfo as ModularTraitInfo, TypeInfo as ModularTypeInfo,
479    };
480    pub use crate::api_formatters::{
481        ApiReferenceGenerator as ModularApiReferenceGenerator, DocumentFormatter,
482    };
483    pub use crate::api_generator_config::{
484        GeneratorConfig as ModularGeneratorConfig, OutputFormat as ModularOutputFormat,
485        ValidationConfig,
486    };
487    pub use crate::interactive_playground::{
488        LiveCodeRunner, UIComponentBuilder, WasmPlaygroundManager,
489    };
490    pub use crate::search_engines::{
491        AutocompleteTrie, SearchQuery, SearchResult, SemanticSearchEngine,
492    };
493    pub use crate::tutorial_system::{
494        LearningPath, ProgressTracker, Tutorial, TutorialBuilder, TutorialSystem,
495    };
496
497    // Trait explorer tool for interactive API navigation - stable
498    pub use crate::trait_explorer::{
499        CompilationImpact, DependencyAnalysis, DependencyAnalyzer, EdgeType, ExampleCategory,
500        ExampleDifficulty, ExampleGenerator, ExplorerConfig, GraphExportFormat, MemoryFootprint,
501        PerformanceAnalysis, RuntimeOverhead, SimilarTrait, TraitExplorationResult, TraitExplorer,
502        TraitGraph, TraitGraphEdge, TraitGraphGenerator, TraitGraphMetadata, TraitGraphNode,
503        TraitNodeType, TraitPerformanceAnalyzer, TraitRegistry, UsageExample,
504    };
505
506    // Exotic hardware support - experimental (TPU, FPGA, Quantum)
507    #[cfg(feature = "exotic_hardware")]
508    pub use crate::exotic_hardware::{
509        ActivationType, ComputationGraph, ComputationMetadata, ComputationNode, ComputationResult,
510        ExoticHardware, ExoticHardwareManager, FpgaDevice, FpgaVendor, HardwareCapabilities,
511        HardwareCompiler, HardwareComputation, HardwareId, HardwareMemoryManager, HardwareStatus,
512        HardwareType, MemoryHandle, MemoryStats, Operation, PerformanceEstimate, Precision,
513        QuantumBackend, QuantumDevice, TensorSpec, TpuDevice, TpuVersion, ValidationReport,
514    };
515
516    // Effect type system - experimental (compile-time effect tracking)
517    #[cfg(feature = "effect_types")]
518    pub use crate::effect_types::{
519        AsyncEffect, Capability, Combined, Effect, EffectAnalyzer, EffectBuilder, EffectMetadata,
520        EffectType, Fallible, FallibleIOEffect, GPUMemoryEffect, IORandomEffect, Linear, Memory,
521        MemoryIOEffect, Pure, Random, GPU, IO,
522    };
523
524    // Automatic differentiation - experimental (forward/reverse mode AD)
525    #[cfg(feature = "autodiff")]
526    pub use crate::autodiff::{
527        ADMode, AutodiffConfig, ComputationNode as ADNode, Dual, SymbolicExpression, Variable,
528        VariableId,
529    };
530
531    // Distributed computing support - experimental (cluster-aware ML) - TEMPORARILY DISABLED
532    // #[cfg(feature = "distributed")]
533    // pub use crate::distributed::{
534    //     ClusterInfo, ClusterNode, DistributedCluster, DistributedDataset, DistributedEstimator,
535    //     DistributedMessage, DistributedMetrics, DistributedOptimizer, DistributedTraining,
536    //     FaultTolerance, GradientAggregation, MessagePassing, NodeId, ParameterServer,
537    // };
538
539    // Compile-time macros and verification - experimental (model verification) - TEMPORARILY DISABLED
540    // #[cfg(feature = "compile_time_macros")]
541    // pub use crate::compile_time_macros::{
542    //     validate_performance, verify_dimensions, verify_model, BenchmarkConfig as CompileTimeBenchmarkConfig,
543    //     CompileTimeVerifiable, ComplexityAnalysis, DimensionVerifiable, MathematicallyVerifiable,
544    //     OptimizationSuggestion, PerformanceTargets, ScalingBehavior, VerificationConfig,
545    //     VerificationEngine, VerificationResult,
546    // };
547
548    // Automatic benchmark generation - experimental (performance testing)
549    #[cfg(feature = "auto_benchmarks")]
550    pub use crate::auto_benchmark_generation::{
551        generate_benchmarks_for_type, AutoBenchmarkConfig, BenchmarkExecutor, BenchmarkGenerator,
552        BenchmarkResult, BenchmarkType, ComplexityClass, GeneratedBenchmark,
553        PerformanceEstimate as AutoBenchmarkPerformanceEstimate, RegressionDetector,
554        ScalingDimension,
555    };
556
557    // Advanced ensemble method improvements - now enabled and working
558    pub use crate::ensemble_improvements::{
559        AggregationMethod, BaseEstimator, BaseEstimatorConfig, BaseEstimatorType,
560        DistributedConfig, DistributedEnsemble, EnsembleConfig, EnsembleType,
561        LoadBalancingStrategy, NodeRole, ParallelConfig as EnsembleParallelConfig,
562        ParallelEnsemble as AdvancedParallelEnsemble, SamplingStrategy, TrainedBaseModel,
563        TrainedParallelEnsemble, TrainingState,
564    };
565}