Expand description
Utility functions and helpers for sklears
This crate provides common utilities used across the sklears ecosystem, including data validation, array manipulation, random number generation, and dataset creation utilities.
§Examples
use sklears_utils::validation::check_consistent_length;
use sklears_utils::random::set_random_state;
use scirs2_core::ndarray::array;
// Validation
let x = array![1, 2, 3];
let y = array![4, 5, 6];
assert!(check_consistent_length(&[&x, &y]).is_ok());
// Random state
set_random_state(42);
Re-exports§
pub use api_integration::ApiClient;pub use api_integration::ApiConfig;pub use api_integration::ApiError;pub use api_integration::ApiMetrics;pub use api_integration::ApiRequest;pub use api_integration::ApiResponse;pub use api_integration::ApiService;pub use api_integration::Authentication;pub use api_integration::HttpMethod;pub use api_integration::MLApiPatterns;pub use api_integration::MethodStats;pub use api_integration::MockApiClient;pub use api_integration::RequestBuilder;pub use array_utils::argmax;pub use array_utils::argmin;pub use array_utils::argsort;pub use array_utils::array_add_constant_inplace;pub use array_utils::array_apply_inplace;pub use array_utils::array_concatenate;pub use array_utils::array_cumsum;pub use array_utils::array_describe;pub use array_utils::array_max;pub use array_utils::array_mean;pub use array_utils::array_mean_f64;pub use array_utils::array_median;pub use array_utils::array_min;pub use array_utils::array_min_max;pub use array_utils::array_min_max_normalize;pub use array_utils::array_min_max_normalize_inplace;pub use array_utils::array_percentile;pub use array_utils::array_quantiles;pub use array_utils::array_resize;pub use array_utils::array_reverse;pub use array_utils::array_scale_inplace;pub use array_utils::array_split;pub use array_utils::array_standardize;pub use array_utils::array_standardize_inplace;pub use array_utils::array_std;pub use array_utils::array_sum;pub use array_utils::array_unique_counts;pub use array_utils::array_var;pub use array_utils::array_variance_f64;pub use array_utils::boolean_indexing_1d;pub use array_utils::boolean_indexing_2d;pub use array_utils::broadcast_shape;pub use array_utils::check_array_1d;pub use array_utils::check_array_2d;pub use array_utils::column_or_1d;pub use array_utils::compatible_layout;pub use array_utils::compress_1d;pub use array_utils::concatenate_2d;pub use array_utils::create_mask;pub use array_utils::densify_threshold;pub use array_utils::efficient_copy;pub use 
array_utils::fancy_indexing_1d;pub use array_utils::fancy_indexing_2d;pub use array_utils::fast_dot_product_f32;pub use array_utils::fast_dot_product_f64;pub use array_utils::fast_sum_f32;pub use array_utils::fast_sum_f64;pub use array_utils::filter_array;pub use array_utils::flatten_2d;pub use array_utils::get_strides;pub use array_utils::is_broadcastable;pub use array_utils::is_contiguous;pub use array_utils::label_counts;pub use array_utils::make_contiguous;pub use array_utils::normalize_array;pub use array_utils::pad_2d;pub use array_utils::put_1d;pub use array_utils::reshape_1d_to_2d;pub use array_utils::safe_indexing;pub use array_utils::safe_indexing_2d;pub use array_utils::safe_sparse_dot;pub use array_utils::safe_sparse_dot_f32;pub use array_utils::safe_sparse_dot_f64;pub use array_utils::simd_add_arrays_f32;pub use array_utils::simd_add_arrays_f64;pub use array_utils::simd_multiply_arrays_f32;pub use array_utils::simd_multiply_arrays_f64;pub use array_utils::simd_scale_array_f32;pub use array_utils::simd_scale_array_f64;pub use array_utils::slice_with_step;pub use array_utils::sparse_add;pub use array_utils::sparse_diag;pub use array_utils::sparse_transpose;pub use array_utils::split_2d;pub use array_utils::stack_1d;pub use array_utils::take_1d;pub use array_utils::tile_2d;pub use array_utils::transpose;pub use array_utils::unique_labels;pub use array_utils::where_condition;pub use array_utils::ArrayStatistics;pub use cloud_storage::CloudProvider;pub use cloud_storage::CloudStorageClient;pub use cloud_storage::CloudStorageConfig;pub use cloud_storage::CloudStorageFactory;pub use cloud_storage::CloudStorageUtils;pub use cloud_storage::MockCloudStorageClient;pub use cloud_storage::ObjectMetadata;pub use cloud_storage::StorageMetrics;pub use cloud_storage::SyncMode;pub use cloud_storage::SyncResult;pub use config::ArgParser;pub use config::Config;pub use config::ConfigBuilder;pub use config::ConfigSource;pub use config::ConfigValidator;pub use 
config::ConfigValue;pub use config::HotReloadConfig;pub use cross_validation::CVSplit;pub use cross_validation::GroupKFold;pub use cross_validation::LeaveOneGroupOut;pub use cross_validation::StratifiedKFold;pub use cross_validation::TimeSeriesSplit;pub use data_pipeline::DataPipeline;pub use data_pipeline::MLPipelineBuilder;pub use data_pipeline::PipelineContext;pub use data_pipeline::PipelineMetrics;pub use data_pipeline::PipelineMonitor;pub use data_pipeline::PipelineResult;pub use data_pipeline::PipelineStep;pub use data_pipeline::StepMetrics;pub use data_pipeline::TransformStep;pub use data_structures::AtomicCounter;pub use data_structures::BinarySearchTree;pub use data_structures::BlockMatrix;pub use data_structures::ConcurrentHashMap;pub use data_structures::ConcurrentQueue;pub use data_structures::ConcurrentRingBuffer;pub use data_structures::Graph;pub use data_structures::RingBuffer;pub use data_structures::TreeNode;pub use data_structures::TreeStatistics;pub use data_structures::Trie;pub use data_structures::TrieStatistics;pub use data_structures::WeightedGraph;pub use data_structures::WorkQueue;pub use database::Connection;pub use database::DatabaseConfig;pub use database::DatabaseError;pub use database::DatabasePool;pub use database::Query;pub use database::QueryBuilder;pub use database::QueryResult;pub use database::ResultSet;pub use database::Transaction;pub use debug::ArrayDebugger;pub use debug::DebugContext;pub use debug::DiagnosticTools;pub use debug::MemoryDebugger;pub use debug::PerformanceDebugger;pub use debug::TestDataGenerator;pub use debug::TimingStats;pub use distributed_computing::ClusterConfig;pub use distributed_computing::ClusterNode;pub use distributed_computing::ClusterStats;pub use distributed_computing::DistributedCluster;pub use distributed_computing::DistributedError;pub use distributed_computing::DistributedJob;pub use distributed_computing::FaultDetector;pub use distributed_computing::JobExecution;pub use 
distributed_computing::JobPriority;pub use distributed_computing::JobScheduler;pub use distributed_computing::JobStatus;pub use distributed_computing::JobType;pub use distributed_computing::LoadBalancer;pub use distributed_computing::LoadMetrics;pub use distributed_computing::NodeCapabilities;pub use distributed_computing::NodeStatus;pub use distributed_computing::ResourceRequirements;pub use distributed_computing::ResourceUsage;pub use distributed_computing::SchedulingStrategy;pub use ensemble::AggregationStrategy;pub use ensemble::BaggingPredictor;pub use ensemble::Bootstrap;pub use ensemble::OOBScoreEstimator;pub use ensemble::StackingHelper;pub use environment::CacheInfo;pub use environment::CpuInfo;pub use environment::EnvironmentInfo;pub use environment::FeatureChecker;pub use environment::HardwareDetector;pub use environment::MemoryInfo;pub use environment::OSInfo;pub use environment::PerformanceCharacteristics;pub use environment::RuntimeInfo;pub use error_handling::create_error;pub use error_handling::create_error_at;pub use error_handling::EnhancedError;pub use error_handling::ErrorAggregator;pub use error_handling::ErrorContext;pub use error_handling::ErrorRecovery;pub use error_handling::ErrorReporter;pub use error_handling::ErrorStatistics;pub use error_handling::ErrorSummary;pub use error_handling::RecoveryStrategy;pub use external_integration::ArrayTransfer;pub use external_integration::CFunctionSignature;pub use external_integration::CParameter;pub use external_integration::CType;pub use external_integration::FFIUtils;pub use external_integration::PyArrayBuffer;pub use external_integration::PythonInterop;pub use external_integration::PythonParameter;pub use external_integration::PythonValue;pub use external_integration::WasmBuildConfig;pub use external_integration::WasmOptimization;pub use external_integration::WasmParameter;pub use external_integration::WasmType;pub use external_integration::WasmUtils;pub use 
feature_engineering::BinningStrategy;pub use feature_engineering::FeatureBinner;pub use feature_engineering::InteractionFeatures;pub use feature_engineering::PolynomialFeatures;pub use file_io::CompressionUtils;pub use file_io::EfficientFileReader;pub use file_io::EfficientFileWriter;pub use file_io::FormatConverter;pub use file_io::SerializationUtils;pub use file_io::StreamProcessor;pub use gpu_computing::ActivationFunction;pub use gpu_computing::GpuArrayOps;pub use gpu_computing::GpuDevice;pub use gpu_computing::GpuError;pub use gpu_computing::GpuKernelExecution;pub use gpu_computing::GpuKernelInfo;pub use gpu_computing::GpuMemoryAllocation;pub use gpu_computing::GpuProfiler;pub use gpu_computing::GpuUtils;pub use gpu_computing::KernelStats;pub use gpu_computing::MemoryStats;pub use gpu_computing::MemoryTransferStats;pub use linear_algebra::ConditionNumber;pub use linear_algebra::EigenDecomposition;pub use linear_algebra::MatrixDecomposition;pub use linear_algebra::MatrixNorms;pub use linear_algebra::MatrixRank;pub use linear_algebra::MatrixUtils;pub use linear_algebra::Pseudoinverse;pub use logging::flush_global_logger;pub use logging::get_global_logger;pub use logging::set_global_level;pub use logging::ConsoleOutput;pub use logging::DistributedLogger;pub use logging::FileOutput;pub use logging::JsonFormatter;pub use logging::LogAnalysis;pub use logging::LogAnalyzer;pub use logging::LogEntry;pub use logging::LogLevel;pub use logging::LogStats;pub use logging::Logger;pub use logging::LoggerConfig;pub use logging::OperationStats;pub use logging::PerformanceLogger;pub use logging::TextFormatter;pub use math_utils::constants;pub use math_utils::NumericalPrecision;pub use math_utils::OverflowDetection;pub use math_utils::RobustArrayOps;pub use math_utils::SpecialFunctions;pub use memory::AllocationStats;pub use memory::GcHelper;pub use memory::LeakDetector;pub use memory::MemoryAlignment;pub use memory::MemoryMappedFile;pub use memory::MemoryMonitor;pub use 
memory::MemoryPool;pub use memory::MemoryValidator;pub use memory::SafeBuffer;pub use memory::SafePtr;pub use memory::SafeVec;pub use memory::StackGuard;pub use memory::TrackingAllocator;pub use metrics::bhattacharyya_distance;pub use metrics::braycurtis_distance;pub use metrics::canberra_distance;pub use metrics::chebyshev_distance;pub use metrics::cosine_distance;pub use metrics::cosine_distance_f32;pub use metrics::cosine_similarity;pub use metrics::cosine_similarity_f32;pub use metrics::euclidean_distance;pub use metrics::euclidean_distance_f32;pub use metrics::hamming_distance;pub use metrics::hamming_distance_normalized;pub use metrics::hellinger_distance;pub use metrics::jaccard_distance;pub use metrics::jaccard_similarity;pub use metrics::jensen_shannon_divergence;pub use metrics::kl_divergence;pub use metrics::mahalanobis_distance;pub use metrics::manhattan_distance;pub use metrics::manhattan_distance_f32;pub use metrics::minkowski_distance;pub use metrics::wasserstein_1d;pub use optimization::ConstraintHandler;pub use optimization::ConstraintViolation;pub use optimization::ConvergenceCriteria;pub use optimization::ConvergenceStatus;pub use optimization::GradientComputer;pub use optimization::GradientMethod;pub use optimization::LineSearch;pub use optimization::LineSearchMethod;pub use optimization::OptimizationHistory;pub use parallel::ParallelIterator;pub use parallel::ParallelReducer;pub use parallel::ThreadPool;pub use parallel::WorkStealingQueue;pub use performance::BaselineMetrics;pub use performance::Benchmark;pub use performance::BenchmarkResult;pub use performance::MemoryTracker;pub use performance::ProfileReport;pub use performance::ProfileResult;pub use performance::Profiler;pub use performance::RegressionDetector;pub use performance::RegressionResult;pub use performance::Timer;pub use performance::TimerSummary;pub use performance_regression::PerformanceRegressionTester;pub use performance_regression::RegressionTestResult;pub use 
performance_regression::RegressionThresholds;pub use preprocessing::DataCleaner;pub use preprocessing::DataQualityAssessor;pub use preprocessing::FeatureScaler;pub use preprocessing::OutlierDetector;pub use probabilistic::BloomFilter;pub use probabilistic::BloomFilterStats;pub use probabilistic::CountMinSketch;pub use probabilistic::CountMinSketchStats;pub use probabilistic::HyperLogLog;pub use probabilistic::HyperLogLogStats;pub use probabilistic::LSHash;pub use probabilistic::LSHashStats;pub use probabilistic::MinHash;pub use probabilistic::MinHashStats;pub use profile_guided_optimization::BranchProfile;pub use profile_guided_optimization::BranchType;pub use profile_guided_optimization::CacheStatistics;pub use profile_guided_optimization::DependencyChain;pub use profile_guided_optimization::FunctionProfile;pub use profile_guided_optimization::ImplementationEffort;pub use profile_guided_optimization::InstructionMix;pub use profile_guided_optimization::LoopProfile;pub use profile_guided_optimization::MemoryAccessPattern;pub use profile_guided_optimization::MemoryAccessType;pub use profile_guided_optimization::OptimizationApplication;pub use profile_guided_optimization::OptimizationOpportunity;pub use profile_guided_optimization::OptimizationRecommendation;pub use profile_guided_optimization::OptimizationReport;pub use profile_guided_optimization::OptimizationRule;pub use profile_guided_optimization::OptimizationType;pub use profile_guided_optimization::PerformanceProfile;pub use profile_guided_optimization::PerformanceTargets;pub use profile_guided_optimization::ProfileError;pub use profile_guided_optimization::ProfileGuidedOptimizer;pub use profile_guided_optimization::ProfileSummary;pub use profile_guided_optimization::ProfilerConfig;pub use profile_guided_optimization::RiskLevel;pub use profile_guided_optimization::StridePattern;pub use profile_guided_optimization::TriggerCondition;pub use r_integration::RDataFrame;pub use r_integration::RError;pub use 
r_integration::RIntegration;pub use r_integration::RMatrix;pub use r_integration::RPackageManager;pub use r_integration::RScriptBuilder;pub use r_integration::RStatisticalFunctions;pub use r_integration::RValue;pub use random::bootstrap_indices;pub use random::get_rng;pub use random::importance_sampling;pub use random::k_fold_indices;pub use random::random_indices;pub use random::random_permutation;pub use random::random_weights;pub use random::reservoir_sampling;pub use random::set_random_state;pub use random::shuffle_indices;pub use random::stratified_split_indices;pub use random::train_test_split_indices;pub use random::weighted_sampling_without_replacement;pub use random::DistributionSampler;pub use random::ThreadSafeRng;pub use simd::SimdCapabilities;pub use simd::SimdDistanceOps;pub use simd::SimdF32Ops;pub use simd::SimdF64Ops;pub use simd::SimdMatrixOps;pub use simd::SimdStatsOps;pub use spatial::geographic::CoordinateSystem;pub use spatial::geographic::GeoBounds;pub use spatial::geographic::GeoPoint;pub use spatial::geographic::GeoUtils;pub use spatial::geographic::Hemisphere;pub use spatial::KdTree;pub use spatial::OctTree;pub use spatial::Point;pub use spatial::QuadTree;pub use spatial::RTree;pub use spatial::Rectangle;pub use spatial::SpatialHash;pub use spatial::SpatialHashStats;pub use statistical::ConfidenceInterval;pub use statistical::ConfidenceIntervals;pub use statistical::CorrelationAnalysis;pub use statistical::DistributionFitting;pub use statistical::StatisticalTests;pub use statistical::TestResult;pub use text_processing::RegexUtils;pub use text_processing::StringSimilarity;pub use text_processing::TextAnalysis;pub use text_processing::TextNormalizer;pub use text_processing::TextParser;pub use text_processing::UnicodeUtils;pub use time_series::AggregationMethod;pub use time_series::LagFeatureGenerator;pub use time_series::SlidingWindow;pub use time_series::TemporalAggregator;pub use time_series::TemporalIndex;pub use 
time_series::TimeSeries;pub use time_series::TimeSeriesPoint;pub use time_series::TimeZoneUtils;pub use time_series::Timestamp;pub use time_series::TrendDirection;pub use time_series::WindowStats;pub use type_safety::DataState;pub use type_safety::ExactSize;pub use type_safety::Kilograms;pub use type_safety::MatrixMul;pub use type_safety::Measurement;pub use type_safety::Meters;pub use type_safety::MinSize;pub use type_safety::ModelState;pub use type_safety::NonNegative;pub use type_safety::Normalized;pub use type_safety::One;pub use type_safety::Pixels;pub use type_safety::Positive;pub use type_safety::Seconds;pub use type_safety::Three;pub use type_safety::Trained;pub use type_safety::Two;pub use type_safety::TypedArray;pub use type_safety::Untrained;pub use type_safety::Unvalidated;pub use type_safety::Validated;pub use type_safety::ValidatedArray;pub use type_safety::Zero;pub use type_safety::D1;pub use type_safety::D2;pub use type_safety::D3;pub use visualization::AxisConfig;pub use visualization::BoxPlotData;pub use visualization::ChartData;pub use visualization::Color;pub use visualization::HeatmapData;pub use visualization::HistogramData;pub use visualization::LinePlotData;pub use visualization::MLVisualizationUtils;pub use visualization::PlotData;pub use visualization::PlotLayout;pub use visualization::PlotMargin;pub use visualization::PlotSummary;pub use visualization::PlotUtils;pub use visualization::Point2D;pub use visualization::ScatterPlotData;pub use data_generation::*;pub use multiclass::*;pub use validation::*;
Modules§
- api_integration - API integration utilities for ML workflows
- array_utils - Array utilities module
- cloud_storage - Cloud storage utilities for machine learning data processing
- config - Configuration Management Utilities
- cross_validation - Advanced cross-validation utilities for machine learning
- data_generation
- data_pipeline - Data pipeline utilities for ML workflows
- data_structures - Auto-generated module structure
- database - Database connectivity utilities for ML data processing
- debug - Debug utilities for development and troubleshooting
- distributed_computing - Auto-generated module structure
- ensemble - Ensemble utilities for machine learning
- environment - Environment detection utilities for hardware and runtime capabilities
- error_handling - Enhanced error handling utilities for machine learning workflows
- external_integration - External integration utilities for machine learning interoperability
- feature_engineering - Feature engineering utilities for machine learning
- file_io - File I/O utilities for efficient data handling in machine learning workflows
- gpu_computing - GPU computing integration utilities
- linear_algebra - Linear Algebra Utilities
- logging - Comprehensive logging framework for sklears
- math_utils - Mathematical utility functions for numerical computing
- memory - Memory management utilities for high-performance ML workloads
- metrics - Distance and similarity metrics
- multiclass
- optimization - Optimization utilities for numerical optimization algorithms
- parallel - Parallel computing utilities for machine learning workloads
- performance - Performance measurement and profiling utilities
- performance_regression - Performance regression testing utilities
- preprocessing - Data preprocessing utilities for machine learning
- probabilistic - Probabilistic data structures for efficient approximate algorithms
- profile_guided_optimization - Profile-guided optimization utilities
- r_integration - R integration utilities
- random - Random number generation utilities
- simd - SIMD (Single Instruction, Multiple Data) optimizations for high-performance computing
- spatial - Spatial data structures for efficient spatial queries and indexing
- statistical - Statistical Utilities
- stats - Statistical utility functions
- text_processing - Text processing utilities for machine learning workflows
- time_series - Time Series Utilities
- type_safety - Type safety utilities for compile-time validation and zero-cost abstractions
- validation - Input validation utilities
- visualization - Visualization utilities for machine learning data preparation
Macros§
- assert_shape - Compile-time shape assertion
- benchmark_regression - Macro to easily benchmark functions
- const_assert - Compile-time assertion macro
- debug_assert_msg - Enhanced assertion macro with debugging information
- debug_context - Macro for creating debug contexts easily
- debug_println - Macro for conditional debugging output
- defer - Macro for creating stack guards
- log_debug
- log_error - Logging macros
- log_info
- log_trace
- log_warn
- time_it - Macro for quick performance timing
Enums§
- UtilsError - Common error type for utils
Type Aliases§
- UtilsResult - Type alias for utils results