1#![allow(dead_code)]
2#![allow(non_snake_case)]
3#![allow(missing_docs)]
4#![allow(deprecated)]
5#![allow(clippy::needless_range_loop)]
6#![allow(clippy::needless_borrow)]
7pub mod api_integration;
30pub mod array_utils;
33pub mod cloud_storage;
34pub mod config;
35pub mod cross_validation;
36pub mod data_generation;
37pub mod data_pipeline;
38pub mod data_structures;
39pub mod database;
40pub mod debug;
41pub mod distributed_computing;
42pub mod ensemble;
43pub mod environment;
44pub mod error_handling;
45pub mod external_integration;
46pub mod feature_engineering;
47pub mod file_io;
48pub mod gpu_computing;
49pub mod linear_algebra;
50pub mod logging;
51pub mod math_utils;
52pub mod memory;
53pub mod metrics;
54pub mod multiclass;
55pub mod optimization;
56pub mod parallel;
57pub mod performance;
58pub mod performance_regression;
59pub mod preprocessing;
60pub mod probabilistic;
61pub mod profile_guided_optimization;
62pub mod r_integration;
63pub mod random;
64pub mod simd;
65pub mod spatial;
66pub mod statistical;
67pub mod stats;
68pub mod text_processing;
69pub mod time_series;
70pub mod type_safety;
71pub mod validation;
72pub mod visualization;
73
74#[allow(non_snake_case)]
75#[cfg(test)]
76mod property_tests;
77
78pub use api_integration::{
80 ApiClient, ApiConfig, ApiError, ApiMetrics, ApiRequest, ApiResponse, ApiService,
81 Authentication, HttpMethod, MLApiPatterns, MethodStats, MockApiClient, RequestBuilder,
82};
83pub use array_utils::{
103 argmax,
104 argmin,
105 argsort,
106 array_add_constant_inplace,
107 array_apply_inplace,
108 array_concatenate,
109 array_cumsum,
110 array_describe,
111 array_max,
112 array_mean,
113 array_mean_f64,
114 array_median,
115 array_min,
116 array_min_max,
117 array_min_max_normalize,
118 array_min_max_normalize_inplace,
119 array_percentile,
120 array_quantiles,
121 array_resize,
122 array_reverse,
123
124 array_scale_inplace,
125 array_split,
126 array_standardize,
127 array_standardize_inplace,
129 array_std,
130 array_sum,
132 array_unique_counts,
133 array_var,
134 array_variance_f64,
135 boolean_indexing_1d,
136 boolean_indexing_2d,
137 broadcast_shape,
138 check_array_1d,
140 check_array_2d,
141 column_or_1d,
142 compatible_layout,
143
144 compress_1d,
145
146 concatenate_2d,
147 create_mask,
148 densify_threshold,
149
150 efficient_copy,
151 fancy_indexing_1d,
153 fancy_indexing_2d,
154 fast_dot_product_f32,
155 fast_dot_product_f64,
156 fast_sum_f32,
157
158 fast_sum_f64,
159 filter_array,
160 flatten_2d,
161 get_strides,
162 is_broadcastable,
163 is_contiguous,
165 label_counts,
166 make_contiguous,
167 normalize_array,
168 pad_2d,
169
170 put_1d,
171 reshape_1d_to_2d,
173 safe_indexing,
174 safe_indexing_2d,
175 safe_sparse_dot,
177 safe_sparse_dot_f32,
178 safe_sparse_dot_f64,
179 simd_add_arrays_f32,
180 simd_add_arrays_f64,
182 simd_multiply_arrays_f32,
183 simd_multiply_arrays_f64,
184 simd_scale_array_f32,
185 simd_scale_array_f64,
186 slice_with_step,
187 sparse_add,
188 sparse_diag,
189 sparse_transpose,
190 split_2d,
191 stack_1d,
192 take_1d,
193 tile_2d,
194 transpose,
195 unique_labels,
196 where_condition,
197 ArrayStatistics,
198};
199pub use cloud_storage::{
200 CloudProvider, CloudStorageClient, CloudStorageConfig, CloudStorageFactory, CloudStorageUtils,
201 MockCloudStorageClient, ObjectMetadata, StorageMetrics, SyncMode, SyncResult,
202};
203pub use config::{
204 ArgParser, Config, ConfigBuilder, ConfigSource, ConfigValidator, ConfigValue, HotReloadConfig,
205};
206pub use cross_validation::{
207 CVSplit, GroupKFold, LeaveOneGroupOut, StratifiedKFold, TimeSeriesSplit,
208};
209pub use data_generation::*;
210pub use data_pipeline::{
211 DataPipeline, MLPipelineBuilder, PipelineContext, PipelineMetrics, PipelineMonitor,
212 PipelineResult, PipelineStep, StepMetrics, TransformStep,
213};
214pub use data_structures::{
215 AtomicCounter, BinarySearchTree, BlockMatrix, ConcurrentHashMap, ConcurrentQueue,
216 ConcurrentRingBuffer, Graph, RingBuffer, TreeNode, TreeStatistics, Trie, TrieStatistics,
217 WeightedGraph, WorkQueue,
218};
219pub use database::{
220 Connection, DatabaseConfig, DatabaseError, DatabasePool, Query, QueryBuilder, QueryResult,
221 ResultSet, Transaction,
222};
223pub use debug::{
224 ArrayDebugger, DebugContext, DiagnosticTools, MemoryDebugger, PerformanceDebugger,
225 TestDataGenerator, TimingStats,
226};
227pub use distributed_computing::{
228 ClusterConfig, ClusterNode, ClusterStats, DistributedCluster, DistributedError, DistributedJob,
229 FaultDetector, JobExecution, JobPriority, JobScheduler, JobStatus, JobType, LoadBalancer,
230 LoadMetrics, NodeCapabilities, NodeStatus, ResourceRequirements, ResourceUsage,
231 SchedulingStrategy,
232};
233pub use ensemble::{
234 AggregationStrategy, BaggingPredictor, Bootstrap, OOBScoreEstimator, StackingHelper,
235};
236pub use environment::{
237 CacheInfo, CpuInfo, EnvironmentInfo, FeatureChecker, HardwareDetector, MemoryInfo, OSInfo,
238 PerformanceCharacteristics, RuntimeInfo,
239};
240pub use error_handling::{
241 create_error, create_error_at, EnhancedError, ErrorAggregator, ErrorContext, ErrorRecovery,
242 ErrorReporter, ErrorStatistics, ErrorSummary, RecoveryStrategy,
243};
244pub use external_integration::{
245 ArrayTransfer, CFunctionSignature, CParameter, CType, FFIUtils, PyArrayBuffer, PythonInterop,
246 PythonParameter, PythonValue, WasmBuildConfig, WasmOptimization, WasmParameter, WasmType,
247 WasmUtils,
248};
249pub use feature_engineering::{
250 BinningStrategy, FeatureBinner, InteractionFeatures, PolynomialFeatures,
251};
252pub use file_io::{
253 CompressionUtils, EfficientFileReader, EfficientFileWriter, FormatConverter,
254 SerializationUtils, StreamProcessor,
255};
256pub use gpu_computing::{
257 ActivationFunction, GpuArrayOps, GpuDevice, GpuError, GpuKernelExecution, GpuKernelInfo,
258 GpuMemoryAllocation, GpuProfiler, GpuUtils, KernelStats, MemoryStats, MemoryTransferStats,
259};
260pub use linear_algebra::{
261 ConditionNumber, EigenDecomposition, MatrixDecomposition, MatrixNorms, MatrixRank, MatrixUtils,
262 Pseudoinverse,
263};
264pub use logging::{
265 flush_global_logger, get_global_logger, set_global_level, ConsoleOutput, DistributedLogger,
266 FileOutput, JsonFormatter, LogAnalysis, LogAnalyzer, LogEntry, LogLevel, LogStats, Logger,
267 LoggerConfig, OperationStats, PerformanceLogger, TextFormatter,
268};
269pub use math_utils::{
270 constants, NumericalPrecision, OverflowDetection, RobustArrayOps, SpecialFunctions,
271};
272pub use memory::{
273 AllocationStats, GcHelper, LeakDetector, MemoryAlignment, MemoryMappedFile, MemoryMonitor,
274 MemoryPool, MemoryValidator, SafeBuffer, SafePtr, SafeVec, StackGuard, TrackingAllocator,
275};
276pub use metrics::{
277 bhattacharyya_distance, braycurtis_distance, canberra_distance, chebyshev_distance,
278 cosine_distance, cosine_distance_f32, cosine_similarity, cosine_similarity_f32,
279 euclidean_distance, euclidean_distance_f32, hamming_distance, hamming_distance_normalized,
280 hellinger_distance, jaccard_distance, jaccard_similarity, jensen_shannon_divergence,
281 kl_divergence, mahalanobis_distance, manhattan_distance, manhattan_distance_f32,
282 minkowski_distance, wasserstein_1d,
283};
284pub use multiclass::*;
285pub use optimization::{
286 ConstraintHandler, ConstraintViolation, ConvergenceCriteria, ConvergenceStatus,
287 GradientComputer, GradientMethod, LineSearch, LineSearchMethod, OptimizationHistory,
288};
289pub use parallel::{ParallelIterator, ParallelReducer, ThreadPool, WorkStealingQueue};
290pub use performance::{
291 BaselineMetrics, Benchmark, BenchmarkResult, MemoryTracker, ProfileReport, ProfileResult,
292 Profiler, RegressionDetector, RegressionResult, Timer, TimerSummary,
293};
294pub use performance_regression::{
295 PerformanceRegressionTester, RegressionTestResult, RegressionThresholds,
296};
297pub use preprocessing::{DataCleaner, DataQualityAssessor, FeatureScaler, OutlierDetector};
298pub use probabilistic::{
299 BloomFilter, BloomFilterStats, CountMinSketch, CountMinSketchStats, HyperLogLog,
300 HyperLogLogStats, LSHash, LSHashStats, MinHash, MinHashStats,
301};
302pub use profile_guided_optimization::{
303 BranchProfile, BranchType, CacheStatistics, DependencyChain, FunctionProfile,
304 ImplementationEffort, InstructionMix, LoopProfile, MemoryAccessPattern, MemoryAccessType,
305 OptimizationApplication, OptimizationOpportunity, OptimizationRecommendation,
306 OptimizationReport, OptimizationRule, OptimizationType, PerformanceProfile, PerformanceTargets,
307 ProfileError, ProfileGuidedOptimizer, ProfileSummary, ProfilerConfig, RiskLevel, StridePattern,
308 TriggerCondition,
309};
310pub use r_integration::{
311 RDataFrame, RError, RIntegration, RMatrix, RPackageManager, RScriptBuilder,
312 RStatisticalFunctions, RValue,
313};
314pub use random::{
315 bootstrap_indices, get_rng, importance_sampling, k_fold_indices, random_indices,
316 random_permutation, random_weights, reservoir_sampling, set_random_state, shuffle_indices,
317 stratified_split_indices, train_test_split_indices, weighted_sampling_without_replacement,
318 DistributionSampler, ThreadSafeRng,
319};
320pub use simd::{
321 SimdCapabilities, SimdDistanceOps, SimdF32Ops, SimdF64Ops, SimdMatrixOps, SimdStatsOps,
322};
323pub use spatial::{
324 geographic::{CoordinateSystem, GeoBounds, GeoPoint, GeoUtils, Hemisphere},
325 KdTree, OctTree, Point, QuadTree, RTree, Rectangle, SpatialHash, SpatialHashStats,
326};
327pub use statistical::{
328 ConfidenceInterval, ConfidenceIntervals, CorrelationAnalysis, DistributionFitting,
329 StatisticalTests, TestResult,
330};
331pub use text_processing::{
332 RegexUtils, StringSimilarity, TextAnalysis, TextNormalizer, TextParser, UnicodeUtils,
333};
334pub use time_series::{
335 AggregationMethod, LagFeatureGenerator, SlidingWindow, TemporalAggregator, TemporalIndex,
336 TimeSeries, TimeSeriesPoint, TimeZoneUtils, Timestamp, TrendDirection, WindowStats,
337};
338pub use type_safety::{
339 DataState, ExactSize, Kilograms, MatrixMul, Measurement, Meters, MinSize, ModelState,
340 NonNegative, Normalized, One, Pixels, Positive, Seconds, Three, Trained, Two, TypedArray,
341 Untrained, Unvalidated, Validated, ValidatedArray, Zero, D1, D2, D3,
342};
343pub use validation::*;
344pub use visualization::{
345 AxisConfig, BoxPlotData, ChartData, Color, HeatmapData, HistogramData, LinePlotData,
346 MLVisualizationUtils, PlotData, PlotLayout, PlotMargin, PlotSummary, PlotUtils, Point2D,
347 ScatterPlotData,
348};
349
350#[derive(thiserror::Error, Debug, Clone)]
352pub enum UtilsError {
353 #[error("Shape mismatch: expected {expected:?}, got {actual:?}")]
354 ShapeMismatch {
355 expected: Vec<usize>,
356 actual: Vec<usize>,
357 },
358 #[error("Invalid parameter: {0}")]
359 InvalidParameter(String),
360 #[error("Empty input")]
361 EmptyInput,
362 #[error("Invalid random state: {0}")]
363 InvalidRandomState(String),
364 #[error("Insufficient data: need at least {min} samples, got {actual}")]
365 InsufficientData { min: usize, actual: usize },
366}
367
368impl From<UtilsError> for sklears_core::error::SklearsError {
369 fn from(err: UtilsError) -> Self {
370 sklears_core::error::SklearsError::InvalidInput(err.to_string())
371 }
372}
373
374impl From<serde_json::Error> for UtilsError {
375 fn from(err: serde_json::Error) -> Self {
376 UtilsError::InvalidParameter(format!("JSON serialization error: {err}"))
377 }
378}
379
380pub type UtilsResult<T> = std::result::Result<T, UtilsError>;