1#![warn(missing_docs)]
129
// Public submodules of the crate. Each `pub mod x;` declares a module whose body lives
// in a separate source file; selected items from most of these modules are re-exported
// at the crate root by the `pub use` declarations later in this file.
//
// Note: no crate-root `pub use` for `error` or for the top-level `time_series` module
// appears in this file, so their items are reached through the module path
// (e.g. `crate::error::...`) unless they are re-exported somewhere not visible here.
130pub mod advanced_generators;
131pub mod benchmarks;
132pub mod cache;
133pub mod cloud;
134pub mod distributed;
135pub mod domain_specific;
136pub mod error;
137pub mod explore;
138pub mod external;
139pub mod generators;
140pub mod gpu;
141pub mod gpu_optimization;
142pub mod loaders;
143pub mod ml_integration;
144pub mod real_world;
145pub mod registry;
146pub mod sample;
147pub mod streaming;
148pub mod time_series;
149pub mod toy;
150pub mod utils;
155
// Loader functions from `standard` are re-exported at the crate root later in this file
// under `_full`-suffixed aliases (plus `load_wine` and `DatasetResult`).
156pub mod standard;
161
// No crate-root re-export of `stability` items is visible in this file.
162pub mod stability;
167
// No crate-root re-export of `platform_dirs` items is visible in this file.
168pub mod platform_dirs;
170
// Private module (no `pub`), so it is not part of the crate's public API. No
// `#[cfg(test)]` attribute is visible on it here, which means it is compiled in every
// build configuration.
// NOTE(review): the name suggests test-only code — confirm whether it should be gated.
171mod method_resolution_test;
173
// Public modules whose constructor functions and main types are re-exported at the
// crate root by the `pub use` declarations later in this file.
174pub mod adaptive_streaming_engine;
175pub mod neuromorphic_data_processor;
176pub mod quantum_enhanced_generators;
177pub mod quantum_neuromorphic_fusion;
178
// Compiled only when the `lazy-loading` Cargo feature is enabled.
179#[cfg(feature = "lazy-loading")]
185pub mod lazy_loading;
186
// Compiled only when the `augmentation` Cargo feature is enabled.
187#[cfg(feature = "augmentation")]
192pub mod augmentation;
193
// Always compiled: there is no feature gate on this module.
194pub mod parallel_preprocessing;
199
// Compiled only when the `distributed` Cargo feature is enabled. This is a different
// module from the ungated `distributed` module declared earlier in this file.
200#[cfg(feature = "distributed")]
205pub mod distributed_loading;
206
// The module itself is always compiled; only part of its crate-root re-exports
// (the reader/writer items at the end of this file) are gated on the `formats` feature.
207pub mod formats;
212
// Crate-root re-exports from `adaptive_streaming_engine`: the two `create_adaptive_engine*`
// functions plus the associated configuration, chunk, metric and alert types.
213pub use adaptive_streaming_engine::{
215 create_adaptive_engine, create_adaptive_engine_with_config, AdaptiveStreamConfig,
216 AdaptiveStreamingEngine, AlertSeverity, AlertType, ChunkMetadata, DataCharacteristics,
217 MemoryStrategy, PatternType, PerformanceMetrics, QualityAlert, QualityMetrics,
218 StatisticalMoments, StreamChunk, TrendDirection, TrendIndicators,
219};
// Crate-root re-exports from `advanced_generators`: `make_*` functions and their
// configuration / dataset types.
220pub use advanced_generators::{
221 make_adversarial_examples, make_anomaly_dataset, make_continual_learning_dataset,
222 make_domain_adaptation_dataset, make_few_shot_dataset, make_multitask_dataset,
223 AdversarialConfig, AnomalyConfig, AnomalyType, AttackMethod, ContinualLearningDataset,
224 DomainAdaptationConfig, DomainAdaptationDataset, FewShotDataset, MultiTaskConfig,
225 MultiTaskDataset, TaskType,
226};
// `benchmarks::BenchmarkResult` takes the unqualified name at the crate root; the
// same-named item from `gpu_optimization` is re-exported later under an alias.
227pub use benchmarks::{BenchmarkResult, BenchmarkRunner, BenchmarkSuite, PerformanceComparison};
// Re-exports from `cloud`, including items from its nested `presets` and
// `public_datasets` submodules flattened to the crate root.
228pub use cloud::{
229 presets::{azure_client, gcs_client, public_s3_client, s3_client, s3_compatible_client},
230 public_datasets::{AWSOpenData, AzureOpenData, GCPPublicData},
231 CloudClient, CloudConfig, CloudCredentials, CloudProvider,
232};
// `distributed::ScalingMethod` and `distributed::DistributedConfig` take the unqualified
// names at the crate root; `ml_integration::ScalingMethod` and
// `distributed_loading::DistributedConfig` are re-exported elsewhere in this file under
// aliases (`MLScalingMethod`, `DistributedLoadingConfig`).
233pub use distributed::{DistributedConfig, DistributedProcessor, ScalingMethod, ScalingParameters};
// Re-exports from `domain_specific`, flattening items from its `astronomy`, `climate`,
// `convenience` and `genomics` submodules to the crate root.
234pub use domain_specific::{
235 astronomy::StellarDatasets,
236 climate::ClimateDatasets,
237 convenience::{
238 list_domain_datasets, load_atmospheric_chemistry, load_climate_data, load_exoplanets,
239 load_gene_expression, load_stellar_classification,
240 },
241 genomics::GenomicsDatasets,
242 DomainConfig, QualityFilters,
243};
// Re-exports from `explore`, including the functions in `explore::convenience`.
// Note that this makes a function named `explore` available at the crate root next to
// the `explore` module (functions and modules live in different namespaces).
244pub use explore::{
245 convenience::{explore, export_summary, info, quick_summary},
246 DatasetExplorer, DatasetSummary, ExploreConfig, FeatureStatistics, InferredDataType,
247 OutputFormat, QualityAssessment,
248};
// These two `_sync` loaders are re-exported only when the `download` feature is
// DISABLED. The `download`-enabled build instead re-exports `load_from_url`,
// `load_github_dataset` and `load_uci_dataset` (see the `#[cfg(feature = "download")]`
// re-export further down in this file).
249#[cfg(not(feature = "download"))]
250pub use external::convenience::{load_github_dataset_sync, load_uci_dataset_sync};
// Ungated re-exports from `external`: available regardless of the `download` feature.
251pub use external::{
252 convenience::{list_uci_datasets, load_from_url_sync},
253 repositories::{GitHubRepository, KaggleRepository, UCIRepository},
254 ExternalClient, ExternalConfig, ProgressCallback,
255};
// Re-exports from `ml_integration`. `ScalingMethod` is aliased to `MLScalingMethod`
// because the unqualified name `ScalingMethod` is already taken at the crate root by
// the re-export from `distributed`.
256pub use ml_integration::{
257 convenience::{create_experiment, cv_split, prepare_for_ml, train_test_split},
258 CrossValidationResults, DataSplit, MLExperiment, MLPipeline, MLPipelineConfig,
259 ScalingMethod as MLScalingMethod,
260};
261
// Crate-root re-exports from `cache`.
262pub use cache::{
263 get_cachedir, BatchOperations, BatchResult, CacheFileInfo, CacheManager, CacheStats,
264 DatasetCache, DetailedCacheStats,
265};
// Re-exported only when the `download` feature is enabled. The `_sync`-suffixed GitHub
// and UCI loaders are re-exported elsewhere in this file under the opposite condition
// (`not(feature = "download")`).
266#[cfg(feature = "download")]
267pub use external::convenience::{load_from_url, load_github_dataset, load_uci_dataset};
// Items re-exported directly from the `generators` module root. This list also carries
// GPU-related names (`benchmark_gpu_vs_cpu`, `get_gpu_info`, `gpu_is_available`,
// `make_*_gpu`); these come from `generators`, not from the separate `gpu` module.
268pub use generators::{
269 add_time_series_noise, benchmark_gpu_vs_cpu, get_gpu_info, gpu_is_available,
270 inject_missing_data, inject_outliers, make_anisotropic_blobs, make_blobs, make_blobs_gpu,
271 make_circles, make_classification, make_classification_gpu, make_corrupted_dataset, make_helix,
272 make_hierarchical_clusters, make_intersecting_manifolds, make_manifold, make_moons,
273 make_regression, make_regression_gpu, make_s_curve, make_severed_sphere, make_spirals,
274 make_swiss_roll, make_swiss_roll_advanced, make_time_series, make_torus, make_twin_peaks,
275 ManifoldConfig, ManifoldType, MissingPattern, OutlierType,
276};
// Re-exports from `generators::time_series` (a submodule of `generators`, distinct from
// the crate's top-level `time_series` module).
277pub use generators::time_series::{
279 make_ar_process, make_random_walk, make_seasonal, make_sine_wave,
280};
// Re-exports from `generators::graph`.
281pub use generators::graph::{
283 make_barabasi_albert, make_karate_club, make_random_graph, make_watts_strogatz,
284};
// Re-exports from `generators::sparse`.
285pub use generators::sparse::{make_sparse_banded, make_sparse_laplacian, make_sparse_spd};
// Re-exports from `generators::classification`.
287pub use generators::classification::{
289 make_classification_enhanced, make_hastie_10_2, make_multilabel_classification,
290 ClassificationConfig, MultilabelConfig, MultilabelDataset,
291};
// Re-exports from `generators::regression`.
292pub use generators::regression::{
294 make_friedman1, make_friedman2, make_friedman3, make_low_rank_matrix, make_sparse_uncorrelated,
295};
// Re-exports from `generators::structured`. `make_sparse_spd_matrix` here is a different
// name from `make_sparse_spd` re-exported from `generators::sparse` above.
296pub use generators::structured::{
298 make_biclusters, make_checkerboard, make_sparse_coded_signal, make_sparse_spd_matrix,
299 make_spd_matrix,
300};
// Crate-root re-exports from `gpu`: availability/device queries, `make_*_auto_gpu`
// functions and the GPU configuration / context / benchmark types.
301pub use gpu::{
303 get_optimal_gpu_config, is_cuda_available, is_opencl_available, list_gpu_devices,
304 make_blobs_auto_gpu, make_classification_auto_gpu, make_regression_auto_gpu, GpuBackend,
305 GpuBenchmark, GpuBenchmarkResults, GpuConfig, GpuContext, GpuDeviceInfo, GpuMemoryConfig,
306};
// Re-exports from `gpu_optimization`. Its `BenchmarkResult` is aliased to
// `AdvancedBenchmarkResult` because the unqualified name `BenchmarkResult` is already
// taken at the crate root by the re-export from `benchmarks`.
307pub use gpu_optimization::{
308 benchmark_advanced_performance, generate_advanced_matrix, AdvancedGpuOptimizer,
309 AdvancedKernelConfig, BenchmarkResult as AdvancedBenchmarkResult, DataLayout,
310 LoadBalancingMethod, MemoryAccessPattern, PerformanceBenchmarkResults, SpecializationLevel,
311 VectorizationStrategy,
312};
// Re-exports from `loaders`: `load_*` / `save_json` functions and their config types.
313pub use loaders::{
314 load_csv, load_csv_legacy, load_csv_parallel, load_csv_streaming, load_json, load_raw,
315 save_json, CsvConfig, DatasetChunkIterator, StreamingConfig,
316};
// Re-exports from `neuromorphic_data_processor`.
317pub use neuromorphic_data_processor::{
318 create_neuromorphic_processor, create_neuromorphic_processor_with_topology, NetworkTopology,
319 NeuromorphicProcessor, NeuromorphicTransform, SynapticPlasticity,
320};
// Re-exports from `quantum_enhanced_generators`.
321pub use quantum_enhanced_generators::{
322 make_quantum_blobs, make_quantum_classification, make_quantum_regression,
323 QuantumDatasetGenerator,
324};
// Re-exports from `quantum_neuromorphic_fusion`.
325pub use quantum_neuromorphic_fusion::{
326 create_fusion_with_params, create_quantum_neuromorphic_fusion, QuantumBioFusionResult,
327 QuantumInterference, QuantumNeuromorphicFusion,
328};
// Crate-root re-exports from `real_world`.
329pub use real_world::{
330 list_real_world_datasets, load_adult, load_california_housing, load_heart_disease,
331 load_red_wine_quality, load_titanic, RealWorldConfig, RealWorldDatasets,
332};
// Crate-root re-exports from `registry`.
333pub use registry::{get_registry, load_dataset_byname, DatasetMetadata, DatasetRegistry};
// Glob re-export: every public item of `sample` becomes available at the crate root.
// Explicitly named re-exports in this file take precedence over glob-imported names.
334pub use sample::*;
// The `standard` loaders for boston / breast_cancer / digits / iris are re-exported under
// `_full`-suffixed aliases; `load_wine` and `DatasetResult` keep their original names.
// NOTE(review): the aliases presumably avoid clashing with same-named loaders coming in
// through the `toy::*` / `sample::*` globs — confirm against those modules.
335pub use standard::{
336 load_boston as load_boston_full, load_breast_cancer as load_breast_cancer_full,
337 load_digits as load_digits_full, load_iris as load_iris_full, load_wine, DatasetResult,
338};
// Crate-root re-exports from `streaming`. `StreamConfig` here is a different name from
// `loaders::StreamingConfig`, which is also re-exported at the crate root.
339pub use streaming::{
340 stream_classification, stream_csv, stream_regression, DataChunk, StreamConfig, StreamProcessor,
341 StreamStats, StreamTransformer, StreamingIterator,
342};
// Glob re-export: every public item of `toy` becomes available at the crate root.
343pub use toy::*;
// Crate-root re-exports from `utils`, including the `Dataset` type.
344pub use utils::{
345 analyze_dataset_advanced, create_balanced_dataset, create_binned_features,
346 generate_synthetic_samples, importance_sample, k_fold_split, min_max_scale,
347 polynomial_features, quick_quality_assessment, random_oversample, random_sample,
348 random_undersample, robust_scale, statistical_features, stratified_k_fold_split,
349 stratified_sample, time_series_split, AdvancedDatasetAnalyzer, AdvancedQualityMetrics,
350 BalancingStrategy, BinningStrategy, CorrelationInsights, CrossValidationFolds, Dataset,
351 NormalityAssessment,
352};
353
// Available only with the `lazy-loading` feature (the same gate as the `lazy_loading`
// module declaration). `from_binary` and `from_binary_with_config` are re-exported
// under `lazy_`-prefixed aliases.
354#[cfg(feature = "lazy-loading")]
356pub use lazy_loading::{
357 from_binary as lazy_from_binary, from_binary_with_config as lazy_from_binary_with_config,
358 LazyChunkIterator, LazyDataset, LazyLoadConfig, MmapDataset,
359};
360
// Available only with the `augmentation` feature (the same gate as the `augmentation`
// module declaration).
361#[cfg(feature = "augmentation")]
362pub use augmentation::{
363 standard_image_augmentation, standard_tabular_augmentation, AugmentationPipeline, Brightness,
364 Contrast, GaussianNoise, HorizontalFlip, Mixup, RandomFeatureScale, RandomRotation90,
365 Transform, VerticalFlip,
366};
367
// Ungated re-exports from `parallel_preprocessing`.
368pub use parallel_preprocessing::{
369 create_pipeline, create_pipeline_with_config, ParallelConfig, ParallelPipeline, PreprocessFn,
370};
371
// Available only with the `distributed` feature. `DistributedConfig` is aliased to
// `DistributedLoadingConfig` because the unqualified name `DistributedConfig` is already
// taken at the crate root by the re-export from the `distributed` module.
372#[cfg(feature = "distributed")]
373pub use distributed_loading::{
374 create_loader, create_loader_with_config, DistributedCache,
375 DistributedConfig as DistributedLoadingConfig, DistributedLoader, Shard,
376};
377
// These three `formats` types are re-exported unconditionally.
378pub use formats::{CompressionCodec, FormatConfig, FormatType};
379
// The reader / writer / converter items from `formats` are re-exported only when the
// `formats` feature is enabled.
380#[cfg(feature = "formats")]
381pub use formats::{
382 read_auto, read_hdf5, read_parquet, write_hdf5, write_parquet, FormatConverter, Hdf5Reader,
383 Hdf5Writer, ParquetReader, ParquetWriter,
384};