#![warn(missing_docs)]
//! Root of the dataset module tree: declares the submodules and re-exports
//! their commonly used items so callers can import them from one place.
//!
//! NOTE(review): presumably the crate's `lib.rs` — the file name is not
//! visible from here, confirm.
/// Home of `make_adversarial_examples`, `make_anomaly_dataset`, `make_few_shot_dataset`
/// and related generators and configs re-exported from this file.
pub mod advanced_generators;
/// Home of `BenchmarkRunner`, `BenchmarkSuite`, `BenchmarkResult` and `PerformanceComparison`.
pub mod benchmarks;
/// Home of `CacheManager`, `DatasetCache`, `get_cachedir` and the cache statistics types.
pub mod cache;
/// Home of `CloudClient`, `CloudConfig`, the `presets` client constructors and the
/// `public_datasets` types.
pub mod cloud;
/// Home of `DistributedProcessor`, `DistributedConfig`, `ScalingMethod` and `ScalingParameters`.
pub mod distributed;
/// Home of the `astronomy`, `climate`, `genomics` and `convenience` submodules plus
/// `DomainConfig` and `QualityFilters`.
pub mod domain_specific;
/// Error module; no re-export from it appears in the visible part of this file.
pub mod error;
/// Home of `DatasetExplorer`, `DatasetSummary`, `ExploreConfig` and the `convenience`
/// helpers `explore`, `info`, `quick_summary` and `export_summary`.
pub mod explore;
/// Home of `ExternalClient`, `ExternalConfig`, the `repositories` types and the
/// `convenience` loaders (some of which are gated on the `download` feature below).
pub mod external;
/// Home of the `make_*` generator functions and the submodules `time_series`, `graph`,
/// `sparse`, `classification`, `regression`, `structured`, `concept_drift`,
/// `heterogeneous`, `low_rank`, `multilabel_advanced` and `sparse_classification`.
pub mod generators;
/// Home of `GpuContext`, `GpuConfig`, `GpuBackend`, `list_gpu_devices` and the
/// `make_*_auto_gpu` functions.
pub mod gpu;
/// Home of `AdvancedGpuOptimizer`, `AdvancedKernelConfig`, `generate_advanced_matrix`
/// and `benchmark_advanced_performance`.
pub mod gpu_optimization;
/// Home of `load_csv`, `load_json`, `load_raw`, `save_json`, `CsvConfig` and
/// `StreamingConfig`.
pub mod loaders;
/// Home of `MLPipeline`, `MLExperiment`, `DataSplit` and the `convenience` helpers
/// `train_test_split`, `cv_split`, `prepare_for_ml` and `create_experiment`.
pub mod ml_integration;
/// Home of `RealWorldDatasets`, `RealWorldConfig`, `load_titanic`, `load_adult` and
/// the other `load_*` functions re-exported from this file.
pub mod real_world;
/// Home of `DatasetRegistry`, `DatasetMetadata`, `get_registry` and `load_dataset_byname`.
pub mod registry;
/// Module whose public items are all re-exported from this file via `pub use sample::*`.
pub mod sample;
/// Home of `StreamProcessor`, `StreamingIterator`, `StreamConfig`, `stream_csv`,
/// `stream_classification` and `stream_regression`.
pub mod streaming;
/// Time-series module; no re-export from it appears in the visible part of this file
/// (the `make_ar_process` family below comes from `generators::time_series` instead).
pub mod time_series;
/// Module whose public items are all re-exported from this file via `pub use toy::*`.
pub mod toy;
/// Home of `Dataset`, the splitting, sampling and scaling helpers, and
/// `AdvancedDatasetAnalyzer`.
pub mod utils;
/// Home of `load_iris`, `load_boston`, `load_digits`, `load_breast_cancer`
/// (re-exported below with a `_full` suffix), `load_wine` and `DatasetResult`.
pub mod standard;
/// Module `stability`; no re-export from it appears in the visible part of this file.
pub mod stability;
/// Module `platform_dirs`; no re-export from it appears in the visible part of this file.
pub mod platform_dirs;
// NOTE(review): private module that is compiled unconditionally (no `#[cfg(test)]`
// on the declaration). The name suggests test-only code — confirm whether it should
// be gated.
mod method_resolution_test;
/// Home of `AdaptiveStreamingEngine`, `AdaptiveStreamConfig`, `create_adaptive_engine`
/// and the stream quality/metrics types.
pub mod adaptive_streaming_engine;
/// Home of `NeuromorphicProcessor`, `NeuromorphicTransform`, `NetworkTopology` and
/// `SynapticPlasticity`.
pub mod neuromorphic_data_processor;
/// Home of `QuantumDatasetGenerator`, `make_quantum_blobs`, `make_quantum_classification`
/// and `make_quantum_regression`.
pub mod quantum_enhanced_generators;
/// Home of `QuantumNeuromorphicFusion`, `QuantumBioFusionResult` and `QuantumInterference`.
pub mod quantum_neuromorphic_fusion;
/// Home of `LazyDataset`, `MmapDataset`, `LazyLoadConfig` and `LazyChunkIterator`.
/// Only compiled when the `lazy-loading` feature is enabled.
#[cfg(feature = "lazy-loading")]
pub mod lazy_loading;
/// Home of `AugmentationPipeline`, `Transform` and the individual transforms
/// re-exported below. Only compiled when the `augmentation` feature is enabled.
#[cfg(feature = "augmentation")]
pub mod augmentation;
/// Home of `ParallelPipeline`, `ParallelConfig`, `PreprocessFn`, `create_pipeline` and
/// `create_pipeline_with_config`.
pub mod parallel_preprocessing;
/// Home of `DistributedLoader`, `DistributedCache`, `Shard` and `create_loader`.
/// Only compiled when the `distributed` feature is enabled.
#[cfg(feature = "distributed")]
pub mod distributed_loading;
/// Home of `FormatType`, `FormatConfig` and `CompressionCodec`; its reader/writer
/// items are re-exported below only when the `formats` feature is enabled.
pub mod formats;
/// Module `benchmarks_module`; no re-export from it appears in the visible part of
/// this file.
pub mod benchmarks_module;
/// Module `hub_metadata`; no re-export from it appears in the visible part of this file.
pub mod hub_metadata;
/// Home of `DatasetShard`, `shard_dataset`, `shuffled_shard`, `stratified_shard` and
/// `merge_shards`.
pub mod sharding;
/// Home of `MiniBatchSampler`, `MiniBatch`, `SamplerConfig`, `SamplerStrategy` and
/// `iter_batches`.
pub mod sampling;
/// Module `streaming_csv`; no re-export from it appears in the visible part of this file.
pub mod streaming_csv;
// ---- Re-exports: adaptive streaming engine ----
pub use adaptive_streaming_engine::{
create_adaptive_engine, create_adaptive_engine_with_config, AdaptiveStreamConfig,
AdaptiveStreamingEngine, AlertSeverity, AlertType, ChunkMetadata, DataCharacteristics,
MemoryStrategy, PatternType, PerformanceMetrics, QualityAlert, QualityMetrics,
StatisticalMoments, StreamChunk, TrendDirection, TrendIndicators,
};
// ---- Re-exports: advanced generators ----
pub use advanced_generators::{
make_adversarial_examples, make_anomaly_dataset, make_continual_learning_dataset,
make_domain_adaptation_dataset, make_few_shot_dataset, make_multitask_dataset,
AdversarialConfig, AnomalyConfig, AnomalyType, AttackMethod, ContinualLearningDataset,
DomainAdaptationConfig, DomainAdaptationDataset, FewShotDataset, MultiTaskConfig,
MultiTaskDataset, TaskType,
};
// ---- Re-exports: benchmarks ----
// `BenchmarkResult` here is the `benchmarks` one; the `gpu_optimization` type of the
// same name is re-exported further down under an alias.
pub use benchmarks::{BenchmarkResult, BenchmarkRunner, BenchmarkSuite, PerformanceComparison};
// ---- Re-exports: cloud (client types, preset constructors, public dataset types) ----
pub use cloud::{
presets::{azure_client, gcs_client, public_s3_client, s3_client, s3_compatible_client},
public_datasets::{AWSOpenData, AzureOpenData, GCPPublicData},
CloudClient, CloudConfig, CloudCredentials, CloudProvider,
};
// ---- Re-exports: distributed ----
// `ScalingMethod` keeps its plain name here; the `ml_integration` type of the same
// name is aliased to `MLScalingMethod` below to avoid the clash.
pub use distributed::{DistributedConfig, DistributedProcessor, ScalingMethod, ScalingParameters};
// ---- Re-exports: domain-specific datasets ----
pub use domain_specific::{
astronomy::StellarDatasets,
climate::ClimateDatasets,
convenience::{
list_domain_datasets, load_atmospheric_chemistry, load_climate_data, load_exoplanets,
load_gene_expression, load_stellar_classification,
},
genomics::GenomicsDatasets,
DomainConfig, QualityFilters,
};
// ---- Re-exports: dataset exploration ----
pub use explore::{
convenience::{explore, export_summary, info, quick_summary},
DatasetExplorer, DatasetSummary, ExploreConfig, FeatureStatistics, InferredDataType,
OutputFormat, QualityAssessment,
};
// ---- Re-exports: external repositories ----
// The `*_sync` GitHub/UCI loaders are re-exported only when the `download` feature
// is DISABLED; the non-`_sync` counterparts appear below when it is enabled.
#[cfg(not(feature = "download"))]
pub use external::convenience::{load_github_dataset_sync, load_uci_dataset_sync};
// Re-exported regardless of the `download` feature (including `load_from_url_sync`).
pub use external::{
convenience::{list_uci_datasets, load_from_url_sync},
repositories::{GitHubRepository, KaggleRepository, UCIRepository},
ExternalClient, ExternalConfig, ProgressCallback,
};
// ---- Re-exports: ML integration ----
// `ScalingMethod` is exposed as `MLScalingMethod` because `distributed::ScalingMethod`
// already occupies the unqualified name at this level.
pub use ml_integration::{
convenience::{create_experiment, cv_split, prepare_for_ml, train_test_split},
CrossValidationResults, DataSplit, MLExperiment, MLPipeline, MLPipelineConfig,
ScalingMethod as MLScalingMethod,
};
// ---- Re-exports: cache ----
pub use cache::{
get_cachedir, BatchOperations, BatchResult, CacheFileInfo, CacheManager, CacheStats,
DatasetCache, DetailedCacheStats,
};
// Re-exported only when the `download` feature is ENABLED.
#[cfg(feature = "download")]
pub use external::convenience::{load_from_url, load_github_dataset, load_uci_dataset};
// ---- Re-exports: items taken directly from `generators` ----
pub use generators::{
add_time_series_noise, benchmark_gpu_vs_cpu, get_gpu_info, gpu_is_available,
inject_missing_data, inject_outliers, make_anisotropic_blobs, make_blobs, make_blobs_gpu,
make_circles, make_classification, make_classification_gpu, make_corrupted_dataset, make_helix,
make_hierarchical_clusters, make_intersecting_manifolds, make_manifold, make_moons,
make_regression, make_regression_gpu, make_s_curve, make_severed_sphere, make_spirals,
make_swiss_roll, make_swiss_roll_advanced, make_time_series, make_torus, make_twin_peaks,
ManifoldConfig, ManifoldType, MissingPattern, OutlierType,
};
// ---- Re-exports: `generators::time_series` ----
pub use generators::time_series::{
make_ar_process, make_random_walk, make_seasonal, make_sine_wave,
};
// ---- Re-exports: `generators::graph` ----
pub use generators::graph::{
make_barabasi_albert, make_karate_club, make_random_graph, make_watts_strogatz,
};
// ---- Re-exports: `generators::sparse` ----
pub use generators::sparse::{make_sparse_banded, make_sparse_laplacian, make_sparse_spd};
// ---- Re-exports: `generators::classification` ----
pub use generators::classification::{
make_classification_enhanced, make_hastie_10_2, make_multilabel_classification,
ClassificationConfig, MultilabelConfig, MultilabelDataset,
};
// ---- Re-exports: `generators::regression` ----
pub use generators::regression::{
make_friedman1, make_friedman2, make_friedman3, make_low_rank_matrix, make_sparse_uncorrelated,
};
// ---- Re-exports: `generators::structured` ----
pub use generators::structured::{
make_biclusters, make_checkerboard, make_sparse_coded_signal, make_sparse_spd_matrix,
make_spd_matrix,
};
// ---- Re-exports: `generators::concept_drift` ----
pub use generators::concept_drift::{
detect_drift_accuracy, make_concept_drift, ConceptDriftConfig, ConceptDriftDataset, DriftType,
};
// ---- Re-exports: `generators::heterogeneous` ----
pub use generators::heterogeneous::{
encode_one_hot, make_heterogeneous, FeatureType, HeteroConfig, HeteroDataset,
HeteroFeatureValue,
};
// ---- Re-exports: `generators::low_rank` ----
// `make_low_rank` is exposed at this level under the name `make_low_rank_completion`.
pub use generators::low_rank::{
make_low_rank as make_low_rank_completion, observed_rmse, reconstruction_error, LowRankConfig,
LowRankDataset,
};
// ---- Re-exports: `generators::multilabel_advanced` ----
pub use generators::multilabel_advanced::{
hamming_loss, label_cardinality, label_density_score, make_advanced_multilabel_classification,
AdvancedMultilabelConfig, AdvancedMultilabelDataset,
};
// ---- Re-exports: `generators::sparse_classification` ----
// `make_sparse_classification` is exposed at this level under the name `make_sparse_class`.
pub use generators::sparse_classification::{
make_sparse_classification as make_sparse_class, sparsity_ratio, SparseClassConfig,
SparseClassDataset,
};
// ---- Re-exports: sharding and mini-batch sampling ----
pub use sharding::{merge_shards, shard_dataset, shuffled_shard, stratified_shard, DatasetShard};
pub use sampling::{iter_batches, MiniBatch, MiniBatchSampler, SamplerConfig, SamplerStrategy};
// ---- Re-exports: gpu ----
pub use gpu::{
get_optimal_gpu_config, is_cuda_available, is_opencl_available, list_gpu_devices,
make_blobs_auto_gpu, make_classification_auto_gpu, make_regression_auto_gpu, GpuBackend,
GpuBenchmark, GpuBenchmarkResults, GpuConfig, GpuContext, GpuDeviceInfo, GpuMemoryConfig,
};
// ---- Re-exports: gpu_optimization ----
// `BenchmarkResult` is exposed as `AdvancedBenchmarkResult` because
// `benchmarks::BenchmarkResult` already occupies the unqualified name at this level.
pub use gpu_optimization::{
benchmark_advanced_performance, generate_advanced_matrix, AdvancedGpuOptimizer,
AdvancedKernelConfig, BenchmarkResult as AdvancedBenchmarkResult, DataLayout,
LoadBalancingMethod, MemoryAccessPattern, PerformanceBenchmarkResults, SpecializationLevel,
VectorizationStrategy,
};
// ---- Re-exports: loaders ----
pub use loaders::{
load_csv, load_csv_legacy, load_csv_parallel, load_csv_streaming, load_json, load_raw,
save_json, CsvConfig, DatasetChunkIterator, StreamingConfig,
};
// ---- Re-exports: neuromorphic data processor ----
pub use neuromorphic_data_processor::{
create_neuromorphic_processor, create_neuromorphic_processor_with_topology, NetworkTopology,
NeuromorphicProcessor, NeuromorphicTransform, SynapticPlasticity,
};
// ---- Re-exports: quantum-enhanced generators ----
pub use quantum_enhanced_generators::{
make_quantum_blobs, make_quantum_classification, make_quantum_regression,
QuantumDatasetGenerator,
};
// ---- Re-exports: quantum-neuromorphic fusion ----
pub use quantum_neuromorphic_fusion::{
create_fusion_with_params, create_quantum_neuromorphic_fusion, QuantumBioFusionResult,
QuantumInterference, QuantumNeuromorphicFusion,
};
// ---- Re-exports: real-world datasets ----
pub use real_world::{
list_real_world_datasets, load_adult, load_california_housing, load_heart_disease,
load_red_wine_quality, load_titanic, RealWorldConfig, RealWorldDatasets,
};
// ---- Re-exports: registry ----
pub use registry::{get_registry, load_dataset_byname, DatasetMetadata, DatasetRegistry};
// Glob re-export: every public item of `sample` becomes available at this level.
pub use sample::*;
// ---- Re-exports: standard datasets ----
// Four loaders are exposed with a `_full` suffix (`load_iris` -> `load_iris_full`, etc.).
// NOTE(review): presumably this avoids clashing with same-named items pulled in by the
// `sample::*` / `toy::*` globs — confirm against those modules.
pub use standard::{
load_boston as load_boston_full, load_breast_cancer as load_breast_cancer_full,
load_digits as load_digits_full, load_iris as load_iris_full, load_wine, DatasetResult,
};
// ---- Re-exports: streaming ----
pub use streaming::{
stream_classification, stream_csv, stream_regression, DataChunk, StreamConfig, StreamProcessor,
StreamStats, StreamTransformer, StreamingIterator,
};
// Glob re-export: every public item of `toy` becomes available at this level.
pub use toy::*;
// ---- Re-exports: utils (dataset type, splitting, sampling, scaling, analysis) ----
pub use utils::{
analyze_dataset_advanced, create_balanced_dataset, create_binned_features,
generate_synthetic_samples, importance_sample, k_fold_split, min_max_scale,
polynomial_features, quick_quality_assessment, random_oversample, random_sample,
random_undersample, robust_scale, statistical_features, stratified_k_fold_split,
stratified_sample, time_series_split, AdvancedDatasetAnalyzer, AdvancedQualityMetrics,
BalancingStrategy, BinningStrategy, CorrelationInsights, CrossValidationFolds, Dataset,
NormalityAssessment,
};
// ---- Re-exports: lazy_loading (only with the `lazy-loading` feature) ----
// `from_binary` and `from_binary_with_config` are exposed with a `lazy_` prefix.
#[cfg(feature = "lazy-loading")]
pub use lazy_loading::{
from_binary as lazy_from_binary, from_binary_with_config as lazy_from_binary_with_config,
LazyChunkIterator, LazyDataset, LazyLoadConfig, MmapDataset,
};
// ---- Re-exports: augmentation (only with the `augmentation` feature) ----
#[cfg(feature = "augmentation")]
pub use augmentation::{
standard_image_augmentation, standard_tabular_augmentation, AugmentationPipeline, Brightness,
Contrast, GaussianNoise, HorizontalFlip, Mixup, RandomFeatureScale, RandomRotation90,
Transform, VerticalFlip,
};
// ---- Re-exports: parallel_preprocessing (always available) ----
pub use parallel_preprocessing::{
create_pipeline, create_pipeline_with_config, ParallelConfig, ParallelPipeline, PreprocessFn,
};
// ---- Re-exports: distributed_loading (only with the `distributed` feature) ----
// `DistributedConfig` is exposed as `DistributedLoadingConfig` because
// `distributed::DistributedConfig` already occupies the unqualified name at this level.
#[cfg(feature = "distributed")]
pub use distributed_loading::{
create_loader, create_loader_with_config, DistributedCache,
DistributedConfig as DistributedLoadingConfig, DistributedLoader, Shard,
};
// ---- Re-exports: formats ----
// The configuration types are always re-exported...
pub use formats::{CompressionCodec, FormatConfig, FormatType};
// ...while the readers, writers and converter require the `formats` feature.
#[cfg(feature = "formats")]
pub use formats::{
read_auto, read_hdf5, read_parquet, write_hdf5, write_parquet, FormatConverter, Hdf5Reader,
Hdf5Writer, ParquetReader, ParquetWriter,
};