// Lint configuration applied to everything below this point.
//
// `unsafe_code` is raised to a warning, so any use of `unsafe` is reported as a
// warning rather than rejected outright.
#![warn(unsafe_code)]
// Do not warn about `cfg` names/values the compiler does not recognise.
#![allow(unexpected_cfgs)]
// Clippy lints silenced for the whole scope.
#![allow(clippy::result_large_err)]
#![allow(clippy::too_many_arguments)]
#![allow(clippy::let_and_return)]
#![allow(clippy::should_implement_trait)]
#![allow(clippy::erasing_op)]
#![allow(clippy::identity_op)]
// NOTE(review): the four rustc lints below are allowed globally, which also hides
// genuinely unused imports, variables, `mut` bindings and dead code. Consider
// narrowing them to the specific items that need them.
#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(unused_mut)]
#![allow(dead_code)]
// Further Clippy style lints silenced for the whole scope.
#![allow(clippy::clone_on_copy)]
#![allow(clippy::multiple_bound_locations)]
#![allow(clippy::iter_cloned_collect)]
#![allow(clippy::collapsible_else_if)]
#![allow(clippy::type_complexity)]
#![allow(clippy::borrowed_box)]
#![allow(clippy::derivable_impls)]
// Re-export `Result` and `TensorError` from `tenflowers_core` so callers can name
// them through this crate instead of depending on `tenflowers_core` directly.
pub use tenflowers_core::{Result, TensorError};

// Public module tree. Every module is compiled unconditionally except
// `real_datasets`, which is only built when the `download` feature is enabled.
pub mod active_learning;
pub mod adaptive_prefetch;
pub mod advanced_benchmarks;
pub mod advanced_sampling;
pub mod attention_optimized;
pub mod benchmarks;
pub mod cache;
pub mod config;
pub mod data_quality;
pub mod dataloader;
pub mod dataset_core;
pub mod debug_tools;
pub mod distributed_loading;
pub mod distributed_sharding;
pub mod distributed_streaming;
pub mod enhanced_dataloader;
pub mod error_taxonomy;
pub mod federated;
pub mod formats;
pub mod gpu_transforms;
pub mod memory_pool;
pub mod multimodal;
pub mod numa_scheduler;
pub mod online_learning;
pub mod predictive_prefetch;
// Gated: requires the `download` feature.
#[cfg(feature = "download")]
pub mod real_datasets;
pub mod reproducibility;
pub mod schema_inference;
pub mod simd_transforms;
pub mod smart_cache;
pub mod statistics;
pub mod stream_prefetch_optimizer;
pub mod streaming_optimized;
pub mod synthetic;
pub mod throughput_benchmark;
pub mod transforms;
pub mod validation;
pub mod versioning;
pub mod visualization;
pub mod work_stealing;
pub mod zero_copy;
// Flattened re-exports: the items below can be named from this scope without
// spelling out their defining module.
pub use data_quality::{
compute_drift, jensen_shannon_divergence, ks_two_sample, population_stability_index,
DataQualityAnalyzer, DataQualityExt, DataQualityIssue, DataQualityMetrics,
DriftDetectionConfig, DriftDetectionResult, DriftReport, DriftType, IssueCategory,
IssueSeverity, OutlierDetectionMethod, QualityAnalysisConfig, StatisticalTest,
};
pub use dataloader::{
BatchResult, BucketCollate, CollateFn, DataLoader, DataLoaderBuilder, DataLoaderConfig,
DefaultCollate, DistributedSampler, ImportanceSampler, PaddingCollate, PaddingStrategy,
RandomSampler, Sampler, SequentialSampler, StratifiedSampler,
};
// `debug_tools::SampleInfo` is renamed to `DebugSampleInfo` here; the unaliased
// `SampleInfo` re-exported in this file comes from `visualization`.
pub use debug_tools::{
Bottleneck, BottleneckCategory, ConsistencyReport, DatasetDebugger, EventType,
InspectablePipeline, InspectionEvent, PipelineInspectionReport, PipelineProfiler, ProfileEvent,
ProfileReport, ProfilerConfig, SampleInfo as DebugSampleInfo, Severity, StageStatistics,
StageTimer,
};
pub use enhanced_dataloader::{
EnhancedDataLoader, EnhancedDataLoaderBuilder, LoaderStats, WorkerStats,
};
// `error_taxonomy::helpers` is exposed under the name `error_helpers`.
pub use error_taxonomy::{
classification, helpers as error_helpers, DatasetErrorBuilder, DatasetErrorCategory,
DatasetErrorContext,
};
pub use formats::common::{MissingValueStrategy, NamingPattern};
pub use formats::csv::{ChunkedCsvDataset, CsvChunk, CsvDataset, CsvDatasetBuilder};
pub use formats::image::{
image_folder_dataset_with_transform, ImageFolderConfig, ImageFolderDataset,
ImageFolderDatasetBuilder,
};
// `formats::registry::global` is exposed under the name `format_registry`.
pub use formats::registry::{
global as format_registry, register_format_factory, FormatInfo, GlobalFormatRegistry,
};
// `ValidationReport` is renamed to `SchemaValidationReport` on re-export.
pub use formats::schema_validator::{
FieldDiff, SchemaValidator, ValidationPolicy, ValidationReport as SchemaValidationReport,
};
pub use transforms::{
AddNoise, BackgroundNoise, DatasetExt, GaussianNoise, GlobalNormalize, MinMaxScale, NoiseType,
Normalize, PerChannelNormalize, RealTimeAudioAugmentation, RobustScaler, Transform,
TransformedDataset,
};
// JSON dataset types are only re-exported when the `serialize` feature is enabled.
#[cfg(feature = "serialize")]
pub use formats::json::{
JsonConfig, JsonDataset, JsonDatasetBuilder, JsonDatasetInfo, JsonLDataset,
};
pub use formats::text::{
LabelStrategy, TextConfig, TextDataset, TextDatasetBuilder, TextDatasetInfo,
TokenizationStrategy, TokenizedDataset, Vocabulary,
};
// Flattened re-exports for the modules named in each `use` path. Several items are
// renamed with `as` so they do not clash with same-named items re-exported from
// other modules in this file.
pub use active_learning::{
ActiveLearningDataset, ActiveLearningSampler, DiversityStrategy, LabeledSubset,
UncertaintyStrategy, UnlabeledSubset,
};
// `PrefetchMetrics` is renamed to `AdaptivePrefetchMetrics`; the unaliased
// `PrefetchMetrics` in this file comes from `stream_prefetch_optimizer`.
pub use adaptive_prefetch::{
AdaptationStrategy, AdaptivePrefetchPolicy, AdaptivePrefetchTuner, PidAdaptiveController,
PrefetchMetrics as AdaptivePrefetchMetrics, TuningDecision,
};
// `MemoryTracker` is renamed to `BenchmarkMemoryTracker`. `BenchmarkResult` and
// `MemoryStats` from this module keep their names; the same-named items from
// `simd_transforms` and `throughput_benchmark` are the ones that get aliased.
pub use advanced_benchmarks::{
AdvancedBenchmarkSuite, BenchmarkConfig, BenchmarkResult, CpuStats, GpuStats, MemoryStats,
MemoryTracker as BenchmarkMemoryTracker, SystemInfo, ThroughputStats, TimingStats,
};
pub use advanced_sampling::{
AdvancedImportanceSampler, BalancingStrategy, ClassBalancedSampler, CurriculumScheduler,
CurriculumStrategy, HardNegativeMiner, MiningStrategy,
};
// `SequenceMetadata` is renamed to `AttentionSequenceMetadata` on re-export.
pub use attention_optimized::{
AttentionOptimizedConfig, AttentionOptimizedDataset, AttentionOptimizedDatasetBuilder,
AttentionPattern, AttentionSequence, SequenceMetadata as AttentionSequenceMetadata,
};
pub use benchmarks::{BenchmarkDatasets, CifarDataset, DatasetInfo, IrisDataset, MnistDataset};
pub use cache::{
AggregatedStats, AlertSeverity, AlertThresholds, AlertType, CacheEvent, CacheEventType,
CacheExt, CacheStats, CacheTelemetryCollector, CacheTelemetryMetrics, CachedDataset,
EnhancedTelemetryCollector, LruCache, MetricsSnapshot, PerformanceAlert, PerformanceBaselines,
TelemetryConfig, ThreadSafeLruCache, WarmingStrategy,
};
// The persistent cache types are only re-exported when the `serialize` feature is enabled.
#[cfg(feature = "serialize")]
pub use cache::{PersistentCache, PersistentlyCachedDataset, TensorPersistentCache};
pub use distributed_loading::{
create_distributed_dataloader, CollectiveOpType, CommunicationManager,
DistributedLoadingConfig, DistributedLoadingStats, DistributedMessage,
EnhancedDistributedSampler, NodeInfo,
};
pub use distributed_sharding::{
DatasetShardingExt, ShardConfig, ShardStatistics, ShardStrategy, ShardableDataset,
ShardedDataset,
};
// `StreamingStats` from this module keeps its name; the one from
// `streaming_optimized` is re-exported as `OptimizedStreamingStats`.
pub use distributed_streaming::{
CheckpointState, PartitionStrategy, StreamCoordinator, StreamingConfig, StreamingShardIterator,
StreamingShardLoader, StreamingStats, WorkerHealth, WorkerMetrics, WorkerStatus,
};
pub use federated::{
AggregationStrategy, ClientConfig, ClientId, ClientIndexedDataset, ClientStats,
DataDistribution, FederatedAggregator, FederatedClientDataset, FederatedDatasetExt,
FederatedFeatureStats, FederatedPartitioner, NoiseMechanism, PartitioningStrategy,
PrivacyConfig, PrivacyManager, PrivateStats, QualityMetrics,
};
// Re-exports for optional file formats. Each `use` is compiled only when the
// Cargo feature named in its `cfg` attribute is enabled.
//
// NOTE(review): `formats::arrow` is gated on the `parquet` feature rather than a
// dedicated `arrow` feature — confirm this is intentional.
#[cfg(feature = "parquet")]
pub use formats::arrow::{
ArrowArrayExt, ArrowConfig, ArrowDataset, ArrowDatasetBuilder, ArrowFormatFactory,
ArrowFormatReader, ArrowTensorView,
};
// `formats::audio::FeatureType` is renamed to `AudioFeatureType`, so it does not
// clash with the `FeatureType` re-exported from `formats::tfrecord` below.
#[cfg(feature = "audio")]
pub use formats::audio::{
AudioConfig, AudioDataset, AudioDatasetBuilder, AudioDatasetInfo, AudioInfo,
AudioLabelStrategy, FeatureType as AudioFeatureType,
};
#[cfg(feature = "hdf5")]
pub use formats::hdf5::{HDF5Config, HDF5Dataset, HDF5DatasetBuilder, HDF5DatasetInfo};
#[cfg(feature = "parquet")]
pub use formats::parquet::{
ParquetConfig, ParquetDataset, ParquetDatasetBuilder, ParquetDatasetInfo,
};
#[cfg(feature = "tfrecord")]
pub use formats::tfrecord::{
Feature, FeatureInfo, FeatureType, TFRecord, TFRecordConfig, TFRecordDataset,
TFRecordDatasetBuilder, TFRecordDatasetInfo,
};
#[cfg(feature = "webdataset")]
pub use formats::webdataset::{
StreamingWebDataset, WebDataset, WebDatasetBuilder, WebDatasetConfig, WebDatasetSample,
};
// The Zarr items below are re-exported unconditionally; only `CloudBackend`
// additionally requires the `cloud` feature.
pub use formats::zarr::{
ZarrArrayInfo, ZarrCompressionType, ZarrConfig, ZarrDataset, ZarrDatasetBuilder, ZarrDatasetExt,
};
#[cfg(feature = "cloud")]
pub use formats::zarr::CloudBackend;
// Flattened re-exports for the modules named in each `use` path. Items that share
// a name with something re-exported elsewhere in this file are renamed with `as`.
pub use gpu_transforms::{
GpuColorJitter, GpuContext, GpuGaussianBlur, GpuGaussianNoise, GpuRandomCrop,
GpuRandomHorizontalFlip, GpuResize, GpuRotation,
};
pub use memory_pool::{GlobalMemoryPool, MemoryPool, MemoryPoolExt, PoolStats, PooledMemory};
pub use multimodal::{
FusionStrategy, Modality, MultimodalConfig, MultimodalDataset, MultimodalDatasetBuilder,
MultimodalSample, MultimodalTransform, MultimodalTransformedDataset,
};
pub use numa_scheduler::{
NumaAssignmentStats, NumaAssignmentStrategy, NumaConfig, NumaNode, NumaScheduler, NumaTopology,
NumaWorkerAssignment,
};
pub use online_learning::{
ADWINDetector, DriftDetectionMethod, DriftDetector, ErrorRateDetector, KSDetector,
OnlineLearningConfig, OnlineLearningDataset, OnlineStats, PageHinkleyDetector,
};
pub use predictive_prefetch::{
AccessPattern, AccessStats, PredictivePrefetchDataset, PredictivePrefetcher, PrefetchConfig,
};
// Only available with the `download` feature, matching the gate on the
// `real_datasets` module declaration.
#[cfg(feature = "download")]
pub use real_datasets::{
AgNewsConfig, Cifar10Config, ImageNetConfig, ImdbConfig, MnistConfig, RealAgNewsBuilder,
RealAgNewsDataset, RealCifar10Builder, RealCifar10Dataset, RealImageNetBuilder,
RealImageNetDataset, RealImdbBuilder, RealImdbDataset, RealMnistBuilder, RealMnistDataset,
};
pub use reproducibility::{
DatasetConfig, DeterministicDataset, DeterministicOps, DeterministicOrdering, EnvironmentInfo,
ExperimentConfig, ExperimentTracker, OperationRecord, OrderingStrategy, ReproducibilityExt,
SamplingConfig, SeedInfo, SeedManager, TransformConfig,
};
pub use schema_inference::{
FieldStatistics, InferenceConfig, InferredDataType, InferredField, InferredSchema,
SchemaInferenceEngine,
};
// `BenchmarkResult` is renamed to `SimdBenchmarkResult`; the unaliased
// `BenchmarkResult` in this file comes from `advanced_benchmarks`.
pub use simd_transforms::{
BenchmarkResult as SimdBenchmarkResult, SimdBenchmark, SimdColorConvert, SimdConvolution,
SimdElementWise, SimdHistogram, SimdHistogramTransform, SimdMatrixOps, SimdNormalize,
SimdOperation, SimdStats,
};
pub use smart_cache::{
AccessPatternPredictor, CacheConfig, CacheLevel, EvictionPolicy, PredictiveSmartCache,
SmartCache, SmartCachedDataset,
};
pub use statistics::{
AdvancedStatistics, AdvancedStatisticsExt, CorrelationAnalyzer, DatasetStatisticsComputer,
DatasetStatisticsExt, DatasetStats, Histogram, MultivariateStatistics, PCAResult,
StatisticsConfig,
};
// `PrefetchMetrics` from this module keeps its name; the one from
// `adaptive_prefetch` is re-exported as `AdaptivePrefetchMetrics`.
pub use stream_prefetch_optimizer::{
AccessEvent, AccessPatternAnalyzer, AccessType, PatternPrediction, PatternSignature,
PrefetchMetrics, PrefetchOptimizerConfig, StreamPrefetchOptimizer,
};
// `StreamingStats` is renamed to `OptimizedStreamingStats`; the unaliased
// `StreamingStats` in this file comes from `distributed_streaming`.
pub use streaming_optimized::{
AdaptiveBuffer, CompressionType, StreamingOptimizedConfig, StreamingOptimizedDataset,
StreamingOptimizedDatasetBuilder, StreamingOptimizedIterator,
StreamingStats as OptimizedStreamingStats,
};
pub use synthetic::{
ContrastiveLearningDataset, DatasetGenerator, Episode, FewShotDataset, GeometricShape,
GradientDirection, ImagePatternConfig, ImagePatternGenerator, ImagePatternType,
MetaLearningDataset, ModernMLConfig, NoiseDistribution, SelfSupervisedDataset,
StripeOrientation, SyntheticConfig, SyntheticDataset, SyntheticTextCorpus, TaskDataset,
TextCorpusConfig, TextSynthesisTask, TimeSeriesPattern,
};
// `MemoryStats` and `ThreadStats` are renamed with a `Throughput` prefix; the
// unaliased `MemoryStats` in this file comes from `advanced_benchmarks`.
pub use throughput_benchmark::{
MemoryStats as ThroughputMemoryStats, ThreadStats as ThroughputThreadStats,
ThroughputBenchmarkConfig, ThroughputBenchmarkHarness, ThroughputBenchmarkResult,
};
pub use validation::{
DataValidator, DatasetValidationExt, RangeConstraint, SchemaInfo, ValidationConfig,
ValidationResult,
};
pub use versioning::{
DatasetLineage, DatasetSizeInfo, DatasetVersionManager, LineageTree, TransformationRecord,
VersionId, VersionMetadata, VersionedDataset,
};
// `SampleInfo` here is the `visualization` one; `debug_tools::SampleInfo` is
// re-exported separately as `DebugSampleInfo`.
pub use visualization::{
ClassDistribution, DatasetVisualizationExt, DatasetVisualizer, DistributionInfo,
FeatureHistogram, FeatureStats, SampleInfo, SamplePreview,
};
pub use work_stealing::WorkStealingQueue;
pub use zero_copy::{MemoryMappedDataset, TensorView, ZeroCopyDataset};
// The file-backed memory-mapped types additionally require the `mmap` feature.
#[cfg(feature = "mmap")]
pub use zero_copy::{MemoryMappedFileDataset, MemoryMappedFileStats};
pub use dataset_core::{
BatchedDataset, ConcatDataset, Dataset, DatasetSplit, DatasetSplitter, DatasetUtilsExt,
FilteredDataset, MergeStrategy, MergedDataset, SubsetDataset, TensorDataset,
};