torsh-data 0.1.2

Data loading and preprocessing utilities for ToRSh
Documentation
//! Data transformation and augmentation framework for ToRSh
//!
//! This module provides a comprehensive data transformation framework that supports
//! various preprocessing, augmentation, and data manipulation operations for machine
//! learning workflows.
//!
//! # Architecture
//!
//! The transformation framework is organized into specialized modules:
//!
//! - **Core Framework**: Basic transform traits, combinators, and builder patterns
//! - **Tensor Transforms**: Computer vision transformations for image and tensor data
//! - **Text Processing**: Natural language processing transformations and tokenization
//! - **Zero-Copy Operations**: Memory-efficient tensor operations and buffer management
//! - **Augmentation Pipeline**: Data augmentation pipelines for training robustness
//! - **Online Transforms**: Real-time, adaptive, and performance-aware transformations
//!
//! # Quick Start
//!
//! ```rust,ignore
//! use torsh_data::transforms::{Transform, TransformExt};
//! use torsh_data::core_framework::lambda;
//!
//! // Create a simple transform chain
//! let transform = lambda(|x: i32| Ok(x * 2))
//!     .then(lambda(|x: i32| Ok(x + 1)));
//!
//! let result = transform.transform(5).unwrap();
//! assert_eq!(result, 11); // (5 * 2) + 1
//! ```
//!
//! # Computer Vision Transformations
//!
//! ```rust,ignore
//! use torsh_data::tensor_transforms::*;
//! use torsh_data::augmentation_pipeline::*;
//!
//! // Create an augmentation pipeline
//! let pipeline = AugmentationPipeline::light_augmentation();
//! ```
//!
//! # Text Processing
//!
//! ```rust,ignore
//! use torsh_data::text_processing::*;
//!
//! // Create text preprocessing pipeline
//! let stemmer = PorterStemmer;
//! let ngrams = NGramGenerator::new(2);
//! ```
//!
//! # Zero-Copy Operations
//!
//! ```rust,ignore
//! use torsh_data::zero_copy::*;
//!
//! // Create tensor pool for memory efficiency
//! let pool = TensorPool::<f32>::new(1000);
//! ```
//!
//! # Online Augmentation
//!
//! ```rust,ignore
//! use torsh_data::online_transforms::*;
//! use torsh_data::transforms::{Transform, TransformExt};
//! use torsh_data::core_framework::lambda;
//!
//! // Create online augmentation engine
//! let transform = lambda(|x: i32| Ok(x * 2));
//! let engine = OnlineAugmentationEngine::new(transform).with_cache(500);
//! ```

// Re-export all specialized modules
pub use crate::augmentation_pipeline as augmentation;
pub use crate::core_framework;
pub use crate::online_transforms as online;
pub use crate::tensor_transforms as tensor;
pub use crate::text_processing as text;
pub use crate::zero_copy;

// NOTE: Advanced re-exports are available but currently commented out to maintain
// a stable minimal API. These can be enabled in future versions with proper testing.
// The minimal implementations above are sufficient for current usage patterns.
// pub use crate::core_framework::{
//     compose, lambda, normalize, to_type, Chain, Compose, Conditional, Lambda, Normalize, ToType,
//     Transform, TransformBuilder, TransformExt,
// };

// // Tensor transform re-exports
// pub use crate::tensor_transforms::{
//     BlurKernel, ColorJitter, Flip, FlipDirection, GaussianBlur, InterpolationMode, RandomCrop,
//     RandomGrayscale, RandomHorizontalFlip, RandomRotation, Reshape, Resize, RotationMode,
//     Transpose,
// };

// // Text processing re-exports
// pub use crate::text_processing::{
//     CaseMode, CaseTransform, FilterByLength, FilterCriterion, NGramGenerator, PaddingStrategy,
//     PorterStemmer, RemovePunctuation, RemoveStopwords, SequencePadding, TextNormalizer,
//     TokenFilter, Tokenizer,
// };

// // Zero-copy re-exports
// pub use crate::zero_copy::{
//     BufferManager, MappingOptions, MemoryMapper, PoolConfig, TensorPool, TensorView, TensorViewMut,
//     ViewError, ZeroCopySlice, ZeroCopyTensor,
// };

// // Augmentation pipeline re-exports
// pub use crate::augmentation_pipeline::{
//     AugmentationPipeline, ConditionalTransform, GaussianNoise, RandomBrightness, RandomContrast,
//     RandomErasing, RandomHue, RandomSaturation, RandomVerticalFlip,
// };

// // Online transforms re-exports
// pub use crate::online_transforms::{
//     AdaptiveAugmentation, AugmentationQueue, AugmentationStats, DynamicAugmentationStrategy,
//     OnlineAugmentationEngine, ProgressionMode, ProgressiveAugmentation, StrategyConfig,
// };

// Minimal working implementations for Transform types
// NOTE: These are intentionally lightweight implementations. Fuller implementations
// exist in core_framework.rs but are not currently integrated to maintain API stability.
// Future enhancement: Consider migrating to core_framework implementations with proper testing.

use torsh_core::error::Result;

/// Core transform trait - all transformations must implement this
pub trait Transform<T>: Send + Sync {
    type Output;

    /// Apply the transformation to the input
    fn transform(&self, input: T) -> Result<Self::Output>;

    /// Check if the transform is deterministic
    ///
    /// A deterministic transform always produces the same output for the same input.
    /// Non-deterministic transforms include random augmentations.
    fn is_deterministic(&self) -> bool {
        true
    }
}

/// Extension trait providing composition and chaining operations
pub trait TransformExt<T>: Transform<T> {
    /// Chain this transform with another
    fn then<U: Transform<Self::Output>>(self, other: U) -> Chain<Self, U>
    where
        Self: Sized,
    {
        Chain {
            first: self,
            second: other,
        }
    }
}

impl<T, U: Transform<T>> TransformExt<T> for U {}

/// Builder pattern for creating complex transformations
pub struct TransformBuilder<T> {
    _phantom: std::marker::PhantomData<T>,
}

/// Chain two transforms together
#[derive(Debug, Clone)]
pub struct Chain<T, U> {
    first: T,
    second: U,
}

unsafe impl<T: Send, U: Send> Send for Chain<T, U> {}
unsafe impl<T: Sync, U: Sync> Sync for Chain<T, U> {}

impl<T, U, V> Transform<T> for Chain<U, V>
where
    U: Transform<T>,
    V: Transform<U::Output>,
{
    type Output = V::Output;

    fn transform(&self, input: T) -> Result<Self::Output> {
        let intermediate = self.first.transform(input)?;
        self.second.transform(intermediate)
    }
}

/// Compose multiple transforms
#[derive(Debug, Clone)]
pub struct Compose<T> {
    _phantom: std::marker::PhantomData<T>,
}

unsafe impl<T: Send> Send for Compose<T> {}
unsafe impl<T: Sync> Sync for Compose<T> {}

/// Conditional transform application
#[derive(Debug, Clone)]
pub struct Conditional<T> {
    _phantom: std::marker::PhantomData<T>,
}

unsafe impl<T: Send> Send for Conditional<T> {}
unsafe impl<T: Sync> Sync for Conditional<T> {}

/// Lambda transform wrapper
#[derive(Debug, Clone)]
pub struct Lambda<F> {
    func: F,
}

unsafe impl<F: Send> Send for Lambda<F> {}
unsafe impl<F: Sync> Sync for Lambda<F> {}

impl<F, T, R> Transform<T> for Lambda<F>
where
    F: Fn(T) -> Result<R> + Send + Sync,
{
    type Output = R;

    fn transform(&self, input: T) -> Result<Self::Output> {
        (self.func)(input)
    }
}

/// Normalization transform
#[derive(Debug, Clone)]
pub struct Normalize<T> {
    _phantom: std::marker::PhantomData<T>,
}

unsafe impl<T: Send> Send for Normalize<T> {}
unsafe impl<T: Sync> Sync for Normalize<T> {}

/// Type conversion transform
#[derive(Debug, Clone)]
pub struct ToType<T> {
    _phantom: std::marker::PhantomData<T>,
}

unsafe impl<T: Send> Send for ToType<T> {}
unsafe impl<T: Sync> Sync for ToType<T> {}

/// Convenience function to create lambda transforms
pub fn lambda<F, T, R>(func: F) -> Lambda<F>
where
    F: Fn(T) -> Result<R> + Send + Sync,
{
    Lambda { func }
}

/// Prelude module for convenient importing of common transform types
pub mod prelude {
    pub use super::{lambda, Transform, TransformExt};
    // NOTE: Additional convenience imports available but not yet enabled:
    // pub use crate::augmentation_pipeline::AugmentationPipeline;
    // pub use crate::core_framework::{lambda, Transform, TransformExt};
    // pub use crate::online_transforms::OnlineAugmentationEngine;
    // pub use crate::tensor_transforms::{RandomCrop, RandomHorizontalFlip, Resize};
    // pub use crate::text_processing::{NGramGenerator, PorterStemmer, Tokenizer};
    // pub use crate::zero_copy::{TensorPool, ZeroCopyTensor};
}

/// Common transform utilities and factory functions
pub mod utils {
    // NOTE: Additional utilities can be enabled when needed with proper testing
    // use super::*;
    // use torsh_core::dtype::TensorElement;
    // use torsh_tensor::Tensor;

    // /// Create a standard computer vision preprocessing pipeline
    // pub fn vision_preprocessing_pipeline<T: TensorElement>() -> Compose<Tensor<T>> {
    //     let mut pipeline = Compose::new(vec![]);
    //     // Add common vision preprocessing transforms here
    //     pipeline
    // }

    // /// Create a standard text preprocessing pipeline
    // pub fn text_preprocessing_pipeline() -> Compose<String> {
    //     let mut pipeline = Compose::new(vec![]);
    //     // Add common text preprocessing transforms here
    //     pipeline
    // }

    // /// Create a memory-efficient tensor processing pipeline
    // pub fn efficient_tensor_pipeline<T: TensorElement + Clone>() -> TensorPool<T> {
    //     TensorPool::new(1000) // Default pool size
    // }

    // /// Create a basic augmentation pipeline for training
    // pub fn basic_training_augmentation() -> AugmentationPipeline<Tensor<f32>> {
    //     AugmentationPipeline::light_augmentation()
    // }

    // /// Create an advanced augmentation pipeline for training
    // pub fn advanced_training_augmentation() -> AugmentationPipeline<Tensor<f32>> {
    //     AugmentationPipeline::heavy_augmentation()
    // }

    // /// Create an online augmentation engine with caching
    // pub fn cached_augmentation_engine<T: Clone + Send + Sync + 'static>(
    //     pipeline: impl Transform<T, Output = T> + Send + Sync + 'static,
    //     cache_size: usize,
    // ) -> OnlineAugmentationEngine<T> {
    //     OnlineAugmentationEngine::new(pipeline).with_cache(cache_size)
    // }
}

// NOTE: Additional transform tests can be enabled when needed
// #[cfg(test)]
// mod tests {
// use super::*;
// use torsh_core::device::DeviceType;
// use torsh_tensor::Tensor;

// // Mock tensor for testing
// fn mock_tensor() -> Tensor<f32> {
//     Tensor::from_data(vec![1.0f32, 2.0, 3.0, 4.0], vec![2, 2], DeviceType::Cpu).unwrap()
// }

// #[test]
// fn test_transform_chain() {
//     let transform = lambda(|x: i32| Ok(x * 2)).then(lambda(|x: i32| Ok(x + 1)));

//     let result = transform.transform(5).unwrap();
//     assert_eq!(result, 11); // (5 * 2) + 1
// }

// All tests commented out until transform modules are implemented
// }