Skip to main content

axonml_data/
lib.rs

1//! axonml-data - Data Loading Utilities
2//!
3//! # File
4//! `crates/axonml-data/src/lib.rs`
5//!
6//! # Author
7//! Andrew Jewell Sr - AutomataNexus
8//!
9//! # Updated
10//! March 8, 2026
11//!
12//! # Disclaimer
13//! Use at own risk. This software is provided "as is", without warranty of any
14//! kind, express or implied. The author and AutomataNexus shall not be held
15//! liable for any damages arising from the use of this software.
16
17#![warn(missing_docs)]
18#![warn(clippy::all)]
19#![warn(clippy::pedantic)]
20// ML/tensor-specific allowances
21#![allow(clippy::cast_possible_truncation)]
22#![allow(clippy::cast_sign_loss)]
23#![allow(clippy::cast_precision_loss)]
24#![allow(clippy::cast_possible_wrap)]
25#![allow(clippy::missing_errors_doc)]
26#![allow(clippy::missing_panics_doc)]
27#![allow(clippy::must_use_candidate)]
28#![allow(clippy::module_name_repetitions)]
29#![allow(clippy::similar_names)]
30#![allow(clippy::many_single_char_names)]
31#![allow(clippy::too_many_arguments)]
32#![allow(clippy::doc_markdown)]
33#![allow(clippy::cast_lossless)]
34#![allow(clippy::needless_pass_by_value)]
35#![allow(clippy::redundant_closure_for_method_calls)]
36#![allow(clippy::uninlined_format_args)]
37#![allow(clippy::ptr_arg)]
38#![allow(clippy::return_self_not_must_use)]
39#![allow(clippy::not_unsafe_ptr_arg_deref)]
40#![allow(clippy::items_after_statements)]
41#![allow(clippy::unreadable_literal)]
42#![allow(clippy::if_same_then_else)]
43#![allow(clippy::needless_range_loop)]
44#![allow(clippy::trivially_copy_pass_by_ref)]
45#![allow(clippy::unnecessary_wraps)]
46#![allow(clippy::match_same_arms)]
47#![allow(clippy::unused_self)]
48#![allow(clippy::too_many_lines)]
49#![allow(clippy::single_match_else)]
50#![allow(clippy::fn_params_excessive_bools)]
51#![allow(clippy::struct_excessive_bools)]
52#![allow(clippy::format_push_string)]
53#![allow(clippy::erasing_op)]
54#![allow(clippy::type_repetition_in_bounds)]
55#![allow(clippy::iter_without_into_iter)]
56#![allow(clippy::should_implement_trait)]
57#![allow(clippy::use_debug)]
58#![allow(clippy::case_sensitive_file_extension_comparisons)]
59#![allow(clippy::large_enum_variant)]
60#![allow(clippy::panic)]
61#![allow(clippy::struct_field_names)]
62#![allow(clippy::missing_fields_in_debug)]
63#![allow(clippy::upper_case_acronyms)]
64#![allow(clippy::assigning_clones)]
65#![allow(clippy::option_if_let_else)]
66#![allow(clippy::manual_let_else)]
67#![allow(clippy::explicit_iter_loop)]
68#![allow(clippy::default_trait_access)]
69#![allow(clippy::only_used_in_recursion)]
70#![allow(clippy::manual_clamp)]
71#![allow(clippy::ref_option)]
72#![allow(clippy::multiple_bound_locations)]
73#![allow(clippy::comparison_chain)]
74#![allow(clippy::manual_assert)]
75#![allow(clippy::unnecessary_debug_formatting)]
76
77// =============================================================================
78// Module Declarations
79// =============================================================================
80
81pub mod collate;
82pub mod dataloader;
83pub mod dataset;
84pub mod sampler;
85pub mod transforms;
86
87// =============================================================================
88// Re-exports
89// =============================================================================
90
91pub use collate::{Collate, DefaultCollate, StackCollate};
92pub use dataloader::{Batch, DataLoader, DataLoaderIter, GpuPrefetchIter};
93pub use dataset::{
94    ConcatDataset, Dataset, InMemoryDataset, MapDataset, SubsetDataset, TensorDataset,
95};
96pub use sampler::{
97    BatchSampler, RandomSampler, Sampler, SequentialSampler, SubsetRandomSampler,
98    WeightedRandomSampler,
99};
100pub use transforms::{Compose, Normalize, RandomNoise, ToTensor, Transform};
101
102// =============================================================================
103// Prelude
104// =============================================================================
105
106/// Common imports for data loading.
107pub mod prelude {
108    pub use crate::{
109        Batch, BatchSampler, Collate, Compose, ConcatDataset, DataLoader, DataLoaderIter, Dataset,
110        DefaultCollate, GpuPrefetchIter, InMemoryDataset, MapDataset, Normalize, RandomNoise,
111        RandomSampler, Sampler, SequentialSampler, StackCollate, SubsetDataset,
112        SubsetRandomSampler, TensorDataset, ToTensor, Transform, WeightedRandomSampler,
113    };
114    pub use axonml_tensor::Tensor;
115}
116
117// =============================================================================
118// Tests
119// =============================================================================
120
#[cfg(test)]
mod tests {
    // Smoke tests that exercise the crate-root re-exports (datasets, loaders,
    // transforms and samplers) through the public API only.

    use super::*;
    use axonml_tensor::Tensor;

    /// A `TensorDataset` built from a `[3, 2]` feature tensor and a `[3]`
    /// target tensor reports three samples and pairs each feature row with
    /// the target at the same index.
    #[test]
    fn test_tensor_dataset() {
        let x = Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[3, 2]).unwrap();
        let y = Tensor::from_vec(vec![0.0, 1.0, 0.0], &[3]).unwrap();
        let dataset = TensorDataset::new(x, y);

        assert_eq!(dataset.len(), 3);

        // Check every row, not only the first, so a misaligned feature/target
        // pairing on later indices is caught as well.
        let expected = [
            (vec![1.0, 2.0], vec![0.0]),
            (vec![3.0, 4.0], vec![1.0]),
            (vec![5.0, 6.0], vec![0.0]),
        ];
        for (idx, (want_x, want_y)) in expected.into_iter().enumerate() {
            let (x_item, y_item) = dataset.get(idx).unwrap();
            assert_eq!(x_item.to_vec(), want_x);
            assert_eq!(y_item.to_vec(), want_y);
        }
    }

    /// Six samples with a batch size of two are split into three batches.
    #[test]
    fn test_dataloader_basic() {
        let x = Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[6, 1]).unwrap();
        let y = Tensor::from_vec(vec![0.0, 1.0, 0.0, 1.0, 0.0, 1.0], &[6]).unwrap();
        let dataset = TensorDataset::new(x, y);
        let loader = DataLoader::new(dataset, 2);

        let batches: Vec<_> = loader.iter().collect();
        assert_eq!(batches.len(), 3); // 6 items / 2 batch_size = 3 batches
    }

    /// A shuffling loader can be iterated repeatedly and still yields a full
    /// epoch of batches.
    #[test]
    fn test_dataloader_shuffle() {
        let x = Tensor::from_vec((0..100).map(|i| i as f32).collect(), &[100, 1]).unwrap();
        let y = Tensor::from_vec((0..100).map(|i| i as f32).collect(), &[100]).unwrap();
        let dataset = TensorDataset::new(x, y);
        let loader = DataLoader::new(dataset, 10).shuffle(true);

        // Shuffling may reorder samples, but the number of batches per epoch
        // must be unaffected: 100 items / 10 batch_size = 10 batches.
        assert_eq!(loader.iter().count(), 10);

        // Take the first batch from two separate iterations.
        let batch1: Vec<_> = loader.iter().take(1).collect();
        let batch2: Vec<_> = loader.iter().take(1).collect();

        // Two random orders can coincide, so the batches are not compared
        // with each other; only that each iteration produced a batch.
        assert!(!batch1.is_empty());
        assert!(!batch2.is_empty());
    }

    /// Composing `Normalize` and `RandomNoise` leaves the tensor shape unchanged.
    #[test]
    fn test_transform_compose() {
        let normalize = Normalize::new(0.0, 1.0);
        let noise = RandomNoise::new(0.0);
        let transform = Compose::new(vec![Box::new(normalize), Box::new(noise)]);

        let input = Tensor::from_vec(vec![1.0, 2.0, 3.0], &[3]).unwrap();
        let output = transform.apply(&input);
        assert_eq!(output.shape(), &[3]);
    }

    /// `SequentialSampler` yields `0..10` in order; `RandomSampler` yields ten
    /// indices that are all valid for a dataset of length ten.
    #[test]
    fn test_samplers() {
        let sequential = SequentialSampler::new(10);
        let indices: Vec<_> = sequential.iter().collect();
        assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);

        let random = RandomSampler::new(10);
        let indices: Vec<_> = random.iter().collect();
        assert_eq!(indices.len(), 10);
        // The order is random, but every index must address an existing sample.
        assert!(indices.iter().all(|&i| i < 10));
    }
}