ai_dataloader/indexable/dataset.rs
1use std::collections::VecDeque;
2
3mod len;
4pub use len::Len;
5mod ndarray_dataset;
6pub use ndarray_dataset::NdarrayDataset;
7mod get_sample;
8pub use get_sample::GetSample;
9
10/// A dataset is just something that has a length and is indexable.
11/// A `Vec` of `dataset` collate output must also be collatable.
12///
13/// We use a custom [`GetSample`] trait instead of `std::ops::Index` because
14/// it provides more flexibility.
15/// Indeed we could have provided this implementation:
16///
17/// ```
18/// use ai_dataloader::collate::Collate;
19/// use ai_dataloader::Len;
20///
21/// pub trait Dataset<T>: Len + std::ops::Index<usize>
22/// where
23/// T: Collate<Vec<Self::Output>>,
24/// Self::Output: Sized,
25/// {
26/// }
27/// ```
28/// But as `Index::Output` must refer as something exist, it will not cover most of our use cases.
29/// For instance if the dataset is something like that:
30/// ```
31/// struct Dataset {
32/// labels: Vec<i32>,
33/// texts: Vec<String>,
34/// }
35/// ```
36/// And we want to return a tuple (label, text) when indexing, it will no be possible with `std:ops::Index`.
37pub trait Dataset: Len + GetSample {}
38
39/// Dataset could become something like that when functor trait will be available.
40#[doc(hidden)]
41trait FunctorDataset<F>: Len + GetSample
42where
43 F: Fn(Vec<Self::Sample>) -> Self::CollateOutput,
44{
45 type CollateOutput;
46}
47
48impl<T> Dataset for Vec<T> where T: Clone {}
49impl<T> Dataset for VecDeque<T> where T: Clone {}