1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
use std::collections::VecDeque;

mod len;
pub use len::Len;
mod ndarray_dataset;
pub use ndarray_dataset::NdarrayDataset;
mod get_sample;
pub use get_sample::GetSample;

/// A dataset is just something that has a length and is indexable.
/// A `Vec` of `dataset` collate output must also be collatable.
///
/// We use a custom [`GetSample`] trait instead of `std::ops::Index` because
/// it provides more flexibility.
/// Indeed, we could have provided this definition instead:
///
/// ```
/// use ai_dataloader::collate::Collate;
/// use ai_dataloader::Len;
///
/// pub trait Dataset<T>: Len + std::ops::Index<usize>
/// where
/// T: Collate<Vec<Self::Output>>,
/// Self::Output: Sized,
/// {
/// }
/// ```
/// But as `Index::Output` must refer to something that already exists
/// (`Index::index` returns a reference), it would not cover most of our use cases.
/// For instance, if the dataset looks like this:
/// ```
/// struct Dataset {
///     labels: Vec<i32>,
///     texts: Vec<String>,
/// }
/// ```
/// and we want to return a `(label, text)` tuple when indexing, it would not be
/// possible with `std::ops::Index`.
pub trait Dataset: Len + GetSample {}

/// Sketch of what `Dataset` could become once functor traits are available.
///
/// `F` is the collate function: it takes a `Vec` of the dataset's samples and
/// returns the collated value. This trait is private and hidden from the
/// generated documentation.
#[doc(hidden)]
trait FunctorDataset<F>: Len + GetSample
where
    F: Fn(Vec<Self::Sample>) -> Self::CollateOutput,
{
    /// Type returned by the collate function `F`.
    type CollateOutput;
}

// Mark the standard sequential containers as datasets when their items are `Clone`.
// `Dataset` has no methods of its own, so these impls are empty; they rely on
// `Vec<T>` and `VecDeque<T>` also implementing the `Len` and `GetSample` supertraits.
// NOTE(review): the `T: Clone` bound presumably mirrors the bound on the
// corresponding `GetSample` impls — confirm in the `get_sample` module.
impl<T> Dataset for Vec<T> where T: Clone {}
impl<T> Dataset for VecDeque<T> where T: Clone {}