1use std::ops::Range;
4
5use arrow2::{array::Array, chunk::Chunk, datatypes::DataType};
6use sample_std::{sample_all, Chained, Sample, VecSampler};
7
8use crate::{array::ArbitraryArray, datatypes::DataTypeSampler};
9
/// A [`Chained`] value whose seed is a `(Vec<DataType>, usize)` pair and whose
/// output is a single [`Chunk`] of boxed arrays.
///
/// In `ArbitraryChunk::sample_one` the seed is the sampled list of data types
/// zipped with a sampled chunk length.
pub type ChainedChunk = Chained<(Vec<DataType>, usize), Chunk<Box<dyn Array>>>;
/// A boxed, `Send + Sync` sampler that yields [`ChainedChunk`] values.
pub type ChunkSampler = Box<dyn Sample<Output = ChainedChunk> + Send + Sync>;

/// A [`Chained`] value whose seed is a `(Vec<DataType>, Vec<usize>)` pair and
/// whose output is a `Vec` of [`Chunk`]s.
///
/// In `ArbitraryChunk::sample_many` one chunk is built per entry of the
/// `Vec<usize>`, each from a clone of the same data-type list.
pub type ChainedMultiChunk = Chained<(Vec<DataType>, Vec<usize>), Vec<Chunk<Box<dyn Array>>>>;
/// A boxed, `Send + Sync` sampler that yields [`ChainedMultiChunk`] values.
pub type MultiChunkSampler = Box<dyn Sample<Output = ChainedMultiChunk> + Send + Sync>;
15
/// Configuration for building samplers of arrow2 [`Chunk`]s.
///
/// `N` and `V` are forwarded unchanged to the inner [`ArbitraryArray`]; the
/// `impl` block requires them to be samplers of `String` and `bool`
/// respectively.
pub struct ArbitraryChunk<N, V> {
    /// Sampled to obtain the length passed to `ArbitraryArray::with_len` for
    /// every array of a chunk.
    pub chunk_len: Range<usize>,
    /// Used as the `length` of the `VecSampler` that draws the data types,
    /// i.e. how many arrays a chunk holds.
    pub array_count: Range<usize>,
    /// Element sampler that draws the [`DataType`] of each array.
    pub data_type: DataTypeSampler,
    /// Array configuration from which each per-data-type array sampler is
    /// derived.
    pub array: ArbitraryArray<N, V>,
}
22
23impl<N, V> ArbitraryChunk<N, V>
24where
25 N: Sample<Output = String> + Send + Sync + Clone + 'static,
26 V: Sample<Output = bool> + Send + Sync + Clone + 'static,
27{
28 pub fn sample_one(self) -> ChunkSampler {
29 Box::new(
30 VecSampler {
31 length: self.array_count,
32 el: self.data_type,
33 }
34 .zip(self.chunk_len)
35 .chain_resample(move |seed| Self::from_seed(&self.array, seed), 100),
36 )
37 }
38
39 pub fn sample_many(self, chunk_count: Range<usize>) -> MultiChunkSampler {
40 Box::new(
41 VecSampler {
42 length: self.array_count,
43 el: self.data_type,
44 }
45 .zip(VecSampler {
46 length: chunk_count,
47 el: self.chunk_len,
48 })
49 .chain_resample(
50 move |(dts, lens)| {
51 sample_all(
52 lens.into_iter()
53 .map(|len| Self::from_seed(&self.array, (dts.clone(), len)))
54 .collect(),
55 )
56 },
57 100,
58 ),
59 )
60 }
61
62 pub fn from_seed(
63 array: &ArbitraryArray<N, V>,
64 seed: (Vec<DataType>, usize),
65 ) -> Box<dyn Sample<Output = Chunk<Box<dyn Array>>> + Send + Sync> {
66 let (dts, len) = seed;
67 Box::new(
68 sample_all(
69 dts.into_iter()
70 .map(|data_type| array.with_len(len).sampler_from_data_type(&data_type))
71 .collect(),
72 )
73 .try_convert(Chunk::new, |chunk| Some(chunk.to_vec())),
74 )
75 }
76}