sample-arrow2 0.17.2

Samplers for arrow2 for use with sample-test
Documentation
//! Samplers for generating an arrow [`DataType`].

use arrow2::datatypes::{DataType, Field};
use sample_std::{sampler_choice, Always, Random, Sample, VecSampler};

/// A boxed, dynamically dispatched sampler whose output is an arrow [`DataType`].
///
/// The trait object is additionally bounded by `Send + Sync`.
pub type DataTypeSampler = Box<dyn Sample<Output = DataType> + Send + Sync>;

/// Sampler that assembles an arrow [`Field`] from three component samplers.
struct FieldSampler<N, V> {
    /// Produces the field name.
    names: N,
    /// Produces the nullability flag.
    nullable: V,
    /// Produces the field's data type.
    inner: DataTypeSampler,
}

impl<N: Sample<Output = String>, V: Sample<Output = bool>> Sample for FieldSampler<N, V> {
    type Output = Field;

    /// Draws a name, a data type and a nullability flag, then builds a [`Field`] from them.
    fn generate(&mut self, g: &mut Random) -> Self::Output {
        // Draw order (name, data type, nullability) is kept fixed so the
        // random source is consumed in the same sequence on every call.
        let name = self.names.generate(g);
        let data_type = self.inner.generate(g);
        let is_nullable = self.nullable.generate(g);

        Field::new(name, data_type, is_nullable)
    }
}

/// Sampler producing a [`DataType::Struct`] with a sampled number of fields.
///
/// NOTE(review): nothing in the visible part of this file references this type.
struct StructDataTypeSampler<S, F> {
    /// Produces the number of fields in the struct.
    size: S,
    /// Produces each individual field.
    field: F,
}

impl<S: Sample<Output = usize>, F: Sample<Output = Field>> Sample for StructDataTypeSampler<S, F> {
    type Output = DataType;

    /// Draws a field count, then that many fields, and wraps them in [`DataType::Struct`].
    fn generate(&mut self, g: &mut Random) -> Self::Output {
        let count = self.size.generate(g);

        let mut fields = Vec::with_capacity(count);
        for _ in 0..count {
            fields.push(self.field.generate(g));
        }

        DataType::Struct(fields)
    }
}

pub fn sample_flat() -> DataTypeSampler {
    Box::new(sampler_choice([
        Always(DataType::Float32),
        Always(DataType::Float64),
        Always(DataType::Int8),
        Always(DataType::Int16),
        Always(DataType::Int32),
        Always(DataType::Int64),
        Always(DataType::UInt8),
        Always(DataType::UInt16),
        Always(DataType::UInt32),
        Always(DataType::UInt64),
    ]))
}

/// Building blocks for samplers of nested arrow [`DataType`]s.
pub struct ArbitraryDataType<N, V, B, F> {
    /// Sampler for the names of generated fields.
    pub names: N,
    /// Sampler for the nullability flag of generated fields.
    pub nullable: V,
    /// Sampler for the number of fields in a generated struct type.
    pub struct_branch: B,
    /// Factory returning the sampler used at depth zero and offered as an
    /// alternative at every nesting level.
    pub flat: F,
}

impl<N, V, B, F> ArbitraryDataType<N, V, B, F>
where
    N: Sample<Output = String> + Clone + Send + Sync + 'static,
    V: Sample<Output = bool> + Clone + Send + Sync + 'static,
    B: Sample<Output = usize> + Clone + Send + Sync + 'static,
    F: Fn() -> DataTypeSampler,
{
    /// Builds a sampler that chooses among three alternatives:
    ///
    /// 1. whatever the `flat` factory returns,
    /// 2. a [`DataType::Struct`] whose field count is drawn from `struct_branch`,
    /// 3. a [`DataType::List`] wrapping a single field.
    ///
    /// Field names and nullability come from clones of `names` and `nullable`;
    /// field data types come from the sampler returned by `inner`, which is
    /// invoked once for the struct alternative and once for the list alternative.
    pub fn sample_nested<IF>(&self, inner: IF) -> DataTypeSampler
    where
        IF: Fn() -> DataTypeSampler,
    {
        // Every call yields an independent field sampler with its own `inner()`.
        let field = || FieldSampler {
            names: self.names.clone(),
            nullable: self.nullable.clone(),
            inner: inner(),
        };

        // NOTE(review): the `|_| None` closures are the second argument of
        // `try_convert`; presumably they decline the reverse conversion —
        // confirm against the sample_std documentation.
        Box::new(sampler_choice([
            // `flat` already returns a boxed `DataTypeSampler`, so it is used
            // directly instead of being wrapped in a second `Box`.
            (self.flat)(),
            Box::new(
                VecSampler {
                    length: self.struct_branch.clone(),
                    el: field(),
                }
                .try_convert(DataType::Struct, |_| None),
            ),
            Box::new(field().try_convert(|f| DataType::List(Box::new(f)), |_| None)),
        ]))
    }

    /// Builds a sampler whose generated types nest at most `depth` levels.
    ///
    /// With `depth == 0` the result is just the sampler returned by `flat`.
    /// Otherwise it is a choice between [`Self::sample_nested`] (whose inner
    /// sampler is `sample_depth(depth - 1)`) and the `flat` sampler.
    ///
    /// Construction cost grows exponentially with `depth`: `sample_nested`
    /// invokes its `inner` factory twice, and each invocation recurses here.
    pub fn sample_depth(&self, depth: usize) -> DataTypeSampler {
        let flats = (self.flat)();
        if depth == 0 {
            flats
        } else {
            // `depth - 1` cannot underflow: the zero case returned above.
            let inner = || self.sample_depth(depth - 1);
            Box::new(sampler_choice([self.sample_nested(inner), flats]))
        }
    }
}