vortex-array 0.54.0

Vortex in-memory columnar data format
Documentation
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![allow(clippy::unwrap_used)]

use divan::Bencher;
use rand::distr::Uniform;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use vortex_array::IntoArray;
use vortex_array::arrays::StructArray;
use vortex_array::compute::take;
use vortex_array::validity::Validity;
use vortex_buffer::Buffer;
use vortex_dtype::FieldNames;

/// Benchmark binary entry point: delegates to `divan::main()`, the divan
/// harness entry point for the `#[divan::bench]` functions declared in this file.
fn main() {
    divan::main();
}

/// Number of rows in every struct array built by the benchmarks below; also the
/// exclusive upper bound for randomly generated take indices.
const ARRAY_SIZE: usize = 100_000;
/// Number of indices handed to `take` in each benchmark.
const TAKE_SIZE: usize = 1000;

/// Benchmarks `take` on a non-nullable struct array with a single `i64` field
/// (`"value"`, `ARRAY_SIZE` rows) using `TAKE_SIZE` randomly drawn indices.
///
/// The RNG is seeded with a fixed value, so the generated data and indices are
/// the same on every run.
#[divan::bench]
fn take_struct_simple(bencher: Bencher) {
    let value_dist = Uniform::new(0i64, 100_000_000).unwrap();
    let mut rng = StdRng::seed_from_u64(0);

    // The struct's only column: ARRAY_SIZE random i64 values.
    let values: Buffer<i64> = (0..ARRAY_SIZE).map(|_| rng.sample(value_dist)).collect();
    let column = values.into_array();

    let names = FieldNames::from(["value"]);
    let struct_array =
        StructArray::try_new(names, vec![column], ARRAY_SIZE, Validity::NonNullable).unwrap();

    // Random row positions in [0, ARRAY_SIZE), drawn after the column values
    // from the same RNG.
    let indices_array = (0..TAKE_SIZE)
        .map(|_| rng.random_range(0..ARRAY_SIZE) as u64)
        .collect::<Buffer<u64>>()
        .into_array();

    bencher
        .with_inputs(|| (&struct_array, &indices_array))
        .bench_refs(|(structs, picks)| {
            let taken = take(structs.as_ref(), picks.as_ref()).unwrap();
            divan::black_box(taken);
        });
}

#[divan::bench(args = [8])]
fn take_struct_wide(bencher: Bencher, width: usize) {
    let mut rng = StdRng::seed_from_u64(0);
    let range = Uniform::new(0i64, 100_000_000).unwrap();

    let fields: Vec<_> = (0..width)
        .map(|_| {
            (0..ARRAY_SIZE)
                .map(|_| rng.sample(range))
                .collect::<Buffer<i64>>()
                .into_array()
        })
        .collect();

    let field_names = FieldNames::from([
        "field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8",
    ]);

    let struct_array =
        StructArray::try_new(field_names, fields, ARRAY_SIZE, Validity::NonNullable).unwrap();

    let indices: Buffer<u64> = (0..TAKE_SIZE)
        .map(|_| rng.random_range(0..ARRAY_SIZE) as u64)
        .collect();
    let indices_array = indices.into_array();

    bencher
        .with_inputs(|| (&struct_array, &indices_array))
        .bench_refs(|(array, indices)| {
            divan::black_box(take(array.as_ref(), indices.as_ref()).unwrap());
        });
}

/// Benchmarks `take` on a non-nullable struct array with a single `i64` field
/// (`"value"`, `ARRAY_SIZE` rows) using the ascending indices `0..TAKE_SIZE`.
///
/// Counterpart to the random-index benchmark: the same array shape, but the
/// selected rows are contiguous and in order.
#[divan::bench]
fn take_struct_sequential_indices(bencher: Bencher) {
    let value_dist = Uniform::new(0i64, 100_000_000).unwrap();
    let mut rng = StdRng::seed_from_u64(0);

    // The struct's only column: ARRAY_SIZE random i64 values from a fixed seed.
    let values: Buffer<i64> = (0..ARRAY_SIZE).map(|_| rng.sample(value_dist)).collect();
    let column = values.into_array();

    let names = FieldNames::from(["value"]);
    let struct_array =
        StructArray::try_new(names, vec![column], ARRAY_SIZE, Validity::NonNullable).unwrap();

    // The first TAKE_SIZE row positions, in order (no randomness involved).
    let first_rows: Buffer<u64> = (0..TAKE_SIZE as u64).collect();
    let indices_array = first_rows.into_array();

    bencher
        .with_inputs(|| (&struct_array, &indices_array))
        .bench_refs(|(structs, picks)| {
            let taken = take(structs.as_ref(), picks.as_ref()).unwrap();
            divan::black_box(taken);
        });
}