use std::{fmt::Debug, sync::Arc, vec};
use arrow_array::{ArrayRef, Float64Array, Int64Array};
use arrow_schema::DataType;
use datafusion_common::{exec_datafusion_err, Result, ScalarValue};
use datafusion_expr::{AggregateUDF, ScalarUDF};
use geo_types::Rect;
use rand::{distr::Uniform, rngs::StdRng, Rng, RngExt, SeedableRng};
use sedona_common::sedona_internal_err;
use sedona_geometry::types::GeometryTypeId;
use sedona_schema::datatypes::{SedonaType, RASTER, WKB_GEOMETRY};
use sedona_schema::raster::BandDataType;
use crate::{
datagen::RandomPartitionedDataBuilder,
rasters::generate_tiled_rasters,
testers::{AggregateUdfTester, ScalarUdfTester},
};
/// Rows per generated batch for normal (release-build) benchmark runs.
pub const ROWS_PER_BATCH: usize = 8192;
/// Rows per generated batch for tiny (debug-build) benchmark runs.
pub const ROWS_PER_BATCH_TINY: usize = 1024;
/// Batches generated per column for normal benchmark runs.
pub const NUM_BATCHES_SMALL: usize = 16;
/// Batches generated per column for tiny benchmark runs.
pub const NUM_BATCHES_TINY: usize = 1;
#[cfg(feature = "criterion")]
pub mod benchmark {
    //! Criterion entry points for benchmarking scalar and aggregate UDFs.
    use super::*;
    use criterion::Criterion;
    use sedona_expr::function_set::FunctionSet;

    /// Run a criterion benchmark for the scalar UDF `name` found in `functions`.
    ///
    /// Input data is generated from `config` using the default [`Config`]
    /// sizing (tiny under `debug_assertions`, small otherwise); each benchmark
    /// iteration invokes the UDF over every generated batch.
    ///
    /// # Panics
    ///
    /// Panics if `name` is not registered in `functions`, if data generation
    /// fails, or if a UDF invocation fails.
    pub fn scalar(
        c: &mut Criterion,
        functions: &FunctionSet,
        lib: &str,
        name: &str,
        config: impl Into<BenchmarkArgs>,
    ) {
        let not_found_err = format!("{name} was not found in function set");
        let udf: ScalarUDF = functions
            .scalar_udf(name)
            // BUGFIX: the message argument had been mangled to `¬_found_err`
            // (a corrupted `&not_found_err`), which did not compile.
            .expect(&not_found_err)
            .clone()
            .into();
        let data = config
            .into()
            .build_data(
                Config::default().num_batches(),
                Config::default().rows_per_batch(),
            )
            .unwrap();
        c.bench_function(&data.make_label(lib, name), |b| {
            b.iter(|| data.invoke_scalar(&udf).unwrap())
        });
    }

    /// Run a criterion benchmark for the aggregate UDF `name` found in
    /// `functions`, aggregating every generated batch per iteration.
    ///
    /// # Panics
    ///
    /// Panics if `name` is not registered in `functions`, if data generation
    /// fails, or if aggregation fails.
    pub fn aggregate(
        c: &mut Criterion,
        functions: &FunctionSet,
        lib: &str,
        name: &str,
        config: impl Into<BenchmarkArgs>,
    ) {
        let not_found_err = format!("{name} was not found in function set");
        let udf: AggregateUDF = functions
            .aggregate_udf(name)
            // BUGFIX: same `¬_found_err` corruption as in `scalar` above.
            .expect(&not_found_err)
            .clone()
            .into();
        let data = config
            .into()
            .build_data(
                Config::default().num_batches(),
                Config::default().rows_per_batch(),
            )
            .unwrap();
        c.bench_function(&data.make_label(lib, name), |b| {
            b.iter(|| data.invoke_aggregate(&udf).unwrap())
        });
    }

    /// Sizing preset for generated benchmark data.
    pub enum Config {
        Tiny,
        Small,
    }

    impl Default for Config {
        /// Tiny in debug builds (fast smoke runs), small in release builds.
        fn default() -> Self {
            #[cfg(debug_assertions)]
            return Self::Tiny;
            #[cfg(not(debug_assertions))]
            return Self::Small;
        }
    }

    impl Config {
        /// Number of batches to generate for this preset.
        fn num_batches(&self) -> usize {
            match self {
                Config::Tiny => NUM_BATCHES_TINY,
                Config::Small => NUM_BATCHES_SMALL,
            }
        }

        /// Rows per generated batch for this preset.
        fn rows_per_batch(&self) -> usize {
            match self {
                Config::Tiny => ROWS_PER_BATCH_TINY,
                Config::Small => ROWS_PER_BATCH,
            }
        }
    }
}
/// Shape of a benchmarked UDF invocation: which positional arguments are
/// passed as arrays and which as pre-computed scalars.
///
/// Variant names read left-to-right in argument order, e.g.
/// `ArrayScalar(a, b)` invokes the UDF with an array built from spec `a`
/// and a scalar built from spec `b`.
#[derive(Debug, Clone)]
pub enum BenchmarkArgs {
    Array(BenchmarkArgSpec),
    ScalarArray(BenchmarkArgSpec, BenchmarkArgSpec),
    ArrayScalar(BenchmarkArgSpec, BenchmarkArgSpec),
    ArrayArray(BenchmarkArgSpec, BenchmarkArgSpec),
    ArrayScalarScalar(BenchmarkArgSpec, BenchmarkArgSpec, BenchmarkArgSpec),
    ArrayArrayScalar(BenchmarkArgSpec, BenchmarkArgSpec, BenchmarkArgSpec),
    ArrayArrayArray(BenchmarkArgSpec, BenchmarkArgSpec, BenchmarkArgSpec),
    ArrayArrayArrayArray(
        BenchmarkArgSpec,
        BenchmarkArgSpec,
        BenchmarkArgSpec,
        BenchmarkArgSpec,
    ),
}
impl From<BenchmarkArgSpec> for BenchmarkArgs {
fn from(value: BenchmarkArgSpec) -> Self {
BenchmarkArgs::Array(value)
}
}
impl BenchmarkArgs {
    /// The [`SedonaType`] of each argument, in declaration order.
    fn sedona_types(&self) -> Vec<SedonaType> {
        self.specs().iter().map(|col| col.sedona_type()).collect()
    }

    /// Materialize the benchmark input: `num_batches` arrays of
    /// `rows_per_batch` rows for each array-valued argument, plus one
    /// [`ScalarValue`] for each scalar-valued argument.
    ///
    /// Array and scalar specs are seeded by their *independent* 0-based
    /// position within their own list (see `build_arrays`/`build_scalar`),
    /// not by their overall argument position.
    pub fn build_data(&self, num_batches: usize, rows_per_batch: usize) -> Result<BenchmarkData> {
        // Which specs get full arrays built. NOTE(review): for
        // `ArrayArrayScalar` this is `self.specs()`, i.e. an array is also
        // built for the scalar position (its scalar comes from
        // `scalar_configs` below); the unit tests assert this layout
        // (arrays.len() == 3), so it appears intentional.
        let array_configs = match self {
            BenchmarkArgs::Array(_)
            | BenchmarkArgs::ArrayArray(_, _)
            | BenchmarkArgs::ArrayArrayScalar(_, _, _)
            | BenchmarkArgs::ArrayArrayArray(_, _, _)
            | BenchmarkArgs::ArrayArrayArrayArray(_, _, _, _) => self.specs(),
            BenchmarkArgs::ScalarArray(_, col)
            | BenchmarkArgs::ArrayScalar(col, _)
            | BenchmarkArgs::ArrayScalarScalar(col, _, _) => {
                vec![col.clone()]
            }
        };
        // Which specs get a single scalar value built.
        let scalar_configs = match self {
            BenchmarkArgs::ScalarArray(col, _)
            | BenchmarkArgs::ArrayScalar(_, col)
            | BenchmarkArgs::ArrayArrayScalar(_, _, col) => {
                vec![col.clone()]
            }
            BenchmarkArgs::ArrayScalarScalar(_, col0, col1) => {
                vec![col0.clone(), col1.clone()]
            }
            _ => vec![],
        };
        let arrays = array_configs
            .iter()
            .enumerate()
            .map(|(i, col)| col.build_arrays(i, num_batches, rows_per_batch))
            .collect::<Result<Vec<_>>>()?;
        let scalars = scalar_configs
            .iter()
            .enumerate()
            .map(|(i, col)| col.build_scalar(i))
            .collect::<Result<Vec<_>>>()?;
        Ok(BenchmarkData {
            config: self.clone(),
            num_batches,
            arrays,
            scalars,
        })
    }

    /// All column specs in argument order, regardless of array/scalar role.
    fn specs(&self) -> Vec<BenchmarkArgSpec> {
        match self {
            BenchmarkArgs::Array(col) => vec![col.clone()],
            BenchmarkArgs::ScalarArray(col0, col1)
            | BenchmarkArgs::ArrayScalar(col0, col1)
            | BenchmarkArgs::ArrayArray(col0, col1) => {
                vec![col0.clone(), col1.clone()]
            }
            BenchmarkArgs::ArrayScalarScalar(col0, col1, col2)
            | BenchmarkArgs::ArrayArrayScalar(col0, col1, col2)
            | BenchmarkArgs::ArrayArrayArray(col0, col1, col2) => {
                vec![col0.clone(), col1.clone(), col2.clone()]
            }
            BenchmarkArgs::ArrayArrayArrayArray(col0, col1, col2, col3) => {
                vec![col0.clone(), col1.clone(), col2.clone(), col3.clone()]
            }
        }
    }
}
/// Generator spec for a single benchmark argument column.
///
/// `Debug` is implemented manually below to produce compact, label-friendly
/// output.
#[derive(Clone)]
pub enum BenchmarkArgSpec {
    /// One random point per row.
    Point,
    /// One linestring per row with the given vertex count.
    LineString(usize),
    /// One polygon per row with the given vertex count.
    Polygon(usize),
    /// Like `Polygon`, but with interior rings enabled (hole rate 1.0).
    PolygonWithHole(usize),
    /// One multipoint per row with the given part count.
    MultiPoint(usize),
    /// Uniform random integers from the half-open range `[lo, hi)`.
    Int64(i64, i64),
    /// Uniform random floats from the half-open range `[lo, hi)`.
    Float64(f64, f64),
    /// Uniform random integers from the half-open range `[lo, hi)`.
    Int32(i32, i32),
    /// Data from the inner spec, passed through the given scalar UDF.
    Transformed(Box<BenchmarkArgSpec>, ScalarUDF),
    /// Every row is a copy of the given string.
    String(String),
    /// One raster per row of `width` x `height` UInt8 pixels.
    Raster(usize, usize),
}
impl Debug for BenchmarkArgSpec {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Point => write!(f, "Point"),
Self::LineString(arg0) => f.debug_tuple("LineString").field(arg0).finish(),
Self::Polygon(arg0) => f.debug_tuple("Polygon").field(arg0).finish(),
Self::PolygonWithHole(arg0) => f.debug_tuple("PolygonWithHole").field(arg0).finish(),
Self::MultiPoint(arg0) => f.debug_tuple("MultiPoint").field(arg0).finish(),
Self::Int64(arg0, arg1) => f.debug_tuple("Int64").field(arg0).field(arg1).finish(),
Self::Float64(arg0, arg1) => f.debug_tuple("Float64").field(arg0).field(arg1).finish(),
Self::Int32(arg0, arg1) => f.debug_tuple("Int32").field(arg0).field(arg1).finish(),
Self::Transformed(inner, t) => write!(f, "{}({:?})", t.name(), inner),
Self::String(s) => write!(f, "String({s})"),
Self::Raster(w, h) => f.debug_tuple("Raster").field(w).field(h).finish(),
}
}
}
impl BenchmarkArgSpec {
    /// The [`SedonaType`] of the column this spec generates.
    ///
    /// For `Transformed`, this is the transform UDF's return type when given
    /// the inner spec's type.
    pub fn sedona_type(&self) -> SedonaType {
        match self {
            BenchmarkArgSpec::Point
            | BenchmarkArgSpec::Polygon(_)
            | BenchmarkArgSpec::PolygonWithHole(_)
            | BenchmarkArgSpec::LineString(_)
            | BenchmarkArgSpec::MultiPoint(_) => WKB_GEOMETRY,
            BenchmarkArgSpec::Int64(_, _) => SedonaType::Arrow(DataType::Int64),
            BenchmarkArgSpec::Float64(_, _) => SedonaType::Arrow(DataType::Float64),
            BenchmarkArgSpec::Int32(_, _) => SedonaType::Arrow(DataType::Int32),
            BenchmarkArgSpec::Transformed(inner, t) => {
                let tester = ScalarUdfTester::new(t.clone(), vec![inner.sedona_type()]);
                tester.return_type().unwrap()
            }
            BenchmarkArgSpec::String(_) => SedonaType::Arrow(DataType::Utf8),
            BenchmarkArgSpec::Raster(_, _) => RASTER,
        }
    }

    /// Build a single [`ScalarValue`] for this spec, seeded by column index
    /// `i`, by generating a one-batch/one-row array and taking element 0.
    pub fn build_scalar(&self, i: usize) -> Result<ScalarValue> {
        let array = self.build_arrays(i, 1, 1)?;
        ScalarValue::try_from_array(&array[0], 0)
    }

    /// Build `num_batches` arrays of `rows_per_batch` rows of generated data.
    ///
    /// `i` is the column index: it offsets the RNG seed (see `rng`) so that
    /// repeated builds are reproducible and distinct columns differ.
    ///
    /// # Errors
    ///
    /// Returns an error for an invalid numeric range (`lo >= hi` would make
    /// `Uniform::new` fail), or if geometry/raster generation or the
    /// transform UDF fails.
    pub fn build_arrays(
        &self,
        i: usize,
        num_batches: usize,
        rows_per_batch: usize,
    ) -> Result<Vec<ArrayRef>> {
        match self {
            // Geometry variants: one vertex count / part count per variant,
            // no polygon holes unless PolygonWithHole.
            BenchmarkArgSpec::Point => self.build_geometry(
                i,
                GeometryTypeId::Point,
                num_batches,
                1,
                1,
                rows_per_batch,
                None,
            ),
            BenchmarkArgSpec::LineString(vertex_count) => self.build_geometry(
                i,
                GeometryTypeId::LineString,
                num_batches,
                *vertex_count,
                1,
                rows_per_batch,
                None,
            ),
            BenchmarkArgSpec::Polygon(vertex_count) => self.build_geometry(
                i,
                GeometryTypeId::Polygon,
                num_batches,
                *vertex_count,
                1,
                rows_per_batch,
                None,
            ),
            BenchmarkArgSpec::PolygonWithHole(vertex_count) => self.build_geometry(
                i,
                GeometryTypeId::Polygon,
                num_batches,
                *vertex_count,
                1,
                rows_per_batch,
                // Every polygon gets an interior ring.
                Some(1.0),
            ),
            BenchmarkArgSpec::MultiPoint(part_count) => self.build_geometry(
                i,
                GeometryTypeId::MultiPoint,
                num_batches,
                1,
                *part_count,
                rows_per_batch,
                None,
            ),
            BenchmarkArgSpec::Int64(lo, hi) => {
                let mut rng = self.rng(i);
                // Uniform::new samples the half-open range [lo, hi).
                let dist = Uniform::new(lo, hi)
                    .map_err(|e| exec_datafusion_err!("Invalid Int64 range [{lo}, {hi}): {e}"))?;
                (0..num_batches)
                    .map(|_| -> Result<ArrayRef> {
                        let int64_array: Int64Array =
                            (0..rows_per_batch).map(|_| rng.sample(dist)).collect();
                        Ok(Arc::new(int64_array))
                    })
                    .collect()
            }
            BenchmarkArgSpec::Float64(lo, hi) => {
                let mut rng = self.rng(i);
                let dist = Uniform::new(lo, hi)
                    .map_err(|e| exec_datafusion_err!("Invalid Float64 range [{lo}, {hi}): {e}"))?;
                (0..num_batches)
                    .map(|_| -> Result<ArrayRef> {
                        let float64_array: Float64Array =
                            (0..rows_per_batch).map(|_| rng.sample(dist)).collect();
                        Ok(Arc::new(float64_array))
                    })
                    .collect()
            }
            BenchmarkArgSpec::Int32(lo, hi) => {
                let mut rng = self.rng(i);
                let dist = Uniform::new(lo, hi)
                    .map_err(|e| exec_datafusion_err!("Invalid Int32 range [{lo}, {hi}): {e}"))?;
                (0..num_batches)
                    .map(|_| -> Result<ArrayRef> {
                        let int32_array: arrow_array::Int32Array =
                            (0..rows_per_batch).map(|_| rng.sample(dist)).collect();
                        Ok(Arc::new(int32_array))
                    })
                    .collect()
            }
            BenchmarkArgSpec::Transformed(inner, t) => {
                // Generate the inner spec's data, then run each batch
                // through the transform UDF.
                let inner_type = inner.sedona_type();
                let inner_arrays = inner.build_arrays(i, num_batches, rows_per_batch)?;
                let tester = ScalarUdfTester::new(t.clone(), vec![inner_type]);
                inner_arrays
                    .into_iter()
                    .map(|array| tester.invoke_array(array))
                    .collect::<Result<Vec<_>>>()
            }
            BenchmarkArgSpec::String(s) => {
                // Constant column: every row repeats the same string.
                let string_array = (0..num_batches)
                    .map(|_| {
                        let array = arrow_array::StringArray::from_iter_values(
                            std::iter::repeat_n(s, rows_per_batch),
                        );
                        Ok(Arc::new(array) as ArrayRef)
                    })
                    .collect::<Result<Vec<_>>>()?;
                Ok(string_array)
            }
            BenchmarkArgSpec::Raster(width, height) => {
                let mut arrays = vec![];
                for _ in 0..num_batches {
                    let tile_size = (*width, *height);
                    // One tile per row: rows_per_batch tiles in a 1-high grid.
                    let tile_count = (rows_per_batch, 1);
                    // NOTE(review): the seed is the fixed value 43 and does
                    // not incorporate `i`, so unlike the other variants
                    // raster data does not vary with the column index —
                    // confirm whether that is intentional.
                    let raster = generate_tiled_rasters(
                        tile_size,
                        tile_count,
                        BandDataType::UInt8,
                        Some(43),
                    )?;
                    arrays.push(Arc::new(raster) as ArrayRef);
                }
                Ok(arrays)
            }
        }
    }

    /// Generate `num_batches` WKB geometry arrays via
    /// [`RandomPartitionedDataBuilder`], all within a fixed (-10, 10) bound.
    ///
    /// `vertex_count` fixes the vertices per linestring/ring,
    /// `num_parts_count` the parts per multi-geometry, and
    /// `polygon_hole_rate` (0.0 when `None`) the interior-ring frequency.
    #[allow(clippy::too_many_arguments)]
    fn build_geometry(
        &self,
        i: usize,
        geom_type: GeometryTypeId,
        num_batches: usize,
        vertex_count: usize,
        num_parts_count: usize,
        rows_per_batch: usize,
        polygon_hole_rate: Option<f64>,
    ) -> Result<Vec<ArrayRef>> {
        let builder = RandomPartitionedDataBuilder::new()
            .num_partitions(1)
            .rows_per_batch(rows_per_batch)
            .batches_per_partition(num_batches)
            .bounds(Rect::new((-10.0, -10.0), (10.0, 10.0)))
            .size_range((0.1, 2.0))
            .vertices_per_linestring_range((vertex_count, vertex_count))
            .num_parts_range((num_parts_count, num_parts_count))
            .geometry_type(geom_type)
            .polygon_hole_rate(polygon_hole_rate.unwrap_or_default())
            .sedona_type(WKB_GEOMETRY);
        builder
            .partition_reader(self.rng(i), 0)
            // Column 2 of each generated batch is taken as the geometry
            // array (the tests below assert it is a WKB binary array).
            .map(|batch| -> Result<ArrayRef> { Ok(batch?.column(2).clone()) })
            .collect()
    }

    /// Deterministic RNG seeded per column index so repeated builds are
    /// reproducible and different columns get different data.
    fn rng(&self, i: usize) -> impl Rng {
        StdRng::seed_from_u64(42 + i as u64)
    }
}
/// Pre-generated input data for one benchmark, as produced by
/// [`BenchmarkArgs::build_data`].
pub struct BenchmarkData {
    // The shape the data was built from; drives how it is invoked.
    config: BenchmarkArgs,
    // Number of batches in each entry of `arrays`.
    num_batches: usize,
    // One Vec of batches per array-built spec.
    arrays: Vec<Vec<ArrayRef>>,
    // One value per scalar-valued argument.
    scalars: Vec<ScalarValue>,
}
impl BenchmarkData {
    /// Criterion label of the form `{lib}-{name}-{config:?}`.
    pub fn make_label(&self, lib: &str, name: &str) -> String {
        format!("{lib}-{name}-{:?}", self.config)
    }

    /// Invoke the scalar UDF once over every generated batch, pairing array
    /// batches with pre-built scalars according to the config shape.
    ///
    /// # Errors
    ///
    /// Returns an error if any UDF invocation fails.
    pub fn invoke_scalar(&self, udf: &ScalarUDF) -> Result<()> {
        // `sedona_types()` already returns an owned Vec, so the previous
        // `.clone()` on its result was redundant (clippy: redundant_clone).
        let tester = ScalarUdfTester::new(udf.clone(), self.config.sedona_types());
        match self.config {
            BenchmarkArgs::Array(_) => {
                for i in 0..self.num_batches {
                    tester.invoke_array(self.arrays[0][i].clone())?;
                }
            }
            BenchmarkArgs::ScalarArray(_, _) => {
                let scalar = &self.scalars[0];
                for i in 0..self.num_batches {
                    tester.invoke_scalar_array(scalar.clone(), self.arrays[0][i].clone())?;
                }
            }
            BenchmarkArgs::ArrayScalar(_, _) => {
                let scalar = &self.scalars[0];
                for i in 0..self.num_batches {
                    tester.invoke_array_scalar(self.arrays[0][i].clone(), scalar.clone())?;
                }
            }
            BenchmarkArgs::ArrayArray(_, _) => {
                for i in 0..self.num_batches {
                    tester
                        .invoke_array_array(self.arrays[0][i].clone(), self.arrays[1][i].clone())?;
                }
            }
            BenchmarkArgs::ArrayScalarScalar(_, _, _) => {
                let scalar0 = &self.scalars[0];
                let scalar1 = &self.scalars[1];
                for i in 0..self.num_batches {
                    tester.invoke_array_scalar_scalar(
                        self.arrays[0][i].clone(),
                        scalar0.clone(),
                        scalar1.clone(),
                    )?;
                }
            }
            BenchmarkArgs::ArrayArrayScalar(_, _, _) => {
                for i in 0..self.num_batches {
                    tester.invoke_array_array_scalar(
                        self.arrays[0][i].clone(),
                        self.arrays[1][i].clone(),
                        self.scalars[0].clone(),
                    )?;
                }
            }
            BenchmarkArgs::ArrayArrayArray(_, _, _) => {
                for i in 0..self.num_batches {
                    tester.invoke_arrays(vec![
                        self.arrays[0][i].clone(),
                        self.arrays[1][i].clone(),
                        self.arrays[2][i].clone(),
                    ])?;
                }
            }
            BenchmarkArgs::ArrayArrayArrayArray(_, _, _, _) => {
                for i in 0..self.num_batches {
                    tester.invoke_arrays(vec![
                        self.arrays[0][i].clone(),
                        self.arrays[1][i].clone(),
                        self.arrays[2][i].clone(),
                        self.arrays[3][i].clone(),
                    ])?;
                }
            }
        }
        Ok(())
    }

    /// Aggregate all batches of the single array argument with `udf`.
    ///
    /// # Errors
    ///
    /// Returns an internal error for any config shape other than
    /// [`BenchmarkArgs::Array`], or propagates aggregation failures.
    pub fn invoke_aggregate(&self, udf: &AggregateUDF) -> Result<ScalarValue> {
        if !matches!(self.config, BenchmarkArgs::Array(_)) {
            return sedona_internal_err!(
                "invoke_aggregate() not implemented for {:?}",
                self.config
            );
        }
        // Redundant `.clone()` on the owned Vec removed here as well.
        let tester = AggregateUdfTester::new(udf.clone(), self.config.sedona_types());
        tester.aggregate(&self.arrays[0])
    }
}
#[cfg(test)]
mod test {
    use arrow_array::{Array, StructArray};
    use datafusion_common::cast::as_binary_array;
    use datafusion_expr::{ColumnarValue, SimpleScalarUDF};
    use geo_traits::Dimensions;
    use rstest::rstest;
    use sedona_geometry::{analyze::analyze_geometry, types::GeometryTypeAndDimensions};
    use super::*;

    // Scalars built from the same column index are identical; a different
    // index yields different data. The value itself must be a single WKB
    // point.
    #[test]
    fn arg_spec_scalar() {
        let spec = BenchmarkArgSpec::Point;
        assert_eq!(spec.sedona_type(), WKB_GEOMETRY);
        let scalar = spec.build_scalar(0).unwrap();
        assert_eq!(spec.build_scalar(0).unwrap(), scalar);
        assert_ne!(spec.build_scalar(1).unwrap(), scalar);
        if let ScalarValue::Binary(Some(wkb_bytes)) = scalar {
            let wkb = wkb::reader::read_wkb(&wkb_bytes).unwrap();
            let analysis = analyze_geometry(&wkb).unwrap();
            assert_eq!(analysis.point_count, 1);
            assert_eq!(
                analysis.geometry_type,
                GeometryTypeAndDimensions::new(GeometryTypeId::Point, Dimensions::Xy)
            )
        } else {
            unreachable!("Unexpected scalar output {scalar}")
        }
    }

    // Geometry specs produce deterministic, null-free WKB arrays with the
    // expected geometry type and per-row point count (Polygon(10) is
    // expected to analyze to 11 points).
    #[rstest]
    fn arg_spec_geometry(
        #[values(
            (BenchmarkArgSpec::Point, GeometryTypeId::Point, 1),
            (BenchmarkArgSpec::LineString(10), GeometryTypeId::LineString, 10),
            (BenchmarkArgSpec::Polygon(10), GeometryTypeId::Polygon, 11),
            (BenchmarkArgSpec::MultiPoint(10), GeometryTypeId::MultiPoint, 10),
        )]
        config: (BenchmarkArgSpec, GeometryTypeId, i64),
    ) {
        let (spec, geometry_type, point_count) = config;
        assert_eq!(spec.sedona_type(), WKB_GEOMETRY);
        let arrays = spec.build_arrays(0, 2, ROWS_PER_BATCH).unwrap();
        assert_eq!(arrays.len(), 2);
        // Same column index => same data; different index => different data.
        assert_eq!(spec.build_arrays(0, 2, ROWS_PER_BATCH).unwrap(), arrays);
        assert_ne!(spec.build_arrays(1, 2, ROWS_PER_BATCH).unwrap(), arrays);
        for array in arrays {
            assert_eq!(array.data_type(), WKB_GEOMETRY.storage_type());
            assert_eq!(array.len(), ROWS_PER_BATCH);
            let binary_array = as_binary_array(&array).unwrap();
            assert_eq!(binary_array.null_count(), 0);
            for wkb_bytes in binary_array {
                let wkb = wkb::reader::read_wkb(wkb_bytes.unwrap()).unwrap();
                let analysis = analyze_geometry(&wkb).unwrap();
                assert_eq!(analysis.point_count, point_count);
                assert_eq!(
                    analysis.geometry_type,
                    GeometryTypeAndDimensions::new(geometry_type, Dimensions::Xy)
                )
            }
        }
    }

    // Float columns: deterministic per index, correct type, no nulls.
    #[test]
    fn arg_spec_float() {
        let spec = BenchmarkArgSpec::Float64(1.0, 2.0);
        assert_eq!(spec.sedona_type(), SedonaType::Arrow(DataType::Float64));
        let arrays = spec.build_arrays(0, 2, ROWS_PER_BATCH).unwrap();
        assert_eq!(arrays.len(), 2);
        assert_eq!(spec.build_arrays(0, 2, ROWS_PER_BATCH).unwrap(), arrays);
        assert_ne!(spec.build_arrays(1, 2, ROWS_PER_BATCH).unwrap(), arrays);
        for array in arrays {
            assert_eq!(array.data_type(), &DataType::Float64);
            assert_eq!(array.len(), ROWS_PER_BATCH);
            assert_eq!(array.null_count(), 0);
        }
    }

    // Int columns: deterministic per index, correct type, no nulls.
    #[test]
    fn arg_spec_int() {
        let spec = BenchmarkArgSpec::Int32(1, 10);
        assert_eq!(spec.sedona_type(), SedonaType::Arrow(DataType::Int32));
        let arrays = spec.build_arrays(0, 2, ROWS_PER_BATCH).unwrap();
        assert_eq!(arrays.len(), 2);
        assert_eq!(spec.build_arrays(0, 2, ROWS_PER_BATCH).unwrap(), arrays);
        assert_ne!(spec.build_arrays(1, 2, ROWS_PER_BATCH).unwrap(), arrays);
        for array in arrays {
            assert_eq!(array.data_type(), &DataType::Int32);
            assert_eq!(array.len(), ROWS_PER_BATCH);
            assert_eq!(array.null_count(), 0);
        }
    }

    // A Transformed spec reports the transform's return type, renders as
    // `udf_name(inner)` in Debug, and routes generated data through the UDF.
    #[test]
    fn arg_spec_transformed() {
        let udf = SimpleScalarUDF::new(
            "float32",
            vec![DataType::Float64],
            DataType::Float32,
            datafusion_expr::Volatility::Immutable,
            Arc::new(|args| -> Result<ColumnarValue> { args[0].cast_to(&DataType::Float32, None) }),
        );
        let spec =
            BenchmarkArgSpec::Transformed(BenchmarkArgSpec::Float64(1.0, 2.0).into(), udf.into());
        assert_eq!(spec.sedona_type(), SedonaType::Arrow(DataType::Float32));
        assert_eq!(format!("{spec:?}"), "float32(Float64(1.0, 2.0))");
        let arrays = spec.build_arrays(0, 2, ROWS_PER_BATCH).unwrap();
        assert_eq!(arrays.len(), 2);
        assert_eq!(spec.build_arrays(0, 2, ROWS_PER_BATCH).unwrap(), arrays);
        assert_ne!(spec.build_arrays(1, 2, ROWS_PER_BATCH).unwrap(), arrays);
        for array in arrays {
            assert_eq!(array.data_type(), &DataType::Float32);
            assert_eq!(array.len(), ROWS_PER_BATCH);
            assert_eq!(array.null_count(), 0);
        }
    }

    // Array: one array column, no scalars.
    #[test]
    fn args_array() {
        let spec = BenchmarkArgs::Array(BenchmarkArgSpec::Point);
        assert_eq!(spec.sedona_types(), [WKB_GEOMETRY]);
        let data = spec.build_data(2, ROWS_PER_BATCH).unwrap();
        assert_eq!(data.num_batches, 2);
        assert_eq!(data.arrays.len(), 1);
        assert_eq!(data.scalars.len(), 0);
        assert_eq!(data.arrays[0].len(), 2);
        assert_eq!(WKB_GEOMETRY.storage_type(), data.arrays[0][0].data_type());
    }

    // ArrayScalar: the first spec becomes the array, the second the scalar.
    #[test]
    fn args_array_scalar() {
        let spec = BenchmarkArgs::ArrayScalar(
            BenchmarkArgSpec::Point,
            BenchmarkArgSpec::Float64(1.0, 2.0),
        );
        assert_eq!(
            spec.sedona_types(),
            [WKB_GEOMETRY, SedonaType::Arrow(DataType::Float64)]
        );
        let data = spec.build_data(2, ROWS_PER_BATCH).unwrap();
        assert_eq!(data.num_batches, 2);
        assert_eq!(data.arrays.len(), 1);
        assert_eq!(data.arrays[0].len(), 2);
        assert_eq!(WKB_GEOMETRY.storage_type(), data.arrays[0][0].data_type());
        assert_eq!(data.scalars.len(), 1);
        assert_eq!(data.scalars[0].data_type(), DataType::Float64);
    }

    // ScalarArray: the first spec becomes the scalar, the second the array.
    #[test]
    fn args_scalar_array() {
        let spec = BenchmarkArgs::ScalarArray(
            BenchmarkArgSpec::Point,
            BenchmarkArgSpec::Float64(1.0, 2.0),
        );
        assert_eq!(
            spec.sedona_types(),
            [WKB_GEOMETRY, SedonaType::Arrow(DataType::Float64)]
        );
        let data = spec.build_data(2, ROWS_PER_BATCH).unwrap();
        assert_eq!(data.num_batches, 2);
        assert_eq!(data.scalars.len(), 1);
        assert_eq!(WKB_GEOMETRY.storage_type(), &data.scalars[0].data_type());
        assert_eq!(data.arrays.len(), 1);
        assert_eq!(data.arrays[0].len(), 2);
        assert_eq!(data.arrays[0][0].data_type(), &DataType::Float64);
    }

    // ArrayArray: two array columns, no scalars.
    #[test]
    fn args_array_array() {
        let spec =
            BenchmarkArgs::ArrayArray(BenchmarkArgSpec::Point, BenchmarkArgSpec::Float64(1.0, 2.0));
        assert_eq!(
            spec.sedona_types(),
            [WKB_GEOMETRY, SedonaType::Arrow(DataType::Float64)]
        );
        let data = spec.build_data(2, ROWS_PER_BATCH).unwrap();
        assert_eq!(data.num_batches, 2);
        assert_eq!(data.arrays.len(), 2);
        assert_eq!(data.scalars.len(), 0);
        assert_eq!(data.arrays[0].len(), 2);
        assert_eq!(WKB_GEOMETRY.storage_type(), data.arrays[0][0].data_type());
        assert_eq!(data.arrays[1].len(), 2);
        assert_eq!(data.arrays[1][0].data_type(), &DataType::Float64);
    }

    // ArrayScalarScalar: one array column plus two scalars.
    #[test]
    fn args_array_scalar_scalar() {
        let spec = BenchmarkArgs::ArrayScalarScalar(
            BenchmarkArgSpec::Point,
            BenchmarkArgSpec::Float64(1.0, 2.0),
            BenchmarkArgSpec::String("test".to_string()),
        );
        assert_eq!(
            spec.sedona_types(),
            [
                WKB_GEOMETRY,
                SedonaType::Arrow(DataType::Float64),
                SedonaType::Arrow(DataType::Utf8)
            ]
        );
        let data = spec.build_data(2, ROWS_PER_BATCH).unwrap();
        assert_eq!(data.num_batches, 2);
        assert_eq!(data.arrays.len(), 1);
        assert_eq!(data.scalars.len(), 2);
        assert_eq!(data.arrays[0].len(), 2);
        assert_eq!(WKB_GEOMETRY.storage_type(), data.arrays[0][0].data_type());
        assert_eq!(data.scalars[0].data_type(), DataType::Float64);
        assert_eq!(data.scalars[1].data_type(), DataType::Utf8);
    }

    // ArrayArrayScalar: note build_data constructs arrays for ALL THREE
    // specs (arrays.len() == 3) even though only the third's scalar is used.
    #[test]
    fn args_array_array_scalar() {
        let spec = BenchmarkArgs::ArrayArrayScalar(
            BenchmarkArgSpec::Point,
            BenchmarkArgSpec::Point,
            BenchmarkArgSpec::Float64(1.0, 2.0),
        );
        assert_eq!(
            spec.sedona_types(),
            [
                WKB_GEOMETRY,
                WKB_GEOMETRY,
                SedonaType::Arrow(DataType::Float64)
            ]
        );
        let data = spec.build_data(2, ROWS_PER_BATCH).unwrap();
        assert_eq!(data.num_batches, 2);
        assert_eq!(data.arrays.len(), 3);
        assert_eq!(data.scalars.len(), 1);
        assert_eq!(data.arrays[0].len(), 2);
        assert_eq!(WKB_GEOMETRY.storage_type(), data.arrays[0][0].data_type());
        assert_eq!(data.arrays[1].len(), 2);
        assert_eq!(WKB_GEOMETRY.storage_type(), data.arrays[1][0].data_type());
        assert_eq!(data.scalars[0].data_type(), DataType::Float64);
    }

    // ArrayArrayArray: three array columns, no scalars.
    #[test]
    fn args_array_array_array() {
        let spec = BenchmarkArgs::ArrayArrayArray(
            BenchmarkArgSpec::Point,
            BenchmarkArgSpec::Point,
            BenchmarkArgSpec::Float64(1.0, 2.0),
        );
        assert_eq!(
            spec.sedona_types(),
            [
                WKB_GEOMETRY,
                WKB_GEOMETRY,
                SedonaType::Arrow(DataType::Float64)
            ]
        );
        let data = spec.build_data(2, ROWS_PER_BATCH).unwrap();
        assert_eq!(data.num_batches, 2);
        assert_eq!(data.arrays.len(), 3);
        assert_eq!(data.scalars.len(), 0);
        assert_eq!(data.arrays[0].len(), 2);
        assert_eq!(WKB_GEOMETRY.storage_type(), data.arrays[0][0].data_type());
        assert_eq!(data.arrays[1].len(), 2);
        assert_eq!(WKB_GEOMETRY.storage_type(), data.arrays[1][0].data_type());
        assert_eq!(data.arrays[2].len(), 2);
        assert_eq!(data.arrays[2][0].data_type(), &DataType::Float64);
    }

    // ArrayArrayArrayArray: four array columns, no scalars.
    #[test]
    fn args_array_array_array_array() {
        let spec = BenchmarkArgs::ArrayArrayArrayArray(
            BenchmarkArgSpec::Float64(1.0, 2.0),
            BenchmarkArgSpec::Float64(3.0, 4.0),
            BenchmarkArgSpec::Float64(5.0, 6.0),
            BenchmarkArgSpec::Float64(7.0, 8.0),
        );
        assert_eq!(
            spec.sedona_types(),
            [
                SedonaType::Arrow(DataType::Float64),
                SedonaType::Arrow(DataType::Float64),
                SedonaType::Arrow(DataType::Float64),
                SedonaType::Arrow(DataType::Float64)
            ]
        );
        let data = spec.build_data(2, ROWS_PER_BATCH).unwrap();
        assert_eq!(data.num_batches, 2);
        assert_eq!(data.arrays.len(), 4);
        assert_eq!(data.scalars.len(), 0);
        assert_eq!(data.arrays[0].len(), 2);
        assert_eq!(data.arrays[0][0].data_type(), &DataType::Float64);
        assert_eq!(data.arrays[1].len(), 2);
        assert_eq!(data.arrays[1][0].data_type(), &DataType::Float64);
        assert_eq!(data.arrays[2].len(), 2);
        assert_eq!(data.arrays[2][0].data_type(), &DataType::Float64);
        assert_eq!(data.arrays[3].len(), 2);
        assert_eq!(data.arrays[3][0].data_type(), &DataType::Float64);
    }

    // Raster: one raster per row with the requested pixel dimensions.
    #[test]
    fn arg_spec_raster() {
        use sedona_raster::array::RasterStructArray;
        use sedona_raster::traits::RasterRef;
        let spec = BenchmarkArgSpec::Raster(10, 5);
        assert_eq!(spec.sedona_type(), RASTER);
        let data = spec.build_arrays(0, 2, ROWS_PER_BATCH).unwrap();
        assert_eq!(data.len(), 2);
        assert_eq!(data[0].data_type(), RASTER.storage_type());
        let raster_array = data[0].as_any().downcast_ref::<StructArray>().unwrap();
        let rasters = RasterStructArray::new(raster_array);
        assert_eq!(rasters.len(), ROWS_PER_BATCH);
        let raster = rasters.get(0).unwrap();
        let metadata = raster.metadata();
        assert_eq!(metadata.width(), 10);
        assert_eq!(metadata.height(), 5);
    }
}