use crate::{
    features::{Feature, FeatureSet, Field},
    field_type::FieldType,
    geometry::EsriGeometry,
};
use std::{result::Result, sync::Arc};
use geoarrow::GeometryArrayTrait;
use serde_json::Value;
use std::collections::HashMap;
use arrow::{
    array::{
        make_builder, Array, ArrayBuilder, BooleanBuilder, Date32Builder, Date64Builder,
        Float32Builder, Float64Builder, Int16Builder, Int32Builder, Int64Builder, Int8Builder,
        LargeStringBuilder,
        NullBuilder, StringBuilder, UInt16Builder, UInt32Builder, UInt64Builder, UInt8Builder,
    },
    datatypes::{DataType, Field as AField, Schema, SchemaBuilder},
    record_batch::RecordBatch,
};
pub fn featureset_to_arrow<const N: usize>(
x: FeatureSet<N>,
) -> Result<RecordBatch, arrow::error::ArrowError> {
let schema = field_to_schema(x.fields.unwrap());
let (mut arrays, geometries) = create_array_vecs(&schema, x.features);
let mut res_arrs = schema
.fields()
.iter()
.map(|fi| {
let arr = arrays.get_mut(fi.name()).unwrap();
arr.1.finish()
})
.collect::<Vec<_>>();
if x.geometryType.is_some() {
let (geo_field, geo_arr) = as_geoarrow_array(x.geometryType.unwrap().as_str(), geometries);
let mut sb = SchemaBuilder::from(schema);
sb.push(geo_field);
let schema = sb.finish();
res_arrs.push(geo_arr);
RecordBatch::try_new(schema.into(), res_arrs)
} else {
RecordBatch::try_new(schema.into(), res_arrs)
}
}
use geoarrow::table::GeoTable;
pub fn featureset_to_geoarrow<const N: usize>(
x: FeatureSet<N>,
) -> Result<GeoTable, geoarrow::error::GeoArrowError> {
let arrow_res = featureset_to_arrow(x)?;
let schema_ref = arrow_res.schema_ref().clone();
let geometry_index = arrow_res.schema().fields.len();
GeoTable::try_new(schema_ref, vec![arrow_res], geometry_index)
}
impl From<Field> for AField {
fn from(value: Field) -> Self {
let dtype = match value.field_type {
FieldType::EsriFieldTypeSmallInteger => DataType::Int16,
FieldType::EsriFieldTypeInteger => DataType::Int32,
FieldType::EsriFieldTypeSingle => DataType::Float32,
FieldType::EsriFieldTypeDouble => DataType::Float64,
FieldType::EsriFieldTypeString => DataType::Utf8,
FieldType::EsriFieldTypeDate => DataType::Date64,
FieldType::EsriFieldTypeOid => DataType::Int64,
FieldType::EsriFieldTypeBlob => DataType::LargeBinary,
FieldType::EsriFieldTypeGuid => DataType::Utf8,
FieldType::EsriFieldTypeGlobalId => DataType::Utf8,
FieldType::EsriFieldTypeXml => DataType::LargeUtf8,
FieldType::EsriFieldTypeRaster => unimplemented!(),
FieldType::EsriFieldTypeGeometry => unimplemented!(),
};
Self::new(value.name, dtype, true)
}
}
/// Builds an Arrow [`Schema`] from Esri field definitions, preserving their order.
fn field_to_schema(fields: Vec<Field>) -> Schema {
    let capacity = fields.len();
    fields
        .into_iter()
        .map(AField::from)
        .fold(SchemaBuilder::with_capacity(capacity), |mut acc, arrow_field| {
            acc.push(arrow_field);
            acc
        })
        .finish()
}
/// Creates one Arrow array builder per schema field and fills it from `feats`.
///
/// Returns the builders keyed by field name (each paired with its field) and the
/// geometries of the features in feature order.
///
/// Every builder receives exactly one entry per feature, so all resulting columns have
/// the same length: an attribute that is absent from a feature (or a feature without
/// any attributes) contributes a null, and attribute keys that do not correspond to a
/// schema field are ignored.
///
/// # Panics
///
/// Panics when `make_builder` or `append_value` do not support a field's data type.
fn create_array_vecs<const N: usize>(
    schema: &Schema,
    feats: Vec<Feature<N>>,
) -> (
    HashMap<&String, (&AField, Box<dyn ArrayBuilder>)>,
    Vec<Option<EsriGeometry<N>>>,
) {
    let n = feats.len();
    let mut map: HashMap<&String, (&AField, Box<dyn ArrayBuilder>)> =
        HashMap::with_capacity(schema.fields().len());
    let mut geometries = Vec::with_capacity(n);

    for field in schema.fields().iter() {
        let builder = make_builder(field.data_type(), n);
        map.insert(field.name(), (field.as_ref(), builder));
    }

    for feat in feats {
        let mut attrs = feat.attributes;
        // Drive the loop from the schema rather than from the attribute keys so that
        // each column grows by exactly one slot per feature.
        for (name, (field, builder)) in map.iter_mut() {
            // A missing attribute falls back to the default JSON value (null), which
            // `append_value` records as a null entry.
            let value = attrs
                .as_mut()
                .and_then(|a| a.remove(name.as_str()))
                .unwrap_or_default();
            append_value(value, field, builder);
        }
        geometries.push(feat.geometry);
    }

    (map, geometries)
}
/// Converts Esri geometries into a GeoArrow array selected by the Esri geometry type name.
///
/// Returns the array's extension field together with the array itself. Missing
/// geometries, and geometries whose `as_*` conversion yields `None`, are passed to the
/// array constructor as `None`.
///
/// # Panics
///
/// Panics for any `geom_type` other than `esriGeometryPoint`, `esriGeometryMultipoint`,
/// `esriGeometryPolyline` and `esriGeometryPolygon`.
fn as_geoarrow_array<const N: usize>(
    geom_type: &str,
    geoms: Vec<Option<EsriGeometry<N>>>,
) -> (Arc<AField>, Arc<dyn Array>) {
    match geom_type {
        "esriGeometryPoint" => {
            let points: Vec<_> = geoms
                .into_iter()
                .map(|geom| geom.and_then(|g| g.as_point()))
                .collect();
            let array = geoarrow::array::PointArray::from(points);
            (array.extension_field(), array.into_array_ref())
        }
        "esriGeometryMultipoint" => {
            let multipoints: Vec<_> = geoms
                .into_iter()
                .map(|geom| geom.and_then(|g| g.as_multipoint()))
                .collect();
            let array = geoarrow::array::MultiPointArray::<i32>::from(multipoints);
            (array.extension_field(), array.into_array_ref())
        }
        "esriGeometryPolyline" => {
            // Polylines are stored in a multi-line-string array.
            let polylines: Vec<_> = geoms
                .into_iter()
                .map(|geom| geom.and_then(|g| g.as_polyline()))
                .collect();
            let array = geoarrow::array::MultiLineStringArray::<i32>::from(polylines);
            (array.extension_field(), array.into_array_ref())
        }
        "esriGeometryPolygon" => {
            let polygons: Vec<_> = geoms
                .into_iter()
                .map(|geom| geom.and_then(|g| g.as_polygon()))
                .collect();
            let array = geoarrow::array::PolygonArray::<i32>::from(polygons);
            (array.extension_field(), array.into_array_ref())
        }
        _ => unimplemented!(),
    }
}
fn append_value(v: Value, f: &AField, builder: &mut Box<dyn ArrayBuilder>) -> () {
let bb = builder.as_any_mut();
match f.data_type() {
DataType::Null => {
bb.downcast_mut::<NullBuilder>()
.unwrap()
.append_empty_value();
}
DataType::Boolean => {
bb.downcast_mut::<BooleanBuilder>()
.unwrap()
.append_option(v.as_bool());
}
DataType::Int8 => {
let builder = bb.downcast_mut::<Int8Builder>().unwrap();
match v.as_i64() {
Some(v) => builder.append_value(v as i8),
None => builder.append_null(),
};
}
DataType::Int16 => {
let builder = bb.downcast_mut::<Int16Builder>().unwrap();
match v.as_i64() {
Some(v) => builder.append_value(v as i16),
None => builder.append_null(),
};
}
DataType::Int32 => {
let builder = bb.downcast_mut::<Int32Builder>().unwrap();
match v.as_i64() {
Some(v) => builder.append_value(v as i32),
None => builder.append_null(),
};
}
DataType::Int64 => {
bb.downcast_mut::<Int64Builder>()
.unwrap()
.append_option(v.as_i64());
}
DataType::UInt8 => {
let builder = bb.downcast_mut::<UInt8Builder>().unwrap();
match v.as_u64() {
Some(v) => builder.append_value(v as u8),
None => builder.append_null(),
};
}
DataType::UInt16 => {
let builder = bb.downcast_mut::<UInt16Builder>().unwrap();
match v.as_u64() {
Some(v) => builder.append_value(v as u16),
None => builder.append_null(),
};
}
DataType::UInt32 => {
let builder = bb.downcast_mut::<UInt32Builder>().unwrap();
match v.as_u64() {
Some(v) => builder.append_value(v as u32),
None => builder.append_null(),
};
}
DataType::UInt64 => {
bb.downcast_mut::<UInt64Builder>()
.unwrap()
.append_option(v.as_u64());
}
DataType::Float16 => {
todo!()
}
DataType::Float32 => {
let builder = bb.downcast_mut::<Float32Builder>().unwrap();
match v.as_f64() {
Some(v) => builder.append_value(v as f32),
None => builder.append_null(),
};
}
DataType::Float64 => {
bb.downcast_mut::<Float64Builder>()
.unwrap()
.append_option(v.as_f64());
}
DataType::Timestamp(_, _) => todo!(),
DataType::Date32 => {
let builder = bb.downcast_mut::<Date32Builder>().unwrap();
match v.as_i64() {
Some(v) => builder.append_value((v / 100000_i64) as i32),
None => builder.append_null(),
};
}
DataType::Date64 => {
let builder = bb.downcast_mut::<Date64Builder>().unwrap();
match v.as_i64() {
Some(v) => builder.append_value(v),
None => builder.append_null(),
};
}
DataType::Time32(_) => todo!(),
DataType::Time64(_) => todo!(),
DataType::Duration(_) => todo!(),
DataType::Interval(_) => todo!(),
DataType::Binary => todo!(),
DataType::FixedSizeBinary(_) => todo!(),
DataType::LargeBinary => todo!(),
DataType::Utf8 => {
bb.downcast_mut::<StringBuilder>()
.unwrap()
.append_option(v.as_str());
}
DataType::LargeUtf8 => {
bb.downcast_mut::<StringBuilder>()
.unwrap()
.append_option(v.as_str());
}
DataType::List(_) => todo!(),
DataType::FixedSizeList(_, _) => todo!(),
DataType::LargeList(_) => todo!(),
DataType::Struct(_) => todo!(),
DataType::Union(_, _) => todo!(),
DataType::Dictionary(_, _) => todo!(),
DataType::Decimal128(_, _) => todo!(),
DataType::Decimal256(_, _) => todo!(),
DataType::Map(_, _) => todo!(),
DataType::RunEndEncoded(_, _) => todo!(),
_ => todo!(),
}
}