geoarrow_array/array/
wkb.rs

1use std::sync::Arc;
2
3use arrow_array::builder::GenericByteBuilder;
4use arrow_array::cast::AsArray;
5use arrow_array::{
6    Array, ArrayRef, BinaryArray, GenericBinaryArray, LargeBinaryArray, OffsetSizeTrait,
7};
8use arrow_buffer::NullBuffer;
9use arrow_schema::{DataType, Field};
10use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
11use geoarrow_schema::{GeoArrowType, Metadata, WkbType};
12use wkb::reader::Wkb;
13
14use crate::array::WkbViewArray;
15use crate::capacity::WkbCapacity;
16use crate::trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow};
17use crate::util::{offsets_buffer_i32_to_i64, offsets_buffer_i64_to_i32};
18
/// An immutable array of WKB geometries.
///
/// This is stored either as an Arrow [`BinaryArray`] or [`LargeBinaryArray`] and is semantically
/// equivalent to `Vec<Option<Wkb>>` due to the internal validity bitmap.
///
/// The offset type `O` selects between the two storage flavours; see the [`WkbArray`] and
/// [`LargeWkbArray`] aliases.
///
/// Refer to [`crate::cast`] for converting this array to other GeoArrow array types.
#[derive(Debug, Clone, PartialEq)]
pub struct GenericWkbArray<O: OffsetSizeTrait> {
    /// Extension type of this array; it is built from the [`Metadata`] passed to
    /// [`GenericWkbArray::new`].
    pub(crate) data_type: WkbType,
    /// The underlying Arrow binary array holding the raw bytes of each element together with
    /// the validity bitmap.
    pub(crate) array: GenericBinaryArray<O>,
}
30
31// Implement geometry accessors
32impl<O: OffsetSizeTrait> GenericWkbArray<O> {
33    /// Create a new GenericWkbArray from a BinaryArray
34    pub fn new(array: GenericBinaryArray<O>, metadata: Arc<Metadata>) -> Self {
35        Self {
36            data_type: WkbType::new(metadata),
37            array,
38        }
39    }
40
41    /// Returns true if the array is empty
42    pub fn is_empty(&self) -> bool {
43        self.len() == 0
44    }
45
46    /// Access the underlying binary array.
47    pub fn inner(&self) -> &GenericBinaryArray<O> {
48        &self.array
49    }
50
51    /// The lengths of each buffer contained in this array.
52    pub fn buffer_lengths(&self) -> WkbCapacity {
53        WkbCapacity::new(
54            self.array.offsets().last().unwrap().to_usize().unwrap(),
55            self.len(),
56        )
57    }
58
59    /// The number of bytes occupied by this array.
60    pub fn num_bytes(&self) -> usize {
61        let validity_len = self
62            .array
63            .nulls()
64            .as_ref()
65            .map(|v| v.buffer().len())
66            .unwrap_or(0);
67        validity_len + self.buffer_lengths().num_bytes::<O>()
68    }
69
70    /// Slice this [`GenericWkbArray`].
71    ///
72    ///
73    /// # Panic
74    /// This function panics iff `offset + length > self.len()`.
75    #[inline]
76    pub fn slice(&self, offset: usize, length: usize) -> Self {
77        assert!(
78            offset + length <= self.len(),
79            "offset + length may not exceed length of array"
80        );
81        Self {
82            array: self.array.slice(offset, length),
83            data_type: self.data_type.clone(),
84        }
85    }
86
87    /// Replace the [Metadata] in the array with the given metadata
88    pub fn with_metadata(&self, metadata: Arc<Metadata>) -> Self {
89        let mut arr = self.clone();
90        arr.data_type = self.data_type.clone().with_metadata(metadata);
91        arr
92    }
93}
94
95impl<O: OffsetSizeTrait> GeoArrowArray for GenericWkbArray<O> {
96    fn as_any(&self) -> &dyn std::any::Any {
97        self
98    }
99
100    fn into_array_ref(self) -> ArrayRef {
101        Arc::new(self.into_arrow())
102    }
103
104    fn to_array_ref(&self) -> ArrayRef {
105        self.clone().into_array_ref()
106    }
107
108    #[inline]
109    fn len(&self) -> usize {
110        self.array.len()
111    }
112
113    #[inline]
114    fn logical_nulls(&self) -> Option<NullBuffer> {
115        self.array.logical_nulls()
116    }
117
118    #[inline]
119    fn logical_null_count(&self) -> usize {
120        self.array.logical_null_count()
121    }
122
123    #[inline]
124    fn is_null(&self, i: usize) -> bool {
125        self.array.is_null(i)
126    }
127
128    fn data_type(&self) -> GeoArrowType {
129        if O::IS_LARGE {
130            GeoArrowType::LargeWkb(self.data_type.clone())
131        } else {
132            GeoArrowType::Wkb(self.data_type.clone())
133        }
134    }
135
136    fn slice(&self, offset: usize, length: usize) -> Arc<dyn GeoArrowArray> {
137        Arc::new(self.slice(offset, length))
138    }
139
140    fn with_metadata(self, metadata: Arc<Metadata>) -> Arc<dyn GeoArrowArray> {
141        Arc::new(Self::with_metadata(&self, metadata))
142    }
143}
144
145impl<'a, O: OffsetSizeTrait> GeoArrowArrayAccessor<'a> for GenericWkbArray<O> {
146    type Item = Wkb<'a>;
147
148    unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult<Self::Item> {
149        let buf = self.array.value(index);
150        Wkb::try_new(buf).map_err(|err| GeoArrowError::External(Box::new(err)))
151    }
152}
153
154impl<O: OffsetSizeTrait> IntoArrow for GenericWkbArray<O> {
155    type ArrowArray = GenericBinaryArray<O>;
156    type ExtensionType = WkbType;
157
158    fn into_arrow(self) -> Self::ArrowArray {
159        self.array
160    }
161
162    fn extension_type(&self) -> &Self::ExtensionType {
163        &self.data_type
164    }
165}
166
167impl<O: OffsetSizeTrait> From<(GenericBinaryArray<O>, WkbType)> for GenericWkbArray<O> {
168    fn from((value, typ): (GenericBinaryArray<O>, WkbType)) -> Self {
169        Self {
170            data_type: typ,
171            array: value,
172        }
173    }
174}
175
176impl TryFrom<(&dyn Array, WkbType)> for GenericWkbArray<i32> {
177    type Error = GeoArrowError;
178    fn try_from((value, typ): (&dyn Array, WkbType)) -> GeoArrowResult<Self> {
179        match value.data_type() {
180            DataType::Binary => Ok((value.as_binary::<i32>().clone(), typ).into()),
181            DataType::LargeBinary => {
182                let geom_array: GenericWkbArray<i64> =
183                    (value.as_binary::<i64>().clone(), typ).into();
184                geom_array.try_into()
185            }
186            dt => Err(GeoArrowError::InvalidGeoArrow(format!(
187                "Unexpected GenericWkbArray DataType: {dt:?}",
188            ))),
189        }
190    }
191}
192
193impl TryFrom<(&dyn Array, WkbType)> for GenericWkbArray<i64> {
194    type Error = GeoArrowError;
195    fn try_from((value, typ): (&dyn Array, WkbType)) -> GeoArrowResult<Self> {
196        match value.data_type() {
197            DataType::Binary => {
198                let geom_array: GenericWkbArray<i32> =
199                    (value.as_binary::<i32>().clone(), typ).into();
200                Ok(geom_array.into())
201            }
202            DataType::LargeBinary => Ok((value.as_binary::<i64>().clone(), typ).into()),
203            dt => Err(GeoArrowError::InvalidGeoArrow(format!(
204                "Unexpected GenericWkbArray DataType: {dt:?}",
205            ))),
206        }
207    }
208}
209
210impl TryFrom<(&dyn Array, &Field)> for GenericWkbArray<i32> {
211    type Error = GeoArrowError;
212
213    fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult<Self> {
214        let typ = field
215            .try_extension_type::<WkbType>()
216            .ok()
217            .unwrap_or_default();
218        (arr, typ).try_into()
219    }
220}
221
222impl TryFrom<(&dyn Array, &Field)> for GenericWkbArray<i64> {
223    type Error = GeoArrowError;
224
225    fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult<Self> {
226        let typ = field
227            .try_extension_type::<WkbType>()
228            .ok()
229            .unwrap_or_default();
230        (arr, typ).try_into()
231    }
232}
233
234impl From<GenericWkbArray<i32>> for GenericWkbArray<i64> {
235    fn from(value: GenericWkbArray<i32>) -> Self {
236        let binary_array = value.array;
237        let (offsets, values, nulls) = binary_array.into_parts();
238        let array = LargeBinaryArray::new(offsets_buffer_i32_to_i64(&offsets), values, nulls);
239        Self {
240            data_type: value.data_type,
241            array,
242        }
243    }
244}
245
246impl TryFrom<GenericWkbArray<i64>> for GenericWkbArray<i32> {
247    type Error = GeoArrowError;
248
249    fn try_from(value: GenericWkbArray<i64>) -> GeoArrowResult<Self> {
250        let binary_array = value.array;
251        let (offsets, values, nulls) = binary_array.into_parts();
252        let array = BinaryArray::new(offsets_buffer_i64_to_i32(&offsets)?, values, nulls);
253        Ok(Self {
254            data_type: value.data_type,
255            array,
256        })
257    }
258}
259
260impl<O: OffsetSizeTrait> From<WkbViewArray> for GenericWkbArray<O> {
261    fn from(value: WkbViewArray) -> Self {
262        let wkb_type = value.data_type;
263        let binary_view_array = value.array;
264
265        // Copy the bytes from the binary view array into a new byte array
266        let mut builder = GenericByteBuilder::new();
267        binary_view_array
268            .iter()
269            .for_each(|value| builder.append_option(value));
270
271        Self {
272            data_type: wkb_type,
273            array: builder.finish(),
274        }
275    }
276}
277
/// A [`GenericWkbArray`] using `i32` offsets
///
/// The byte length of each element is represented by an i32. This is the flavour backed by an
/// Arrow [`BinaryArray`].
///
/// See [`GenericWkbArray`] for more information and examples
pub type WkbArray = GenericWkbArray<i32>;
284
/// A [`GenericWkbArray`] using `i64` offsets
///
/// The byte length of each element is represented by an i64. This is the flavour backed by an
/// Arrow [`LargeBinaryArray`].
///
/// See [`GenericWkbArray`] for more information and examples
pub type LargeWkbArray = GenericWkbArray<i64>;
291
#[cfg(test)]
mod test {
    use arrow_array::builder::{BinaryBuilder, LargeBinaryBuilder};

    use super::*;
    use crate::GeoArrowArray;
    use crate::builder::WkbBuilder;
    use crate::test::point;

    /// Build a three-element WKB array from the shared point fixtures, with default metadata.
    fn wkb_data<O: OffsetSizeTrait>() -> GenericWkbArray<O> {
        let mut wkb_builder = WkbBuilder::new(WkbType::new(Default::default()));
        wkb_builder.push_geometry(Some(&point::p0())).unwrap();
        wkb_builder.push_geometry(Some(&point::p1())).unwrap();
        wkb_builder.push_geometry(Some(&point::p2())).unwrap();
        wkb_builder.finish()
    }

    /// An `i32` array survives a round trip through `(&dyn Array, &Field)`.
    #[test]
    fn parse_dyn_array_i32() {
        let expected = wkb_data::<i32>();
        let dyn_array = expected.to_array_ref();
        let field = Field::new("geometry", dyn_array.data_type().clone(), true)
            .with_extension_type(expected.data_type.clone());
        let round_tripped = GenericWkbArray::<i32>::try_from((dyn_array.as_ref(), &field)).unwrap();

        assert_eq!(expected, round_tripped);
    }

    /// An `i64` array survives a round trip through `(&dyn Array, &Field)`.
    #[test]
    fn parse_dyn_array_i64() {
        let expected = wkb_data::<i64>();
        let dyn_array = expected.to_array_ref();
        let field = Field::new("geometry", dyn_array.data_type().clone(), true)
            .with_extension_type(expected.data_type.clone());
        let round_tripped = GenericWkbArray::<i64>::try_from((dyn_array.as_ref(), &field)).unwrap();

        assert_eq!(expected, round_tripped);
    }

    /// Widening to `i64` and narrowing back yields the original `i32` array.
    #[test]
    fn convert_i32_to_i64() {
        let original = wkb_data::<i32>();
        let widened = GenericWkbArray::<i64>::from(original.clone());
        let narrowed = GenericWkbArray::<i32>::try_from(widened.clone()).unwrap();

        assert_eq!(original, narrowed);
    }

    /// Narrowing to `i32` and widening back yields the original `i64` array.
    #[test]
    fn convert_i64_to_i32_to_i64() {
        let original = wkb_data::<i64>();
        let narrowed = GenericWkbArray::<i32>::try_from(original.clone()).unwrap();
        let widened = GenericWkbArray::<i64>::from(narrowed.clone());

        assert_eq!(original, widened);
    }

    /// Passing a field without an extension name should not panic
    #[test]
    fn allow_field_without_extension_name() {
        // Binary array (i32 offsets)
        let mut binary_builder = BinaryBuilder::new();
        binary_builder.append_value(b"a");
        let dyn_array = Arc::new(binary_builder.finish()) as ArrayRef;
        let plain_field = Field::new("geometry", dyn_array.data_type().clone(), true);
        let _wkb_arr = GenericWkbArray::<i32>::try_from((dyn_array.as_ref(), &plain_field)).unwrap();

        // Large binary array (i64 offsets)
        let mut large_binary_builder = LargeBinaryBuilder::new();
        large_binary_builder.append_value(b"a");
        let dyn_array = Arc::new(large_binary_builder.finish()) as ArrayRef;
        let plain_field = Field::new("geometry", dyn_array.data_type().clone(), true);
        let _wkb_arr = GenericWkbArray::<i64>::try_from((dyn_array.as_ref(), &plain_field)).unwrap();
    }
}