geoarrow_array/array/
multilinestring.rs

1use std::sync::Arc;
2
3use arrow_array::cast::AsArray;
4use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
5use arrow_buffer::{NullBuffer, OffsetBuffer};
6use arrow_schema::{DataType, Field};
7use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
8use geoarrow_schema::type_id::GeometryTypeId;
9use geoarrow_schema::{CoordType, Dimension, GeoArrowType, Metadata, MultiLineStringType};
10
11use crate::array::{CoordBuffer, GenericWkbArray, LineStringArray};
12use crate::builder::MultiLineStringBuilder;
13use crate::capacity::MultiLineStringCapacity;
14use crate::eq::offset_buffer_eq;
15use crate::scalar::MultiLineString;
16use crate::trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow};
17use crate::util::{OffsetBufferUtils, offsets_buffer_i64_to_i32};
18
/// An immutable array of MultiLineString geometries.
///
/// This is semantically equivalent to `Vec<Option<MultiLineString>>` due to the internal validity
/// bitmap.
///
/// The geometries are stored as two levels of offsets over a single coordinate buffer:
/// `geom_offsets` index into `ring_offsets`, and `ring_offsets` index into `coords`.
#[derive(Debug, Clone)]
pub struct MultiLineStringArray {
    /// Extension type of this array (dimension, coordinate type and metadata)
    pub(crate) data_type: MultiLineStringType,

    /// Coordinates referenced by `ring_offsets`
    pub(crate) coords: CoordBuffer,

    /// Offsets into `ring_offsets` where each geometry (MultiLineString) starts
    pub(crate) geom_offsets: OffsetBuffer<i32>,

    /// Offsets into the coordinate array where each line string starts
    ///
    /// The field keeps the name "ring" even though the elements of a MultiLineString are line
    /// strings rather than rings.
    pub(crate) ring_offsets: OffsetBuffer<i32>,

    /// Validity bitmap
    pub(crate) nulls: Option<NullBuffer>,
}
38
39pub(super) fn check(
40    coords: &CoordBuffer,
41    geom_offsets: &OffsetBuffer<i32>,
42    ring_offsets: &OffsetBuffer<i32>,
43    validity_len: Option<usize>,
44) -> GeoArrowResult<()> {
45    if validity_len.is_some_and(|len| len != geom_offsets.len_proxy()) {
46        return Err(GeoArrowError::InvalidGeoArrow(
47            "nulls mask length must match the number of values".to_string(),
48        ));
49    }
50
51    if *ring_offsets.last() as usize != coords.len() {
52        return Err(GeoArrowError::InvalidGeoArrow(
53            "largest ring offset must match coords length".to_string(),
54        ));
55    }
56
57    if *geom_offsets.last() as usize != ring_offsets.len_proxy() {
58        return Err(GeoArrowError::InvalidGeoArrow(
59            "largest geometry offset must match ring offsets length".to_string(),
60        ));
61    }
62
63    Ok(())
64}
65
66impl MultiLineStringArray {
67    /// Create a new MultiLineStringArray from parts
68    ///
69    /// # Implementation
70    ///
71    /// This function is `O(1)`.
72    ///
73    /// # Panics
74    ///
75    /// - if the nulls is not `None` and its length is different from the number of geometries
76    /// - if the largest ring offset does not match the number of coordinates
77    /// - if the largest geometry offset does not match the size of ring offsets
78    pub fn new(
79        coords: CoordBuffer,
80        geom_offsets: OffsetBuffer<i32>,
81        ring_offsets: OffsetBuffer<i32>,
82        nulls: Option<NullBuffer>,
83        metadata: Arc<Metadata>,
84    ) -> Self {
85        Self::try_new(coords, geom_offsets, ring_offsets, nulls, metadata).unwrap()
86    }
87
88    /// Create a new MultiLineStringArray from parts
89    ///
90    /// # Implementation
91    ///
92    /// This function is `O(1)`.
93    ///
94    /// # Errors
95    ///
96    /// - if the nulls is not `None` and its length is different from the number of geometries
97    /// - if the largest ring offset does not match the number of coordinates
98    /// - if the largest geometry offset does not match the size of ring offsets
99    pub fn try_new(
100        coords: CoordBuffer,
101        geom_offsets: OffsetBuffer<i32>,
102        ring_offsets: OffsetBuffer<i32>,
103        nulls: Option<NullBuffer>,
104        metadata: Arc<Metadata>,
105    ) -> GeoArrowResult<Self> {
106        check(
107            &coords,
108            &geom_offsets,
109            &ring_offsets,
110            nulls.as_ref().map(|v| v.len()),
111        )?;
112        Ok(Self {
113            data_type: MultiLineStringType::new(coords.dim(), metadata)
114                .with_coord_type(coords.coord_type()),
115            coords,
116            geom_offsets,
117            ring_offsets,
118            nulls,
119        })
120    }
121
122    fn vertices_field(&self) -> Arc<Field> {
123        Field::new("vertices", self.coords.storage_type(), false).into()
124    }
125
126    fn linestrings_field(&self) -> Arc<Field> {
127        Field::new_list("linestrings", self.vertices_field(), false).into()
128    }
129
130    /// Access the underlying coordinate buffer
131    pub fn coords(&self) -> &CoordBuffer {
132        &self.coords
133    }
134
135    /// Access the underlying geometry offsets buffer
136    pub fn geom_offsets(&self) -> &OffsetBuffer<i32> {
137        &self.geom_offsets
138    }
139
140    /// Access the underlying ring offsets buffer
141    pub fn ring_offsets(&self) -> &OffsetBuffer<i32> {
142        &self.ring_offsets
143    }
144
145    /// The lengths of each buffer contained in this array.
146    pub fn buffer_lengths(&self) -> MultiLineStringCapacity {
147        MultiLineStringCapacity::new(
148            *self.ring_offsets.last() as usize,
149            *self.geom_offsets.last() as usize,
150            self.len(),
151        )
152    }
153
154    /// The number of bytes occupied by this array.
155    pub fn num_bytes(&self) -> usize {
156        let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0);
157        validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension())
158    }
159
160    /// Slice this [`MultiLineStringArray`].
161    ///
162    /// # Panic
163    ///
164    /// This function panics iff `offset + length > self.len()`.
165    #[inline]
166    pub fn slice(&self, offset: usize, length: usize) -> Self {
167        assert!(
168            offset + length <= self.len(),
169            "offset + length may not exceed length of array"
170        );
171        // Note: we **only** slice the geom_offsets and not any actual data. Otherwise the offsets
172        // would be in the wrong location.
173        Self {
174            data_type: self.data_type.clone(),
175            coords: self.coords.clone(),
176            geom_offsets: self.geom_offsets.slice(offset, length),
177            ring_offsets: self.ring_offsets.clone(),
178            nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)),
179        }
180    }
181
182    /// Change the [`CoordType`] of this array.
183    pub fn into_coord_type(self, coord_type: CoordType) -> Self {
184        Self {
185            data_type: self.data_type.with_coord_type(coord_type),
186            coords: self.coords.into_coord_type(coord_type),
187            ..self
188        }
189    }
190
191    /// Change the [`Metadata`] of this array.
192    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
193        Self {
194            data_type: self.data_type.with_metadata(metadata),
195            ..self
196        }
197    }
198}
199
200impl GeoArrowArray for MultiLineStringArray {
201    fn as_any(&self) -> &dyn std::any::Any {
202        self
203    }
204
205    fn into_array_ref(self) -> ArrayRef {
206        Arc::new(self.into_arrow())
207    }
208
209    fn to_array_ref(&self) -> ArrayRef {
210        self.clone().into_array_ref()
211    }
212
213    #[inline]
214    fn len(&self) -> usize {
215        self.geom_offsets.len_proxy()
216    }
217
218    #[inline]
219    fn logical_nulls(&self) -> Option<NullBuffer> {
220        self.nulls.clone()
221    }
222
223    #[inline]
224    fn logical_null_count(&self) -> usize {
225        self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0)
226    }
227
228    #[inline]
229    fn is_null(&self, i: usize) -> bool {
230        self.nulls
231            .as_ref()
232            .map(|n| n.is_null(i))
233            .unwrap_or_default()
234    }
235
236    fn data_type(&self) -> GeoArrowType {
237        GeoArrowType::MultiLineString(self.data_type.clone())
238    }
239
240    fn slice(&self, offset: usize, length: usize) -> Arc<dyn GeoArrowArray> {
241        Arc::new(self.slice(offset, length))
242    }
243
244    fn with_metadata(self, metadata: Arc<Metadata>) -> Arc<dyn GeoArrowArray> {
245        Arc::new(self.with_metadata(metadata))
246    }
247}
248
249impl<'a> GeoArrowArrayAccessor<'a> for MultiLineStringArray {
250    type Item = MultiLineString<'a>;
251
252    unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult<Self::Item> {
253        Ok(MultiLineString::new(
254            &self.coords,
255            &self.geom_offsets,
256            &self.ring_offsets,
257            index,
258        ))
259    }
260}
261
262impl IntoArrow for MultiLineStringArray {
263    type ArrowArray = GenericListArray<i32>;
264    type ExtensionType = MultiLineStringType;
265
266    fn into_arrow(self) -> Self::ArrowArray {
267        let vertices_field = self.vertices_field();
268        let linestrings_field = self.linestrings_field();
269        let nulls = self.nulls;
270        let coord_array = self.coords.into_array_ref();
271        let ring_array = Arc::new(GenericListArray::new(
272            vertices_field,
273            self.ring_offsets,
274            coord_array,
275            None,
276        ));
277        GenericListArray::new(linestrings_field, self.geom_offsets, ring_array, nulls)
278    }
279
280    fn extension_type(&self) -> &Self::ExtensionType {
281        &self.data_type
282    }
283}
284
285impl TryFrom<(&GenericListArray<i32>, MultiLineStringType)> for MultiLineStringArray {
286    type Error = GeoArrowError;
287
288    fn try_from(
289        (geom_array, typ): (&GenericListArray<i32>, MultiLineStringType),
290    ) -> GeoArrowResult<Self> {
291        let geom_offsets = geom_array.offsets();
292        let nulls = geom_array.nulls();
293
294        let rings_dyn_array = geom_array.values();
295        let rings_array = rings_dyn_array.as_list::<i32>();
296
297        let ring_offsets = rings_array.offsets();
298        let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?;
299
300        Ok(Self::new(
301            coords,
302            geom_offsets.clone(),
303            ring_offsets.clone(),
304            nulls.cloned(),
305            typ.metadata().clone(),
306        ))
307    }
308}
309
310impl TryFrom<(&GenericListArray<i64>, MultiLineStringType)> for MultiLineStringArray {
311    type Error = GeoArrowError;
312
313    fn try_from(
314        (geom_array, typ): (&GenericListArray<i64>, MultiLineStringType),
315    ) -> GeoArrowResult<Self> {
316        let geom_offsets = offsets_buffer_i64_to_i32(geom_array.offsets())?;
317        let nulls = geom_array.nulls();
318
319        let rings_dyn_array = geom_array.values();
320        let rings_array = rings_dyn_array.as_list::<i64>();
321
322        let ring_offsets = offsets_buffer_i64_to_i32(rings_array.offsets())?;
323        let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?;
324
325        Ok(Self::new(
326            coords,
327            geom_offsets.clone(),
328            ring_offsets.clone(),
329            nulls.cloned(),
330            typ.metadata().clone(),
331        ))
332    }
333}
334
335impl TryFrom<(&dyn Array, MultiLineStringType)> for MultiLineStringArray {
336    type Error = GeoArrowError;
337
338    fn try_from((value, typ): (&dyn Array, MultiLineStringType)) -> GeoArrowResult<Self> {
339        match value.data_type() {
340            DataType::List(_) => (value.as_list::<i32>(), typ).try_into(),
341            DataType::LargeList(_) => (value.as_list::<i64>(), typ).try_into(),
342            dt => Err(GeoArrowError::InvalidGeoArrow(format!(
343                "Unexpected MultiLineString DataType: {dt:?}",
344            ))),
345        }
346    }
347}
348
349impl TryFrom<(&dyn Array, &Field)> for MultiLineStringArray {
350    type Error = GeoArrowError;
351
352    fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult<Self> {
353        let typ = field.try_extension_type::<MultiLineStringType>()?;
354        (arr, typ).try_into()
355    }
356}
357
358impl<O: OffsetSizeTrait> TryFrom<(GenericWkbArray<O>, MultiLineStringType)>
359    for MultiLineStringArray
360{
361    type Error = GeoArrowError;
362
363    fn try_from(value: (GenericWkbArray<O>, MultiLineStringType)) -> GeoArrowResult<Self> {
364        let mut_arr: MultiLineStringBuilder = value.try_into()?;
365        Ok(mut_arr.finish())
366    }
367}
368
369impl From<LineStringArray> for MultiLineStringArray {
370    fn from(value: LineStringArray) -> Self {
371        let (coord_type, dimension, metadata) = value.data_type.into_inner();
372        let new_type = MultiLineStringType::new(dimension, metadata).with_coord_type(coord_type);
373
374        let coords = value.coords;
375        let geom_offsets = OffsetBuffer::from_lengths(vec![1; coords.len()]);
376        let ring_offsets = value.geom_offsets;
377        let nulls = value.nulls;
378        Self {
379            data_type: new_type,
380            coords,
381            geom_offsets,
382            ring_offsets,
383            nulls,
384        }
385    }
386}
387
388impl PartialEq for MultiLineStringArray {
389    fn eq(&self, other: &Self) -> bool {
390        self.nulls == other.nulls
391            && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets)
392            && offset_buffer_eq(&self.ring_offsets, &other.ring_offsets)
393            && self.coords == other.coords
394    }
395}
396
397impl GeometryTypeId for MultiLineStringArray {
398    const GEOMETRY_TYPE_OFFSET: i8 = 5;
399
400    fn dimension(&self) -> Dimension {
401        self.data_type.dimension()
402    }
403}
404
#[cfg(test)]
mod test {
    use geo_traits::to_geo::ToGeoMultiLineString;
    use geoarrow_schema::{CoordType, Dimension};

    use super::*;
    use crate::test::multilinestring;

    /// Geometries pushed through the builder must come back unchanged, nulls included, both
    /// from the full array and from a slice of it.
    #[test]
    fn geo_round_trip() {
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            let geoms = [
                Some(multilinestring::ml0()),
                None,
                Some(multilinestring::ml1()),
                None,
            ];
            let typ = MultiLineStringType::new(Dimension::XY, Default::default())
                .with_coord_type(coord_type);
            let geo_arr =
                MultiLineStringBuilder::from_nullable_multi_line_strings(&geoms, typ).finish();

            for (i, g) in geo_arr.iter().enumerate() {
                assert_eq!(
                    geoms[i],
                    g.transpose().unwrap().map(|g| g.to_multi_line_string())
                );
            }

            // Test sliced
            // The slice covers the last two elements, so index `i` maps to `geoms[i + 2]`.
            for (i, g) in geo_arr.slice(2, 2).iter().enumerate() {
                assert_eq!(
                    geoms[i + 2],
                    g.transpose().unwrap().map(|g| g.to_multi_line_string())
                );
            }
        }
    }

    /// Converting the fixture array to `geo` geometries and rebuilding an array from them must
    /// yield an array equal to the fixture.
    #[test]
    fn geo_round_trip2() {
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            let geo_arr = multilinestring::array(coord_type, Dimension::XY);
            let geo_geoms = geo_arr
                .iter()
                .map(|x| x.transpose().unwrap().map(|g| g.to_multi_line_string()))
                .collect::<Vec<_>>();

            let typ = MultiLineStringType::new(Dimension::XY, Default::default())
                .with_coord_type(coord_type);
            let geo_arr2 =
                MultiLineStringBuilder::from_nullable_multi_line_strings(&geo_geoms, typ).finish();
            assert_eq!(geo_arr, geo_arr2);
        }
    }

    /// Round-trip through Arrow for every coordinate type and dimension, exercising both the
    /// `(array, extension type)` and the `(array, field)` conversions.
    #[test]
    fn try_from_arrow() {
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            for dim in [
                Dimension::XY,
                Dimension::XYZ,
                Dimension::XYM,
                Dimension::XYZM,
            ] {
                let geo_arr = multilinestring::array(coord_type, dim);

                let extension_type = geo_arr.extension_type().clone();
                let field = extension_type.to_field("geometry", true);

                let arrow_arr = geo_arr.to_array_ref();

                let geo_arr2: MultiLineStringArray =
                    (arrow_arr.as_ref(), extension_type).try_into().unwrap();
                let geo_arr3: MultiLineStringArray =
                    (arrow_arr.as_ref(), &field).try_into().unwrap();

                assert_eq!(geo_arr, geo_arr2);
                assert_eq!(geo_arr, geo_arr3);
            }
        }
    }

    /// Equality must hold across coordinate layouts (interleaved vs. separated) and must fail
    /// when one side is a shorter slice.
    #[test]
    fn partial_eq() {
        for dim in [
            Dimension::XY,
            Dimension::XYZ,
            Dimension::XYM,
            Dimension::XYZM,
        ] {
            let arr1 = multilinestring::array(CoordType::Interleaved, dim);
            let arr2 = multilinestring::array(CoordType::Separated, dim);
            assert_eq!(arr1, arr1);
            assert_eq!(arr2, arr2);
            assert_eq!(arr1, arr2);

            assert_ne!(arr1, arr2.slice(0, 2));
        }
    }
}