geoarrow_array/array/
multilinestring.rs

1use std::sync::Arc;
2
3use arrow_array::cast::AsArray;
4use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
5use arrow_buffer::{NullBuffer, OffsetBuffer};
6use arrow_schema::{DataType, Field};
7use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
8use geoarrow_schema::type_id::GeometryTypeId;
9use geoarrow_schema::{CoordType, Dimension, GeoArrowType, Metadata, MultiLineStringType};
10
11use crate::array::{CoordBuffer, GenericWkbArray, LineStringArray};
12use crate::builder::MultiLineStringBuilder;
13use crate::capacity::MultiLineStringCapacity;
14use crate::eq::offset_buffer_eq;
15use crate::scalar::MultiLineString;
16use crate::trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow};
17use crate::util::{OffsetBufferUtils, offsets_buffer_i64_to_i32};
18
/// An immutable array of MultiLineString geometries.
///
/// This is semantically equivalent to `Vec<Option<MultiLineString>>` due to the internal validity
/// bitmap.
///
/// Geometries are described by two levels of offsets over one shared coordinate buffer:
/// `geom_offsets` indexes into `ring_offsets`, and `ring_offsets` indexes into `coords`.
#[derive(Debug, Clone)]
pub struct MultiLineStringArray {
    /// The GeoArrow type of this array (coordinate type, dimension and metadata).
    pub(crate) data_type: MultiLineStringType,

    /// The coordinates of all line strings of all geometries.
    pub(crate) coords: CoordBuffer,

    /// Offsets into the ring array where each geometry starts
    ///
    /// This is the only buffer that is narrowed when the array is sliced.
    pub(crate) geom_offsets: OffsetBuffer<i32>,

    /// Offsets into the coordinate array where each ring starts
    ///
    /// A "ring" here is one line string of a MultiLineString.
    pub(crate) ring_offsets: OffsetBuffer<i32>,

    /// Validity bitmap
    ///
    /// `None` means that no geometry in the array is null.
    pub(crate) nulls: Option<NullBuffer>,
}
38
39pub(super) fn check(
40    coords: &CoordBuffer,
41    geom_offsets: &OffsetBuffer<i32>,
42    ring_offsets: &OffsetBuffer<i32>,
43    validity_len: Option<usize>,
44) -> GeoArrowResult<()> {
45    if validity_len.is_some_and(|len| len != geom_offsets.len_proxy()) {
46        return Err(GeoArrowError::InvalidGeoArrow(
47            "nulls mask length must match the number of values".to_string(),
48        ));
49    }
50
51    if *ring_offsets.last() as usize != coords.len() {
52        return Err(GeoArrowError::InvalidGeoArrow(
53            "largest ring offset must match coords length".to_string(),
54        ));
55    }
56
57    // Offset can be smaller than length if sliced
58    if *geom_offsets.last() as usize > ring_offsets.len_proxy() {
59        return Err(GeoArrowError::InvalidGeoArrow(
60            "largest geometry offset must not be longer than ring offsets length".to_string(),
61        ));
62    }
63
64    Ok(())
65}
66
67impl MultiLineStringArray {
68    /// Create a new MultiLineStringArray from parts
69    ///
70    /// # Implementation
71    ///
72    /// This function is `O(1)`.
73    ///
74    /// # Panics
75    ///
76    /// - if the nulls is not `None` and its length is different from the number of geometries
77    /// - if the largest ring offset does not match the number of coordinates
78    /// - if the largest geometry offset does not match the size of ring offsets
79    pub fn new(
80        coords: CoordBuffer,
81        geom_offsets: OffsetBuffer<i32>,
82        ring_offsets: OffsetBuffer<i32>,
83        nulls: Option<NullBuffer>,
84        metadata: Arc<Metadata>,
85    ) -> Self {
86        Self::try_new(coords, geom_offsets, ring_offsets, nulls, metadata).unwrap()
87    }
88
89    /// Create a new MultiLineStringArray from parts
90    ///
91    /// # Implementation
92    ///
93    /// This function is `O(1)`.
94    ///
95    /// # Errors
96    ///
97    /// - if the nulls is not `None` and its length is different from the number of geometries
98    /// - if the largest ring offset does not match the number of coordinates
99    /// - if the largest geometry offset does not match the size of ring offsets
100    pub fn try_new(
101        coords: CoordBuffer,
102        geom_offsets: OffsetBuffer<i32>,
103        ring_offsets: OffsetBuffer<i32>,
104        nulls: Option<NullBuffer>,
105        metadata: Arc<Metadata>,
106    ) -> GeoArrowResult<Self> {
107        check(
108            &coords,
109            &geom_offsets,
110            &ring_offsets,
111            nulls.as_ref().map(|v| v.len()),
112        )?;
113        Ok(Self {
114            data_type: MultiLineStringType::new(coords.dim(), metadata)
115                .with_coord_type(coords.coord_type()),
116            coords,
117            geom_offsets,
118            ring_offsets,
119            nulls,
120        })
121    }
122
123    fn vertices_field(&self) -> Arc<Field> {
124        Field::new("vertices", self.coords.storage_type(), false).into()
125    }
126
127    fn linestrings_field(&self) -> Arc<Field> {
128        Field::new_list("linestrings", self.vertices_field(), false).into()
129    }
130
131    /// Access the underlying coordinate buffer
132    pub fn coords(&self) -> &CoordBuffer {
133        &self.coords
134    }
135
136    /// Access the underlying geometry offsets buffer
137    pub fn geom_offsets(&self) -> &OffsetBuffer<i32> {
138        &self.geom_offsets
139    }
140
141    /// Access the underlying ring offsets buffer
142    pub fn ring_offsets(&self) -> &OffsetBuffer<i32> {
143        &self.ring_offsets
144    }
145
146    /// The lengths of each buffer contained in this array.
147    pub fn buffer_lengths(&self) -> MultiLineStringCapacity {
148        MultiLineStringCapacity::new(
149            *self.ring_offsets.last() as usize,
150            *self.geom_offsets.last() as usize,
151            self.len(),
152        )
153    }
154
155    /// The number of bytes occupied by this array.
156    pub fn num_bytes(&self) -> usize {
157        let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0);
158        validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension())
159    }
160
161    /// Slice this [`MultiLineStringArray`].
162    ///
163    /// # Panic
164    ///
165    /// This function panics iff `offset + length > self.len()`.
166    #[inline]
167    pub fn slice(&self, offset: usize, length: usize) -> Self {
168        assert!(
169            offset + length <= self.len(),
170            "offset + length may not exceed length of array"
171        );
172        // Note: we **only** slice the geom_offsets and not any actual data. Otherwise the offsets
173        // would be in the wrong location.
174        Self {
175            data_type: self.data_type.clone(),
176            coords: self.coords.clone(),
177            geom_offsets: self.geom_offsets.slice(offset, length),
178            ring_offsets: self.ring_offsets.clone(),
179            nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)),
180        }
181    }
182
183    /// Change the [`CoordType`] of this array.
184    pub fn into_coord_type(self, coord_type: CoordType) -> Self {
185        Self {
186            data_type: self.data_type.with_coord_type(coord_type),
187            coords: self.coords.into_coord_type(coord_type),
188            ..self
189        }
190    }
191
192    /// Change the [`Metadata`] of this array.
193    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
194        Self {
195            data_type: self.data_type.with_metadata(metadata),
196            ..self
197        }
198    }
199}
200
201impl GeoArrowArray for MultiLineStringArray {
202    fn as_any(&self) -> &dyn std::any::Any {
203        self
204    }
205
206    fn into_array_ref(self) -> ArrayRef {
207        Arc::new(self.into_arrow())
208    }
209
210    fn to_array_ref(&self) -> ArrayRef {
211        self.clone().into_array_ref()
212    }
213
214    #[inline]
215    fn len(&self) -> usize {
216        self.geom_offsets.len_proxy()
217    }
218
219    #[inline]
220    fn logical_nulls(&self) -> Option<NullBuffer> {
221        self.nulls.clone()
222    }
223
224    #[inline]
225    fn logical_null_count(&self) -> usize {
226        self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0)
227    }
228
229    #[inline]
230    fn is_null(&self, i: usize) -> bool {
231        self.nulls
232            .as_ref()
233            .map(|n| n.is_null(i))
234            .unwrap_or_default()
235    }
236
237    fn data_type(&self) -> GeoArrowType {
238        GeoArrowType::MultiLineString(self.data_type.clone())
239    }
240
241    fn slice(&self, offset: usize, length: usize) -> Arc<dyn GeoArrowArray> {
242        Arc::new(self.slice(offset, length))
243    }
244
245    fn with_metadata(self, metadata: Arc<Metadata>) -> Arc<dyn GeoArrowArray> {
246        Arc::new(self.with_metadata(metadata))
247    }
248}
249
250impl<'a> GeoArrowArrayAccessor<'a> for MultiLineStringArray {
251    type Item = MultiLineString<'a>;
252
253    unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult<Self::Item> {
254        Ok(MultiLineString::new(
255            &self.coords,
256            &self.geom_offsets,
257            &self.ring_offsets,
258            index,
259        ))
260    }
261}
262
263impl IntoArrow for MultiLineStringArray {
264    type ArrowArray = GenericListArray<i32>;
265    type ExtensionType = MultiLineStringType;
266
267    fn into_arrow(self) -> Self::ArrowArray {
268        let vertices_field = self.vertices_field();
269        let linestrings_field = self.linestrings_field();
270        let nulls = self.nulls;
271        let coord_array = self.coords.into_array_ref();
272        let ring_array = Arc::new(GenericListArray::new(
273            vertices_field,
274            self.ring_offsets,
275            coord_array,
276            None,
277        ));
278        GenericListArray::new(linestrings_field, self.geom_offsets, ring_array, nulls)
279    }
280
281    fn extension_type(&self) -> &Self::ExtensionType {
282        &self.data_type
283    }
284}
285
286impl TryFrom<(&GenericListArray<i32>, MultiLineStringType)> for MultiLineStringArray {
287    type Error = GeoArrowError;
288
289    fn try_from(
290        (geom_array, typ): (&GenericListArray<i32>, MultiLineStringType),
291    ) -> GeoArrowResult<Self> {
292        let geom_offsets = geom_array.offsets();
293        let nulls = geom_array.nulls();
294
295        let rings_dyn_array = geom_array.values();
296        let rings_array = rings_dyn_array.as_list::<i32>();
297
298        let ring_offsets = rings_array.offsets();
299        let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?;
300
301        Ok(Self::new(
302            coords,
303            geom_offsets.clone(),
304            ring_offsets.clone(),
305            nulls.cloned(),
306            typ.metadata().clone(),
307        ))
308    }
309}
310
311impl TryFrom<(&GenericListArray<i64>, MultiLineStringType)> for MultiLineStringArray {
312    type Error = GeoArrowError;
313
314    fn try_from(
315        (geom_array, typ): (&GenericListArray<i64>, MultiLineStringType),
316    ) -> GeoArrowResult<Self> {
317        let geom_offsets = offsets_buffer_i64_to_i32(geom_array.offsets())?;
318        let nulls = geom_array.nulls();
319
320        let rings_dyn_array = geom_array.values();
321        let rings_array = rings_dyn_array.as_list::<i64>();
322
323        let ring_offsets = offsets_buffer_i64_to_i32(rings_array.offsets())?;
324        let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?;
325
326        Ok(Self::new(
327            coords,
328            geom_offsets.clone(),
329            ring_offsets.clone(),
330            nulls.cloned(),
331            typ.metadata().clone(),
332        ))
333    }
334}
335
336impl TryFrom<(&dyn Array, MultiLineStringType)> for MultiLineStringArray {
337    type Error = GeoArrowError;
338
339    fn try_from((value, typ): (&dyn Array, MultiLineStringType)) -> GeoArrowResult<Self> {
340        match value.data_type() {
341            DataType::List(_) => (value.as_list::<i32>(), typ).try_into(),
342            DataType::LargeList(_) => (value.as_list::<i64>(), typ).try_into(),
343            dt => Err(GeoArrowError::InvalidGeoArrow(format!(
344                "Unexpected MultiLineString DataType: {dt:?}",
345            ))),
346        }
347    }
348}
349
350impl TryFrom<(&dyn Array, &Field)> for MultiLineStringArray {
351    type Error = GeoArrowError;
352
353    fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult<Self> {
354        let typ = field.try_extension_type::<MultiLineStringType>()?;
355        (arr, typ).try_into()
356    }
357}
358
359impl<O: OffsetSizeTrait> TryFrom<(GenericWkbArray<O>, MultiLineStringType)>
360    for MultiLineStringArray
361{
362    type Error = GeoArrowError;
363
364    fn try_from(value: (GenericWkbArray<O>, MultiLineStringType)) -> GeoArrowResult<Self> {
365        let mut_arr: MultiLineStringBuilder = value.try_into()?;
366        Ok(mut_arr.finish())
367    }
368}
369
370impl From<LineStringArray> for MultiLineStringArray {
371    fn from(value: LineStringArray) -> Self {
372        let (coord_type, dimension, metadata) = value.data_type.into_inner();
373        let new_type = MultiLineStringType::new(dimension, metadata).with_coord_type(coord_type);
374
375        let coords = value.coords;
376        let geom_offsets = OffsetBuffer::from_lengths(vec![1; coords.len()]);
377        let ring_offsets = value.geom_offsets;
378        let nulls = value.nulls;
379        Self {
380            data_type: new_type,
381            coords,
382            geom_offsets,
383            ring_offsets,
384            nulls,
385        }
386    }
387}
388
389impl PartialEq for MultiLineStringArray {
390    fn eq(&self, other: &Self) -> bool {
391        self.nulls == other.nulls
392            && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets)
393            && offset_buffer_eq(&self.ring_offsets, &other.ring_offsets)
394            && self.coords == other.coords
395    }
396}
397
impl GeometryTypeId for MultiLineStringArray {
    // NOTE(review): presumably the type id offset assigned to MultiLineString geometries;
    // confirm against the `GeometryTypeId` trait documentation.
    const GEOMETRY_TYPE_OFFSET: i8 = 5;

    /// The dimension recorded in this array's data type.
    fn dimension(&self) -> Dimension {
        self.data_type.dimension()
    }
}
405
#[cfg(test)]
mod test {
    use geo_traits::to_geo::ToGeoMultiLineString;
    use geoarrow_schema::{CoordType, Dimension};

    use super::*;
    use crate::test::multilinestring;

    /// Building an array from `geo` geometries (including nulls) and reading the values back
    /// yields the original geometries, both for the full array and for a slice of it.
    #[test]
    fn geo_round_trip() {
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            let geoms = [
                Some(multilinestring::ml0()),
                None,
                Some(multilinestring::ml1()),
                None,
            ];
            let typ = MultiLineStringType::new(Dimension::XY, Default::default())
                .with_coord_type(coord_type);
            let geo_arr =
                MultiLineStringBuilder::from_nullable_multi_line_strings(&geoms, typ).finish();

            for (i, g) in geo_arr.iter().enumerate() {
                assert_eq!(
                    geoms[i],
                    g.transpose().unwrap().map(|g| g.to_multi_line_string())
                );
            }

            // Test sliced
            for (i, g) in geo_arr.slice(2, 2).iter().enumerate() {
                assert_eq!(
                    geoms[i + 2],
                    g.transpose().unwrap().map(|g| g.to_multi_line_string())
                );
            }
        }
    }

    /// Converting a fixture array to `geo` geometries and rebuilding an array from them produces
    /// an array equal to the fixture.
    #[test]
    fn geo_round_trip2() {
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            let geo_arr = multilinestring::array(coord_type, Dimension::XY);
            let geo_geoms = geo_arr
                .iter()
                .map(|x| x.transpose().unwrap().map(|g| g.to_multi_line_string()))
                .collect::<Vec<_>>();

            let typ = MultiLineStringType::new(Dimension::XY, Default::default())
                .with_coord_type(coord_type);
            let geo_arr2 =
                MultiLineStringBuilder::from_nullable_multi_line_strings(&geo_geoms, typ).finish();
            assert_eq!(geo_arr, geo_arr2);
        }
    }

    /// Exporting to Arrow and importing again, via either the extension type or the field,
    /// round-trips for every coordinate type and dimension.
    #[test]
    fn try_from_arrow() {
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            for dim in [
                Dimension::XY,
                Dimension::XYZ,
                Dimension::XYM,
                Dimension::XYZM,
            ] {
                let geo_arr = multilinestring::array(coord_type, dim);

                let extension_type = geo_arr.extension_type().clone();
                let field = extension_type.to_field("geometry", true);

                let arrow_arr = geo_arr.to_array_ref();

                let geo_arr2: MultiLineStringArray =
                    (arrow_arr.as_ref(), extension_type).try_into().unwrap();
                let geo_arr3: MultiLineStringArray =
                    (arrow_arr.as_ref(), &field).try_into().unwrap();

                assert_eq!(geo_arr, geo_arr2);
                assert_eq!(geo_arr, geo_arr3);
            }
        }
    }

    /// Equality holds across coordinate types for the same fixture data, and a shorter slice is
    /// not equal to the full array.
    #[test]
    fn partial_eq() {
        for dim in [
            Dimension::XY,
            Dimension::XYZ,
            Dimension::XYM,
            Dimension::XYZM,
        ] {
            let arr1 = multilinestring::array(CoordType::Interleaved, dim);
            let arr2 = multilinestring::array(CoordType::Separated, dim);
            assert_eq!(arr1, arr1);
            assert_eq!(arr2, arr2);
            assert_eq!(arr1, arr2);

            assert_ne!(arr1, arr2.slice(0, 2));
        }
    }

    /// An array sliced through `MultiLineStringArray::slice` must still pass validation when it
    /// is exported to Arrow and imported again.
    #[test]
    fn test_validation_with_sliced_array() {
        let arr = multilinestring::array(CoordType::Interleaved, Dimension::XY);
        let sliced = arr.slice(0, 1);

        let back = MultiLineStringArray::try_from((
            sliced.to_array_ref().as_ref(),
            arr.extension_type().clone(),
        ))
        .unwrap();
        assert_eq!(back.len(), 1);
    }

    /// An Arrow array sliced on the Arrow side (after export) must also pass validation when it
    /// is imported.
    #[test]
    fn test_validation_with_array_sliced_by_arrow_rs() {
        let arr = multilinestring::array(CoordType::Interleaved, Dimension::XY);
        let sliced = arr.to_array_ref().slice(0, 1);

        let back = MultiLineStringArray::try_from((sliced.as_ref(), arr.extension_type().clone()))
            .unwrap();
        assert_eq!(back.len(), 1);
    }
}