geoarrow_array/array/
multilinestring.rs

1use std::sync::Arc;
2
3use arrow_array::cast::AsArray;
4use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
5use arrow_buffer::{NullBuffer, OffsetBuffer};
6use arrow_schema::{DataType, Field};
7use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
8use geoarrow_schema::{CoordType, GeoArrowType, Metadata, MultiLineStringType};
9
10use crate::array::{CoordBuffer, GenericWkbArray, LineStringArray};
11use crate::builder::MultiLineStringBuilder;
12use crate::capacity::MultiLineStringCapacity;
13use crate::eq::offset_buffer_eq;
14use crate::scalar::MultiLineString;
15use crate::trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow};
16use crate::util::{OffsetBufferUtils, offsets_buffer_i64_to_i32};
17
/// An immutable array of MultiLineString geometries.
///
/// This is semantically equivalent to `Vec<Option<MultiLineString>>` due to the internal validity
/// bitmap.
///
/// The data is stored as two levels of offsets on top of a flat coordinate buffer:
/// `geom_offsets` indexes into `ring_offsets`, which in turn indexes into `coords`.
#[derive(Debug, Clone)]
pub struct MultiLineStringArray {
    /// The GeoArrow extension type of this array.
    pub(crate) data_type: MultiLineStringType,

    /// The flat buffer of coordinates that `ring_offsets` points into.
    pub(crate) coords: CoordBuffer,

    /// Offsets into `ring_offsets` marking where each geometry starts.
    ///
    /// The last offset must equal the number of line strings described by `ring_offsets`.
    pub(crate) geom_offsets: OffsetBuffer<i32>,

    /// Offsets into the coordinate array marking where each line string starts.
    ///
    /// The field keeps the name "ring" but each entry describes one line string of a
    /// MultiLineString. The last offset must equal the number of coordinates.
    pub(crate) ring_offsets: OffsetBuffer<i32>,

    /// Validity bitmap; `None` means that no geometry is null.
    pub(crate) nulls: Option<NullBuffer>,
}
37
38pub(super) fn check(
39    coords: &CoordBuffer,
40    geom_offsets: &OffsetBuffer<i32>,
41    ring_offsets: &OffsetBuffer<i32>,
42    validity_len: Option<usize>,
43) -> GeoArrowResult<()> {
44    if validity_len.is_some_and(|len| len != geom_offsets.len_proxy()) {
45        return Err(GeoArrowError::InvalidGeoArrow(
46            "nulls mask length must match the number of values".to_string(),
47        ));
48    }
49
50    if *ring_offsets.last() as usize != coords.len() {
51        return Err(GeoArrowError::InvalidGeoArrow(
52            "largest ring offset must match coords length".to_string(),
53        ));
54    }
55
56    if *geom_offsets.last() as usize != ring_offsets.len_proxy() {
57        return Err(GeoArrowError::InvalidGeoArrow(
58            "largest geometry offset must match ring offsets length".to_string(),
59        ));
60    }
61
62    Ok(())
63}
64
65impl MultiLineStringArray {
66    /// Create a new MultiLineStringArray from parts
67    ///
68    /// # Implementation
69    ///
70    /// This function is `O(1)`.
71    ///
72    /// # Panics
73    ///
74    /// - if the nulls is not `None` and its length is different from the number of geometries
75    /// - if the largest ring offset does not match the number of coordinates
76    /// - if the largest geometry offset does not match the size of ring offsets
77    pub fn new(
78        coords: CoordBuffer,
79        geom_offsets: OffsetBuffer<i32>,
80        ring_offsets: OffsetBuffer<i32>,
81        nulls: Option<NullBuffer>,
82        metadata: Arc<Metadata>,
83    ) -> Self {
84        Self::try_new(coords, geom_offsets, ring_offsets, nulls, metadata).unwrap()
85    }
86
87    /// Create a new MultiLineStringArray from parts
88    ///
89    /// # Implementation
90    ///
91    /// This function is `O(1)`.
92    ///
93    /// # Errors
94    ///
95    /// - if the nulls is not `None` and its length is different from the number of geometries
96    /// - if the largest ring offset does not match the number of coordinates
97    /// - if the largest geometry offset does not match the size of ring offsets
98    pub fn try_new(
99        coords: CoordBuffer,
100        geom_offsets: OffsetBuffer<i32>,
101        ring_offsets: OffsetBuffer<i32>,
102        nulls: Option<NullBuffer>,
103        metadata: Arc<Metadata>,
104    ) -> GeoArrowResult<Self> {
105        check(
106            &coords,
107            &geom_offsets,
108            &ring_offsets,
109            nulls.as_ref().map(|v| v.len()),
110        )?;
111        Ok(Self {
112            data_type: MultiLineStringType::new(coords.dim(), metadata)
113                .with_coord_type(coords.coord_type()),
114            coords,
115            geom_offsets,
116            ring_offsets,
117            nulls,
118        })
119    }
120
121    fn vertices_field(&self) -> Arc<Field> {
122        Field::new("vertices", self.coords.storage_type(), false).into()
123    }
124
125    fn linestrings_field(&self) -> Arc<Field> {
126        Field::new_list("linestrings", self.vertices_field(), false).into()
127    }
128
129    /// Access the underlying coordinate buffer
130    pub fn coords(&self) -> &CoordBuffer {
131        &self.coords
132    }
133
134    /// Access the underlying geometry offsets buffer
135    pub fn geom_offsets(&self) -> &OffsetBuffer<i32> {
136        &self.geom_offsets
137    }
138
139    /// Access the underlying ring offsets buffer
140    pub fn ring_offsets(&self) -> &OffsetBuffer<i32> {
141        &self.ring_offsets
142    }
143
144    /// The lengths of each buffer contained in this array.
145    pub fn buffer_lengths(&self) -> MultiLineStringCapacity {
146        MultiLineStringCapacity::new(
147            *self.ring_offsets.last() as usize,
148            *self.geom_offsets.last() as usize,
149            self.len(),
150        )
151    }
152
153    /// The number of bytes occupied by this array.
154    pub fn num_bytes(&self) -> usize {
155        let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0);
156        validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension())
157    }
158
159    /// Slice this [`MultiLineStringArray`].
160    ///
161    /// # Panic
162    ///
163    /// This function panics iff `offset + length > self.len()`.
164    #[inline]
165    pub fn slice(&self, offset: usize, length: usize) -> Self {
166        assert!(
167            offset + length <= self.len(),
168            "offset + length may not exceed length of array"
169        );
170        // Note: we **only** slice the geom_offsets and not any actual data. Otherwise the offsets
171        // would be in the wrong location.
172        Self {
173            data_type: self.data_type.clone(),
174            coords: self.coords.clone(),
175            geom_offsets: self.geom_offsets.slice(offset, length),
176            ring_offsets: self.ring_offsets.clone(),
177            nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)),
178        }
179    }
180
181    /// Change the [`CoordType`] of this array.
182    pub fn into_coord_type(self, coord_type: CoordType) -> Self {
183        Self {
184            data_type: self.data_type.with_coord_type(coord_type),
185            coords: self.coords.into_coord_type(coord_type),
186            ..self
187        }
188    }
189
190    /// Change the [`Metadata`] of this array.
191    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
192        Self {
193            data_type: self.data_type.with_metadata(metadata),
194            ..self
195        }
196    }
197}
198
199impl GeoArrowArray for MultiLineStringArray {
200    fn as_any(&self) -> &dyn std::any::Any {
201        self
202    }
203
204    fn into_array_ref(self) -> ArrayRef {
205        Arc::new(self.into_arrow())
206    }
207
208    fn to_array_ref(&self) -> ArrayRef {
209        self.clone().into_array_ref()
210    }
211
212    #[inline]
213    fn len(&self) -> usize {
214        self.geom_offsets.len_proxy()
215    }
216
217    #[inline]
218    fn logical_nulls(&self) -> Option<NullBuffer> {
219        self.nulls.clone()
220    }
221
222    #[inline]
223    fn logical_null_count(&self) -> usize {
224        self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0)
225    }
226
227    #[inline]
228    fn is_null(&self, i: usize) -> bool {
229        self.nulls
230            .as_ref()
231            .map(|n| n.is_null(i))
232            .unwrap_or_default()
233    }
234
235    fn data_type(&self) -> GeoArrowType {
236        GeoArrowType::MultiLineString(self.data_type.clone())
237    }
238
239    fn slice(&self, offset: usize, length: usize) -> Arc<dyn GeoArrowArray> {
240        Arc::new(self.slice(offset, length))
241    }
242
243    fn with_metadata(self, metadata: Arc<Metadata>) -> Arc<dyn GeoArrowArray> {
244        Arc::new(self.with_metadata(metadata))
245    }
246}
247
248impl<'a> GeoArrowArrayAccessor<'a> for MultiLineStringArray {
249    type Item = MultiLineString<'a>;
250
251    unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult<Self::Item> {
252        Ok(MultiLineString::new(
253            &self.coords,
254            &self.geom_offsets,
255            &self.ring_offsets,
256            index,
257        ))
258    }
259}
260
261impl IntoArrow for MultiLineStringArray {
262    type ArrowArray = GenericListArray<i32>;
263    type ExtensionType = MultiLineStringType;
264
265    fn into_arrow(self) -> Self::ArrowArray {
266        let vertices_field = self.vertices_field();
267        let linestrings_field = self.linestrings_field();
268        let nulls = self.nulls;
269        let coord_array = self.coords.into_array_ref();
270        let ring_array = Arc::new(GenericListArray::new(
271            vertices_field,
272            self.ring_offsets,
273            coord_array,
274            None,
275        ));
276        GenericListArray::new(linestrings_field, self.geom_offsets, ring_array, nulls)
277    }
278
279    fn extension_type(&self) -> &Self::ExtensionType {
280        &self.data_type
281    }
282}
283
284impl TryFrom<(&GenericListArray<i32>, MultiLineStringType)> for MultiLineStringArray {
285    type Error = GeoArrowError;
286
287    fn try_from(
288        (geom_array, typ): (&GenericListArray<i32>, MultiLineStringType),
289    ) -> GeoArrowResult<Self> {
290        let geom_offsets = geom_array.offsets();
291        let nulls = geom_array.nulls();
292
293        let rings_dyn_array = geom_array.values();
294        let rings_array = rings_dyn_array.as_list::<i32>();
295
296        let ring_offsets = rings_array.offsets();
297        let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?;
298
299        Ok(Self::new(
300            coords,
301            geom_offsets.clone(),
302            ring_offsets.clone(),
303            nulls.cloned(),
304            typ.metadata().clone(),
305        ))
306    }
307}
308
309impl TryFrom<(&GenericListArray<i64>, MultiLineStringType)> for MultiLineStringArray {
310    type Error = GeoArrowError;
311
312    fn try_from(
313        (geom_array, typ): (&GenericListArray<i64>, MultiLineStringType),
314    ) -> GeoArrowResult<Self> {
315        let geom_offsets = offsets_buffer_i64_to_i32(geom_array.offsets())?;
316        let nulls = geom_array.nulls();
317
318        let rings_dyn_array = geom_array.values();
319        let rings_array = rings_dyn_array.as_list::<i64>();
320
321        let ring_offsets = offsets_buffer_i64_to_i32(rings_array.offsets())?;
322        let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?;
323
324        Ok(Self::new(
325            coords,
326            geom_offsets.clone(),
327            ring_offsets.clone(),
328            nulls.cloned(),
329            typ.metadata().clone(),
330        ))
331    }
332}
333
334impl TryFrom<(&dyn Array, MultiLineStringType)> for MultiLineStringArray {
335    type Error = GeoArrowError;
336
337    fn try_from((value, typ): (&dyn Array, MultiLineStringType)) -> GeoArrowResult<Self> {
338        match value.data_type() {
339            DataType::List(_) => (value.as_list::<i32>(), typ).try_into(),
340            DataType::LargeList(_) => (value.as_list::<i64>(), typ).try_into(),
341            dt => Err(GeoArrowError::InvalidGeoArrow(format!(
342                "Unexpected MultiLineString DataType: {dt:?}",
343            ))),
344        }
345    }
346}
347
348impl TryFrom<(&dyn Array, &Field)> for MultiLineStringArray {
349    type Error = GeoArrowError;
350
351    fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult<Self> {
352        let typ = field.try_extension_type::<MultiLineStringType>()?;
353        (arr, typ).try_into()
354    }
355}
356
357impl<O: OffsetSizeTrait> TryFrom<(GenericWkbArray<O>, MultiLineStringType)>
358    for MultiLineStringArray
359{
360    type Error = GeoArrowError;
361
362    fn try_from(value: (GenericWkbArray<O>, MultiLineStringType)) -> GeoArrowResult<Self> {
363        let mut_arr: MultiLineStringBuilder = value.try_into()?;
364        Ok(mut_arr.finish())
365    }
366}
367
368impl From<LineStringArray> for MultiLineStringArray {
369    fn from(value: LineStringArray) -> Self {
370        let (coord_type, dimension, metadata) = value.data_type.into_inner();
371        let new_type = MultiLineStringType::new(dimension, metadata).with_coord_type(coord_type);
372
373        let coords = value.coords;
374        let geom_offsets = OffsetBuffer::from_lengths(vec![1; coords.len()]);
375        let ring_offsets = value.geom_offsets;
376        let nulls = value.nulls;
377        Self {
378            data_type: new_type,
379            coords,
380            geom_offsets,
381            ring_offsets,
382            nulls,
383        }
384    }
385}
386
387impl PartialEq for MultiLineStringArray {
388    fn eq(&self, other: &Self) -> bool {
389        self.nulls == other.nulls
390            && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets)
391            && offset_buffer_eq(&self.ring_offsets, &other.ring_offsets)
392            && self.coords == other.coords
393    }
394}
395
#[cfg(test)]
mod test {
    use geo_traits::to_geo::ToGeoMultiLineString;
    use geoarrow_schema::{CoordType, Dimension};

    use super::*;
    use crate::test::multilinestring;

    /// The coordinate layouts exercised by the tests below.
    const COORD_TYPES: [CoordType; 2] = [CoordType::Interleaved, CoordType::Separated];

    /// The dimensions exercised by the tests below.
    const DIMENSIONS: [Dimension; 4] = [
        Dimension::XY,
        Dimension::XYZ,
        Dimension::XYM,
        Dimension::XYZM,
    ];

    /// Building an array from `geo` geometries and reading it back yields the same geometries,
    /// both for the full array and for a slice of it.
    #[test]
    fn geo_round_trip() {
        for coord_type in COORD_TYPES {
            let expected = [
                Some(multilinestring::ml0()),
                None,
                Some(multilinestring::ml1()),
                None,
            ];
            let typ = MultiLineStringType::new(Dimension::XY, Default::default())
                .with_coord_type(coord_type);
            let array =
                MultiLineStringBuilder::from_nullable_multi_line_strings(&expected, typ).finish();

            for (idx, item) in array.iter().enumerate() {
                let actual = item.transpose().unwrap().map(|g| g.to_multi_line_string());
                assert_eq!(expected[idx], actual);
            }

            // Test sliced
            let start = 2;
            let tail = array.slice(start, 2);
            for (idx, item) in tail.iter().enumerate() {
                let actual = item.transpose().unwrap().map(|g| g.to_multi_line_string());
                assert_eq!(expected[idx + start], actual);
            }
        }
    }

    /// Converting a fixture array to `geo` geometries and rebuilding it yields an equal array.
    #[test]
    fn geo_round_trip2() {
        for coord_type in COORD_TYPES {
            let original = multilinestring::array(coord_type, Dimension::XY);
            let geometries: Vec<_> = original
                .iter()
                .map(|item| item.transpose().unwrap().map(|g| g.to_multi_line_string()))
                .collect();

            let typ = MultiLineStringType::new(Dimension::XY, Default::default())
                .with_coord_type(coord_type);
            let rebuilt =
                MultiLineStringBuilder::from_nullable_multi_line_strings(&geometries, typ).finish();
            assert_eq!(original, rebuilt);
        }
    }

    /// Exporting to Arrow and importing again, via either the extension type or a field, yields
    /// an equal array.
    #[test]
    fn try_from_arrow() {
        for coord_type in COORD_TYPES {
            for dim in DIMENSIONS {
                let original = multilinestring::array(coord_type, dim);

                let extension_type = original.extension_type().clone();
                let field = extension_type.to_field("geometry", true);

                let arrow_arr = original.to_array_ref();

                let from_type: MultiLineStringArray =
                    (arrow_arr.as_ref(), extension_type).try_into().unwrap();
                let from_field: MultiLineStringArray =
                    (arrow_arr.as_ref(), &field).try_into().unwrap();

                assert_eq!(original, from_type);
                assert_eq!(original, from_field);
            }
        }
    }

    /// Equality ignores the coordinate layout but is sensitive to slicing.
    #[test]
    fn partial_eq() {
        for dim in DIMENSIONS {
            let interleaved = multilinestring::array(CoordType::Interleaved, dim);
            let separated = multilinestring::array(CoordType::Separated, dim);
            assert_eq!(interleaved, interleaved);
            assert_eq!(separated, separated);
            assert_eq!(interleaved, separated);

            assert_ne!(interleaved, separated.slice(0, 2));
        }
    }
}
494        }
495    }
496}