geoarrow_array/array/
linestring.rs

1use std::sync::Arc;
2
3use arrow_array::cast::AsArray;
4use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
5use arrow_buffer::{NullBuffer, OffsetBuffer};
6use arrow_schema::{DataType, Field};
7use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
8use geoarrow_schema::type_id::GeometryTypeId;
9use geoarrow_schema::{CoordType, Dimension, GeoArrowType, LineStringType, Metadata};
10
11use crate::array::{CoordBuffer, GenericWkbArray};
12use crate::builder::LineStringBuilder;
13use crate::capacity::LineStringCapacity;
14use crate::eq::offset_buffer_eq;
15use crate::scalar::LineString;
16use crate::trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow};
17use crate::util::{OffsetBufferUtils, offsets_buffer_i64_to_i32};
18
19/// An immutable array of LineString geometries.
20///
21/// This is semantically equivalent to `Vec<Option<LineString>>` due to the internal validity
22/// bitmap.
23#[derive(Debug, Clone)]
24pub struct LineStringArray {
25    pub(crate) data_type: LineStringType,
26
27    pub(crate) coords: CoordBuffer,
28
29    /// Offsets into the coordinate array where each geometry starts
30    pub(crate) geom_offsets: OffsetBuffer<i32>,
31
32    /// Validity bitmap
33    pub(crate) nulls: Option<NullBuffer>,
34}
35
36pub(super) fn check(
37    coords: &CoordBuffer,
38    validity_len: Option<usize>,
39    geom_offsets: &OffsetBuffer<i32>,
40) -> GeoArrowResult<()> {
41    if validity_len.is_some_and(|len| len != geom_offsets.len_proxy()) {
42        return Err(GeoArrowError::InvalidGeoArrow(
43            "nulls mask length must match the number of values".to_string(),
44        ));
45    }
46
47    // Offset can be smaller than coords length if sliced
48    if *geom_offsets.last() as usize > coords.len() {
49        return Err(GeoArrowError::InvalidGeoArrow(
50            "largest geometry offset must not be longer than coords length".to_string(),
51        ));
52    }
53
54    Ok(())
55}
56
57impl LineStringArray {
58    /// Create a new LineStringArray from parts
59    ///
60    /// # Implementation
61    ///
62    /// This function is `O(1)`.
63    ///
64    /// # Panics
65    ///
66    /// - if the nulls is not `None` and its length is different from the number of geometries
67    /// - if the largest geometry offset does not match the number of coordinates
68    pub fn new(
69        coords: CoordBuffer,
70        geom_offsets: OffsetBuffer<i32>,
71        nulls: Option<NullBuffer>,
72        metadata: Arc<Metadata>,
73    ) -> Self {
74        Self::try_new(coords, geom_offsets, nulls, metadata).unwrap()
75    }
76
77    /// Create a new LineStringArray from parts
78    ///
79    /// # Implementation
80    ///
81    /// This function is `O(1)`.
82    ///
83    /// # Errors
84    ///
85    /// - if the nulls buffer does not have the same length as the number of geometries
86    /// - if the geometry offsets do not match the number of coordinates
87    pub fn try_new(
88        coords: CoordBuffer,
89        geom_offsets: OffsetBuffer<i32>,
90        nulls: Option<NullBuffer>,
91        metadata: Arc<Metadata>,
92    ) -> GeoArrowResult<Self> {
93        check(&coords, nulls.as_ref().map(|v| v.len()), &geom_offsets)?;
94        Ok(Self {
95            data_type: LineStringType::new(coords.dim(), metadata)
96                .with_coord_type(coords.coord_type()),
97            coords,
98            geom_offsets,
99            nulls,
100        })
101    }
102
103    /// Access the underlying coordinate buffer
104    pub fn coords(&self) -> &CoordBuffer {
105        &self.coords
106    }
107
108    /// Access the underlying geometry offsets buffer
109    pub fn geom_offsets(&self) -> &OffsetBuffer<i32> {
110        &self.geom_offsets
111    }
112
113    /// The lengths of each buffer contained in this array.
114    pub fn buffer_lengths(&self) -> LineStringCapacity {
115        LineStringCapacity::new(*self.geom_offsets.last() as usize, self.len())
116    }
117
118    /// The number of bytes occupied by this array.
119    pub fn num_bytes(&self) -> usize {
120        let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0);
121        validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension())
122    }
123
124    /// Slice this [`LineStringArray`].
125    ///
126    /// # Implementation
127    ///
128    /// This operation is `O(1)` as it amounts to increasing a few ref counts.
129    ///
130    /// # Panic
131    ///
132    /// This function panics iff `offset + length > self.len()`.
133    #[inline]
134    pub fn slice(&self, offset: usize, length: usize) -> Self {
135        assert!(
136            offset + length <= self.len(),
137            "offset + length may not exceed length of array"
138        );
139        // Note: we **only** slice the geom_offsets and not any actual data. Otherwise the offsets
140        // would be in the wrong location.
141        Self {
142            data_type: self.data_type.clone(),
143            coords: self.coords.clone(),
144            geom_offsets: self.geom_offsets.slice(offset, length),
145            nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)),
146        }
147    }
148
149    /// Change the [`CoordType`] of this array.
150    pub fn into_coord_type(self, coord_type: CoordType) -> Self {
151        Self {
152            data_type: self.data_type.with_coord_type(coord_type),
153            coords: self.coords.into_coord_type(coord_type),
154            ..self
155        }
156    }
157
158    /// Change the [`Metadata`] of this array.
159    pub fn with_metadata(self, metadata: Arc<Metadata>) -> Self {
160        Self {
161            data_type: self.data_type.with_metadata(metadata),
162            ..self
163        }
164    }
165}
166
167impl GeoArrowArray for LineStringArray {
168    fn as_any(&self) -> &dyn std::any::Any {
169        self
170    }
171
172    fn into_array_ref(self) -> ArrayRef {
173        Arc::new(self.into_arrow())
174    }
175
176    fn to_array_ref(&self) -> ArrayRef {
177        self.clone().into_array_ref()
178    }
179
180    #[inline]
181    fn len(&self) -> usize {
182        self.geom_offsets.len_proxy()
183    }
184
185    #[inline]
186    fn logical_nulls(&self) -> Option<NullBuffer> {
187        self.nulls.clone()
188    }
189
190    #[inline]
191    fn logical_null_count(&self) -> usize {
192        self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0)
193    }
194
195    fn is_null(&self, i: usize) -> bool {
196        self.nulls
197            .as_ref()
198            .map(|n| n.is_null(i))
199            .unwrap_or_default()
200    }
201
202    fn data_type(&self) -> GeoArrowType {
203        GeoArrowType::LineString(self.data_type.clone())
204    }
205
206    fn slice(&self, offset: usize, length: usize) -> Arc<dyn GeoArrowArray> {
207        Arc::new(self.slice(offset, length))
208    }
209
210    fn with_metadata(self, metadata: Arc<Metadata>) -> Arc<dyn GeoArrowArray> {
211        Arc::new(self.with_metadata(metadata))
212    }
213}
214
215impl<'a> GeoArrowArrayAccessor<'a> for LineStringArray {
216    type Item = LineString<'a>;
217
218    unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult<Self::Item> {
219        Ok(LineString::new(&self.coords, &self.geom_offsets, index))
220    }
221}
222
223impl IntoArrow for LineStringArray {
224    type ArrowArray = GenericListArray<i32>;
225    type ExtensionType = LineStringType;
226
227    fn into_arrow(self) -> Self::ArrowArray {
228        let vertices_field = match self.data_type.data_type() {
229            DataType::List(inner_field) => inner_field,
230            _ => unreachable!(),
231        };
232        let nulls = self.nulls;
233        let coord_array = self.coords.into_array_ref();
234        GenericListArray::new(vertices_field, self.geom_offsets, coord_array, nulls)
235    }
236
237    fn extension_type(&self) -> &Self::ExtensionType {
238        &self.data_type
239    }
240}
241
242impl TryFrom<(&GenericListArray<i32>, LineStringType)> for LineStringArray {
243    type Error = GeoArrowError;
244
245    fn try_from((value, typ): (&GenericListArray<i32>, LineStringType)) -> GeoArrowResult<Self> {
246        let coords = CoordBuffer::from_arrow(value.values().as_ref(), typ.dimension())?;
247        let geom_offsets = value.offsets();
248        let nulls = value.nulls();
249
250        Ok(Self::new(
251            coords,
252            geom_offsets.clone(),
253            nulls.cloned(),
254            typ.metadata().clone(),
255        ))
256    }
257}
258
259impl TryFrom<(&GenericListArray<i64>, LineStringType)> for LineStringArray {
260    type Error = GeoArrowError;
261
262    fn try_from((value, typ): (&GenericListArray<i64>, LineStringType)) -> GeoArrowResult<Self> {
263        let coords = CoordBuffer::from_arrow(value.values().as_ref(), typ.dimension())?;
264        let geom_offsets = offsets_buffer_i64_to_i32(value.offsets())?;
265        let nulls = value.nulls();
266
267        Ok(Self::new(
268            coords,
269            geom_offsets,
270            nulls.cloned(),
271            typ.metadata().clone(),
272        ))
273    }
274}
275impl TryFrom<(&dyn Array, LineStringType)> for LineStringArray {
276    type Error = GeoArrowError;
277
278    fn try_from((value, typ): (&dyn Array, LineStringType)) -> GeoArrowResult<Self> {
279        match value.data_type() {
280            DataType::List(_) => (value.as_list::<i32>(), typ).try_into(),
281            DataType::LargeList(_) => (value.as_list::<i64>(), typ).try_into(),
282            dt => Err(GeoArrowError::InvalidGeoArrow(format!(
283                "Unexpected LineString DataType: {dt:?}",
284            ))),
285        }
286    }
287}
288
289impl TryFrom<(&dyn Array, &Field)> for LineStringArray {
290    type Error = GeoArrowError;
291
292    fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult<Self> {
293        let typ = field.try_extension_type::<LineStringType>()?;
294        (arr, typ).try_into()
295    }
296}
297
298impl<O: OffsetSizeTrait> TryFrom<(GenericWkbArray<O>, LineStringType)> for LineStringArray {
299    type Error = GeoArrowError;
300
301    fn try_from(value: (GenericWkbArray<O>, LineStringType)) -> GeoArrowResult<Self> {
302        let mut_arr: LineStringBuilder = value.try_into()?;
303        Ok(mut_arr.finish())
304    }
305}
306
307impl PartialEq for LineStringArray {
308    fn eq(&self, other: &Self) -> bool {
309        self.nulls == other.nulls
310            && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets)
311            && self.coords == other.coords
312    }
313}
314
315impl GeometryTypeId for LineStringArray {
316    const GEOMETRY_TYPE_OFFSET: i8 = 2;
317
318    fn dimension(&self) -> Dimension {
319        self.data_type.dimension()
320    }
321}
322
#[cfg(test)]
mod test {
    use arrow_array::RecordBatch;
    use arrow_schema::Schema;
    use geo_traits::to_geo::ToGeoLineString;
    use geoarrow_schema::{CoordType, Dimension};

    use super::*;
    use crate::test::linestring;

    /// Building an array from `geo` line strings and reading it back yields the inputs, both
    /// for the full array and for a slice of it.
    #[test]
    fn geo_round_trip() {
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            let expected = [Some(linestring::ls0()), None, Some(linestring::ls1()), None];
            let typ =
                LineStringType::new(Dimension::XY, Default::default()).with_coord_type(coord_type);
            let array = LineStringBuilder::from_nullable_line_strings(&expected, typ).finish();

            for (idx, item) in array.iter().enumerate() {
                let actual = item.transpose().unwrap().map(|ls| ls.to_line_string());
                assert_eq!(expected[idx], actual);
            }

            // Test sliced
            let tail = array.slice(2, 2);
            for (idx, item) in tail.iter().enumerate() {
                let actual = item.transpose().unwrap().map(|ls| ls.to_line_string());
                assert_eq!(expected[idx + 2], actual);
            }
        }
    }

    /// Converting a fixture array to `geo` line strings and rebuilding it gives an equal array.
    #[test]
    fn geo_round_trip2() {
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            let original = linestring::array(coord_type, Dimension::XY);
            let as_geo = original
                .iter()
                .map(|item| item.transpose().unwrap().map(|ls| ls.to_line_string()))
                .collect::<Vec<_>>();

            let typ =
                LineStringType::new(Dimension::XY, Default::default()).with_coord_type(coord_type);
            let rebuilt = LineStringBuilder::from_nullable_line_strings(&as_geo, typ).finish();
            assert_eq!(original, rebuilt);
        }
    }

    /// Round trip through Arrow for every coordinate type and dimension, using both the
    /// extension type and a field to describe the array.
    #[test]
    fn try_from_arrow() {
        for coord_type in [CoordType::Interleaved, CoordType::Separated] {
            for dim in [
                Dimension::XY,
                Dimension::XYZ,
                Dimension::XYM,
                Dimension::XYZM,
            ] {
                let original = linestring::array(coord_type, dim);

                let extension_type = original.extension_type().clone();
                let field = extension_type.to_field("geometry", true);

                let arrow_arr = original.to_array_ref();

                let via_type: LineStringArray =
                    (arrow_arr.as_ref(), extension_type).try_into().unwrap();
                let via_field: LineStringArray = (arrow_arr.as_ref(), &field).try_into().unwrap();

                assert_eq!(original, via_type);
                assert_eq!(original, via_field);
            }
        }
    }

    /// Equality ignores the coordinate layout but notices a difference in length.
    #[test]
    fn partial_eq() {
        let interleaved = linestring::ls_array(CoordType::Interleaved);
        let separated = linestring::ls_array(CoordType::Separated);
        assert_eq!(interleaved, interleaved);
        assert_eq!(separated, separated);
        assert_eq!(interleaved, separated);

        assert_ne!(interleaved, separated.slice(0, 2));
    }

    /// A sliced interleaved array survives validation when converted back from Arrow.
    #[test]
    fn test_validation_with_sliced_array() {
        let full = linestring::array(CoordType::Interleaved, Dimension::XY);
        let first_only = full.slice(0, 1);

        let round_tripped = LineStringArray::try_from((
            first_only.to_array_ref().as_ref(),
            full.extension_type().clone(),
        ))
        .unwrap();
        assert_eq!(round_tripped.len(), 1);
    }

    /// A sliced separated array keeps its length after going through Arrow.
    #[test]
    fn slice_then_go_through_arrow() {
        let full = linestring::array(CoordType::Separated, Dimension::XY);
        let first_only = full.slice(0, 1);

        let round_tripped: LineStringArray = (
            first_only.to_array_ref().as_ref(),
            full.extension_type().clone(),
        )
            .try_into()
            .unwrap();
        assert_eq!(round_tripped.len(), 1);
    }

    /// Slicing at the record batch level still produces a convertible column.
    #[test]
    fn slice_back_from_arrow_rs_record_batch() {
        let full = linestring::array(CoordType::Separated, Dimension::XY);
        let geometry_field = full.extension_type().to_field("geometry", true);
        let schema = Schema::new(vec![geometry_field]);

        let batch = RecordBatch::try_new(Arc::new(schema), vec![full.to_array_ref()]).unwrap();
        let sliced_batch = batch.slice(0, 1);

        let column = sliced_batch.column(0);
        let column_field = sliced_batch.schema_ref().field(0);
        let round_tripped: LineStringArray = (column.as_ref(), column_field).try_into().unwrap();
        assert_eq!(round_tripped.len(), 1);
    }

    /// Slicing the Arrow array directly still produces a convertible array.
    #[test]
    fn slice_back_from_arrow_rs_array() {
        let full = linestring::array(CoordType::Separated, Dimension::XY);
        let field = full.extension_type().to_field("geometry", true);

        let arrow_arr = full.to_array_ref();
        let sliced_arrow = arrow_arr.slice(0, 1);

        let round_tripped: LineStringArray =
            (sliced_arrow.as_ref(), &field).try_into().unwrap();
        assert_eq!(round_tripped.len(), 1);
    }

    /// Same as above, but starting from the `ls_array` fixture.
    #[test]
    fn slice_back_from_arrow_rs_array_with_nulls() {
        let full = linestring::ls_array(CoordType::Separated);
        let field = full.extension_type().to_field("geometry", true);

        let arrow_arr = full.to_array_ref();
        let sliced_arrow = arrow_arr.slice(0, 1);

        let round_tripped: LineStringArray =
            (sliced_arrow.as_ref(), &field).try_into().unwrap();
        assert_eq!(round_tripped.len(), 1);
    }
}
473}