geoarrow_array/array/coord/
separated.rs

1use std::sync::Arc;
2
3use arrow_array::cast::AsArray;
4use arrow_array::types::Float64Type;
5use arrow_array::{ArrayRef, Float64Array, StructArray};
6use arrow_buffer::ScalarBuffer;
7use arrow_schema::{DataType, Field};
8use geo_traits::CoordTrait;
9use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
10use geoarrow_schema::{CoordType, Dimension, PointType};
11
12use crate::builder::SeparatedCoordBufferBuilder;
13use crate::scalar::SeparatedCoord;
14
15/// An array of coordinates stored in separate buffers of the same length.
16///
17/// This stores all coordinates in separated fashion as multiple underlying buffers: e.g. `xxx` and
18/// `yyy` for 2D coordinates.
19#[derive(Debug, Clone, PartialEq)]
20pub struct SeparatedCoordBuffer {
21    /// We always store a buffer for all 4 dimensions. The buffers for dimension 3 and 4 may be
22    /// empty.
23    pub(crate) buffers: [ScalarBuffer<f64>; 4],
24    pub(crate) dim: Dimension,
25}
26
27fn check(buffers: &[ScalarBuffer<f64>; 4], dim: Dimension) -> GeoArrowResult<()> {
28    let all_same_length = match dim {
29        Dimension::XY => buffers[0].len() == buffers[1].len(),
30        Dimension::XYZ | Dimension::XYM => {
31            buffers[0].len() == buffers[1].len() && buffers[1].len() == buffers[2].len()
32        }
33        Dimension::XYZM => {
34            buffers[0].len() == buffers[1].len()
35                && buffers[1].len() == buffers[2].len()
36                && buffers[2].len() == buffers[3].len()
37        }
38    };
39
40    if !all_same_length {
41        return Err(GeoArrowError::InvalidGeoArrow(
42            "all buffers must have the same length".to_string(),
43        ));
44    }
45
46    Ok(())
47}
48
49impl SeparatedCoordBuffer {
50    /// The underlying coordinate type
51    pub const COORD_TYPE: CoordType = CoordType::Separated;
52
53    /// Construct a new SeparatedCoordBuffer from an array of existing buffers.
54    ///
55    /// The number of _valid_ buffers in the array must match the dimension size. E.g. if the `dim`
56    /// is `Dimension::XY`, then only the first two buffers must have non-zero length, and the last
57    /// two buffers in the array can have length zero.
58    pub fn from_array(buffers: [ScalarBuffer<f64>; 4], dim: Dimension) -> GeoArrowResult<Self> {
59        check(&buffers, dim)?;
60        Ok(Self { buffers, dim })
61    }
62
63    /// Construct a new SeparatedCoordBuffer from a `Vec` of existing buffers.
64    ///
65    /// All buffers within `buffers` must have the same length, and the length of `buffers` must
66    /// equal the dimension size.
67    pub fn from_vec(buffers: Vec<ScalarBuffer<f64>>, dim: Dimension) -> GeoArrowResult<Self> {
68        if buffers.len() != dim.size() {
69            return Err(GeoArrowError::InvalidGeoArrow(
70                "Buffers must match dimension length ".into(),
71            ));
72        }
73
74        let mut buffers = buffers.into_iter().map(Some).collect::<Vec<_>>();
75
76        // Fill buffers with empty buffers past needed dimensions
77        let buffers = core::array::from_fn(|i| {
78            if i < buffers.len() {
79                buffers[i].take().unwrap()
80            } else {
81                Vec::new().into()
82            }
83        });
84
85        Self::from_array(buffers, dim)
86    }
87
88    /// Access the underlying coordinate buffers.
89    ///
90    /// Note that not all four buffers may be valid. Only so many buffers have defined meaning as
91    /// there are dimensions, so for an XY buffer, only the first two buffers have defined meaning,
92    /// and the last two may be any buffer, or empty.
93    pub fn raw_buffers(&self) -> &[ScalarBuffer<f64>; 4] {
94        &self.buffers
95    }
96
97    /// Access the underlying coordinate buffers.
98    ///
99    /// In comparison to raw_buffers, all of the returned buffers are valid.
100    pub fn buffers(&self) -> Vec<ScalarBuffer<f64>> {
101        match self.dim {
102            Dimension::XY => {
103                vec![self.buffers[0].clone(), self.buffers[1].clone()]
104            }
105            Dimension::XYZ | Dimension::XYM => {
106                vec![
107                    self.buffers[0].clone(),
108                    self.buffers[1].clone(),
109                    self.buffers[2].clone(),
110                ]
111            }
112            Dimension::XYZM => {
113                vec![
114                    self.buffers[0].clone(),
115                    self.buffers[1].clone(),
116                    self.buffers[2].clone(),
117                    self.buffers[3].clone(),
118                ]
119            }
120        }
121    }
122
123    /// The dimension of this coordinate buffer
124    pub fn dim(&self) -> Dimension {
125        self.dim
126    }
127
128    pub(crate) fn values_array(&self) -> Vec<ArrayRef> {
129        match self.dim {
130            Dimension::XY => {
131                vec![
132                    Arc::new(Float64Array::new(self.buffers[0].clone(), None)),
133                    Arc::new(Float64Array::new(self.buffers[1].clone(), None)),
134                ]
135            }
136            Dimension::XYZ | Dimension::XYM => {
137                vec![
138                    Arc::new(Float64Array::new(self.buffers[0].clone(), None)),
139                    Arc::new(Float64Array::new(self.buffers[1].clone(), None)),
140                    Arc::new(Float64Array::new(self.buffers[2].clone(), None)),
141                ]
142            }
143            Dimension::XYZM => {
144                vec![
145                    Arc::new(Float64Array::new(self.buffers[0].clone(), None)),
146                    Arc::new(Float64Array::new(self.buffers[1].clone(), None)),
147                    Arc::new(Float64Array::new(self.buffers[2].clone(), None)),
148                    Arc::new(Float64Array::new(self.buffers[3].clone(), None)),
149                ]
150            }
151        }
152    }
153
154    pub(crate) fn values_field(&self) -> Vec<Field> {
155        match self.dim {
156            Dimension::XY => {
157                vec![
158                    Field::new("x", DataType::Float64, false),
159                    Field::new("y", DataType::Float64, false),
160                ]
161            }
162            Dimension::XYZ => {
163                vec![
164                    Field::new("x", DataType::Float64, false),
165                    Field::new("y", DataType::Float64, false),
166                    Field::new("z", DataType::Float64, false),
167                ]
168            }
169            Dimension::XYM => {
170                vec![
171                    Field::new("x", DataType::Float64, false),
172                    Field::new("y", DataType::Float64, false),
173                    Field::new("m", DataType::Float64, false),
174                ]
175            }
176            Dimension::XYZM => {
177                vec![
178                    Field::new("x", DataType::Float64, false),
179                    Field::new("y", DataType::Float64, false),
180                    Field::new("z", DataType::Float64, false),
181                    Field::new("m", DataType::Float64, false),
182                ]
183            }
184        }
185    }
186
187    pub(crate) fn slice(&self, offset: usize, length: usize) -> Self {
188        assert!(
189            offset + length <= self.len(),
190            "offset + length may not exceed length of array"
191        );
192
193        // Initialize array with existing buffers, then overwrite them
194        let mut sliced_buffers = self.buffers.clone();
195        for (i, buffer) in self.buffers.iter().enumerate().take(self.dim.size()) {
196            sliced_buffers[i] = buffer.slice(offset, length);
197        }
198
199        Self {
200            buffers: sliced_buffers,
201            dim: self.dim,
202        }
203    }
204
205    pub(crate) fn storage_type(&self) -> DataType {
206        PointType::new(self.dim, Default::default())
207            .with_coord_type(Self::COORD_TYPE)
208            .data_type()
209    }
210
211    /// The number of coordinates
212    pub fn len(&self) -> usize {
213        self.buffers[0].len()
214    }
215
216    /// Whether the coordinate buffer is empty
217    pub fn is_empty(&self) -> bool {
218        self.len() == 0
219    }
220
221    pub(crate) fn value(&self, index: usize) -> SeparatedCoord<'_> {
222        assert!(index <= self.len());
223        self.value_unchecked(index)
224    }
225
226    pub(crate) fn value_unchecked(&self, index: usize) -> SeparatedCoord<'_> {
227        SeparatedCoord {
228            buffers: &self.buffers,
229            i: index,
230            dim: self.dim,
231        }
232    }
233
234    pub(crate) fn from_arrow(array: &StructArray, dim: Dimension) -> GeoArrowResult<Self> {
235        let buffers = array
236            .columns()
237            .iter()
238            .map(|c| c.as_primitive::<Float64Type>().values().clone())
239            .collect();
240        Self::from_vec(buffers, dim)
241    }
242
243    /// Construct from an iterator of coordinates
244    pub fn from_coords<'a>(
245        coords: impl ExactSizeIterator<Item = &'a (impl CoordTrait<T = f64> + 'a)>,
246        dim: Dimension,
247    ) -> GeoArrowResult<Self> {
248        Ok(SeparatedCoordBufferBuilder::from_coords(coords, dim)?.finish())
249    }
250}
251
252impl From<SeparatedCoordBuffer> for StructArray {
253    fn from(value: SeparatedCoordBuffer) -> Self {
254        StructArray::new(value.values_field().into(), value.values_array(), None)
255    }
256}
257
#[cfg(test)]
mod test {
    use super::*;

    /// A slice of a larger buffer must compare equal to a freshly built buffer holding the same
    /// coordinates.
    #[test]
    fn test_eq_slicing() {
        let x1 = vec![0., 1., 2.];
        let y1 = vec![3., 4., 5.];

        // Keep only the middle coordinate: (1, 4).
        let buf1 = SeparatedCoordBuffer::from_vec(vec![x1.into(), y1.into()], Dimension::XY)
            .unwrap()
            .slice(1, 1);

        let x2 = vec![1.];
        let y2 = vec![4.];
        let buf2 =
            SeparatedCoordBuffer::from_vec(vec![x2.into(), y2.into()], Dimension::XY).unwrap();

        assert_eq!(buf1.len(), 1);
        assert_eq!(buf1, buf2);
    }
}