1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
use std::sync::Arc;
use arrow_array::cast::AsArray;
use arrow_array::types::Float64Type;
use arrow_array::{ArrayRef, Float64Array, StructArray};
use arrow_buffer::ScalarBuffer;
use arrow_schema::{DataType, Field};
use geo_traits::CoordTrait;
use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
use geoarrow_schema::{CoordType, Dimension, PointType};
use crate::builder::SeparatedCoordBufferBuilder;
use crate::scalar::SeparatedCoord;
/// An array of coordinates stored in separate buffers of the same length.
///
/// This stores all coordinates in separated fashion as multiple underlying buffers: e.g. `xxx` and
/// `yyy` for 2D coordinates.
#[derive(Debug, Clone, PartialEq)]
pub struct SeparatedCoordBuffer {
/// One buffer per coordinate axis. We always store a buffer for all 4 dimensions. The buffers
/// for dimension 3 and 4 may be empty.
pub(crate) buffers: [ScalarBuffer<f64>; 4],
/// The dimension of the stored coordinates. Construction only validates the lengths of the
/// leading buffers that this dimension uses.
pub(crate) dim: Dimension,
}
/// Validates that every buffer used by `dim` has the same length.
///
/// Only the leading buffers that carry data for `dim` are inspected (two for `XY`, three for
/// `XYZ`/`XYM`, four for `XYZM`); any trailing buffers are ignored.
///
/// # Errors
///
/// Returns [`GeoArrowError::InvalidGeoArrow`] when the inspected buffers differ in length.
fn check(buffers: &[ScalarBuffer<f64>; 4], dim: Dimension) -> GeoArrowResult<()> {
    let used = match dim {
        Dimension::XY => 2,
        Dimension::XYZ | Dimension::XYM => 3,
        Dimension::XYZM => 4,
    };

    // Comparing every used buffer against the first is equivalent to a pairwise chain.
    let expected_len = buffers[0].len();
    let lengths_agree = buffers[1..used]
        .iter()
        .all(|buffer| buffer.len() == expected_len);

    if lengths_agree {
        Ok(())
    } else {
        Err(GeoArrowError::InvalidGeoArrow(
            "all buffers must have the same length".to_string(),
        ))
    }
}
impl SeparatedCoordBuffer {
/// The underlying coordinate type, which for this buffer is always [`CoordType::Separated`].
pub const COORD_TYPE: CoordType = CoordType::Separated;
/// Builds a `SeparatedCoordBuffer` from a fixed array of four buffers.
///
/// Only the leading buffers that `dim` uses are validated: for `Dimension::XY` the first two
/// buffers must have equal lengths, while the last two are not inspected and may be empty.
///
/// # Errors
///
/// Returns an error if the buffers used by `dim` do not all have the same length.
pub fn from_array(buffers: [ScalarBuffer<f64>; 4], dim: Dimension) -> GeoArrowResult<Self> {
    check(&buffers, dim).map(|()| Self { buffers, dim })
}
/// Construct a new SeparatedCoordBuffer from a `Vec` of existing buffers.
///
/// All buffers within `buffers` must have the same length, and the length of `buffers` must
/// equal the dimension size.
pub fn from_vec(buffers: Vec<ScalarBuffer<f64>>, dim: Dimension) -> GeoArrowResult<Self> {
if buffers.len() != dim.size() {
return Err(GeoArrowError::InvalidGeoArrow(
"Buffers must match dimension length ".into(),
));
}
let mut buffers = buffers.into_iter().map(Some).collect::<Vec<_>>();
// Fill buffers with empty buffers past needed dimensions
let buffers = core::array::from_fn(|i| {
if i < buffers.len() {
buffers[i].take().unwrap()
} else {
Vec::new().into()
}
});
Self::from_array(buffers, dim)
}
/// Access the underlying coordinate buffers.
///
/// Note that not all four buffers may be valid. Only so many buffers have defined meaning as
/// there are dimensions, so for an XY buffer, only the first two buffers have defined meaning,
/// and the last two may be any buffer, or empty.
pub fn raw_buffers(&self) -> &[ScalarBuffer<f64>; 4] {
&self.buffers
}
/// Returns clones of the coordinate buffers that are in use for this dimension.
///
/// Unlike `raw_buffers`, the result contains only the meaningful buffers: two for `XY`, three
/// for `XYZ`/`XYM` and four for `XYZM`.
pub fn buffers(&self) -> Vec<ScalarBuffer<f64>> {
    let used = match self.dim {
        Dimension::XY => 2,
        Dimension::XYZ | Dimension::XYM => 3,
        Dimension::XYZM => 4,
    };
    self.buffers[..used].to_vec()
}
/// The dimension of this coordinate buffer
pub fn dim(&self) -> Dimension {
self.dim
}
/// Wraps each in-use coordinate buffer in a `Float64Array` without a null buffer.
///
/// The result has one array per dimension, in the same order as the underlying buffers.
pub(crate) fn values_array(&self) -> Vec<ArrayRef> {
    let used = match self.dim {
        Dimension::XY => 2,
        Dimension::XYZ | Dimension::XYM => 3,
        Dimension::XYZM => 4,
    };
    self.buffers[..used]
        .iter()
        .map(|values| Arc::new(Float64Array::new(values.clone(), None)) as ArrayRef)
        .collect()
}
/// Returns the non-nullable `Float64` fields describing each coordinate column.
///
/// Field names follow the dimension: `x`, `y`, then `z` and/or `m` as applicable.
pub(crate) fn values_field(&self) -> Vec<Field> {
    let names: &[&str] = match self.dim {
        Dimension::XY => &["x", "y"],
        Dimension::XYZ => &["x", "y", "z"],
        Dimension::XYM => &["x", "y", "m"],
        Dimension::XYZM => &["x", "y", "z", "m"],
    };
    names
        .iter()
        .map(|name| Field::new(*name, DataType::Float64, false))
        .collect()
}
/// Returns a new buffer covering `length` coordinates starting at `offset`.
///
/// Only the buffers in use for this dimension are sliced; unused trailing buffers are carried
/// over unchanged.
///
/// # Panics
///
/// Panics if `offset + length` exceeds the number of coordinates in this buffer.
pub(crate) fn slice(&self, offset: usize, length: usize) -> Self {
    // `saturating_add` keeps the bounds check sound when `offset + length` would overflow
    // `usize`; a plain `+` wraps in release builds and would let the assertion pass.
    assert!(
        offset.saturating_add(length) <= self.len(),
        "offset + length may not exceed length of array"
    );

    // Start from the existing buffers, then replace the in-use ones with their sliced views.
    let mut sliced_buffers = self.buffers.clone();
    for buffer in sliced_buffers.iter_mut().take(self.dim.size()) {
        *buffer = buffer.slice(offset, length);
    }

    Self {
        buffers: sliced_buffers,
        dim: self.dim,
    }
}
/// Returns the Arrow `DataType` of a point type with this buffer's dimension, default metadata
/// and the separated coordinate type.
pub(crate) fn storage_type(&self) -> DataType {
    let point_type =
        PointType::new(self.dim, Default::default()).with_coord_type(Self::COORD_TYPE);
    point_type.data_type()
}
/// Returns the number of coordinates, taken from the length of the first (x) buffer.
pub fn len(&self) -> usize {
    let [x, ..] = &self.buffers;
    x.len()
}
/// Returns `true` when the buffer holds no coordinates.
pub fn is_empty(&self) -> bool {
    self.buffers[0].is_empty()
}
/// Returns the coordinate at position `index`, not considering validity.
///
/// # Examples
///
/// ```
/// use geo_traits::CoordTrait;
/// use geoarrow_array::array::SeparatedCoordBuffer;
/// use geoarrow_schema::Dimension;
///
/// let coords = [
///     geo_types::coord! { x: 1.0, y: 2.0 },
///     geo_types::coord! { x: 3.0, y: 4.0 },
/// ];
/// let coord_buffer = SeparatedCoordBuffer::from_coords(coords.iter(), Dimension::XY).unwrap();
/// let coord = coord_buffer.value(0);
/// assert_eq!(coord.x(), 1.0);
/// assert_eq!(coord.y(), 2.0);
/// ```
///
/// # Panics
///
/// Panics if `index` is outside the bounds of the buffer, i.e. if `index >= self.len()`.
pub fn value(&self, index: usize) -> SeparatedCoord<'_> {
    // Valid indices are `0..len`; `index == len` is already one past the end.
    assert!(
        index < self.len(),
        "index {} out of bounds for coordinate buffer of length {}",
        index,
        self.len()
    );
    // SAFETY: `index` was checked to be within bounds just above.
    unsafe { self.value_unchecked(index) }
}
/// Returns the element at index `i`, not considering validity.
///
/// # Examples
///
/// ```
/// use geo_traits::CoordTrait;
/// use geoarrow_array::array::SeparatedCoordBuffer;
/// use geoarrow_schema::Dimension;
///
/// let coords = [
/// geo_types::coord! { x: 1.0, y: 2.0 },
/// geo_types::coord! { x: 3.0, y: 4.0 },
/// ];
/// let coord_buffer = SeparatedCoordBuffer::from_coords(coords.iter(), Dimension::XY).unwrap();
/// let coord = unsafe { coord_buffer.value_unchecked(0) };
/// assert_eq!(coord.x(), 1.0);
/// assert_eq!(coord.y(), 2.0);
/// ```
///
/// # Safety
///
/// Caller is responsible for ensuring that the index is within the bounds of the buffer.
pub unsafe fn value_unchecked(&self, index: usize) -> SeparatedCoord<'_> {
SeparatedCoord {
buffers: &self.buffers,
i: index,
dim: self.dim,
}
}
pub(crate) fn from_arrow(array: &StructArray, dim: Dimension) -> GeoArrowResult<Self> {
let buffers = array
.columns()
.iter()
.map(|c| c.as_primitive::<Float64Type>().values().clone())
.collect();
Self::from_vec(buffers, dim)
}
/// Builds a `SeparatedCoordBuffer` from an exact-size iterator of coordinates.
///
/// The work is delegated to `SeparatedCoordBufferBuilder::from_coords`, whose result is then
/// finished into a buffer.
///
/// # Errors
///
/// Propagates any error returned by the builder.
pub fn from_coords<'a>(
    coords: impl ExactSizeIterator<Item = &'a (impl CoordTrait<T = f64> + 'a)>,
    dim: Dimension,
) -> GeoArrowResult<Self> {
    let builder = SeparatedCoordBufferBuilder::from_coords(coords, dim)?;
    Ok(builder.finish())
}
}
/// Converts the coordinate buffer into a `StructArray` with one non-nullable `Float64` child per
/// dimension and no struct-level null buffer.
impl From<SeparatedCoordBuffer> for StructArray {
    fn from(value: SeparatedCoordBuffer) -> Self {
        let fields = value.values_field();
        let columns = value.values_array();
        StructArray::new(fields.into(), columns, None)
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// A sliced buffer must compare equal to a buffer built directly from the sliced values.
    #[test]
    fn test_eq_slicing() {
        let x1 = vec![0., 1., 2.];
        let y1 = vec![3., 4., 5.];
        let buf1 = SeparatedCoordBuffer::from_vec(vec![x1.into(), y1.into()], Dimension::XY)
            .unwrap()
            .slice(1, 1);

        let x2 = vec![1.];
        let y2 = vec![4.];
        let buf2 =
            SeparatedCoordBuffer::from_vec(vec![x2.into(), y2.into()], Dimension::XY).unwrap();

        assert_eq!(buf1, buf2);
    }
}