1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
use arrow_array::OffsetSizeTrait;
use arrow_array::builder::GenericBinaryBuilder;
use geo_traits::GeometryTrait;
use geoarrow_schema::WkbType;
use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
use wkb::Endianness;
use wkb::reader::Wkb;
use wkb::writer::{WriteOptions, write_geometry};
use crate::array::GenericWkbArray;
use crate::capacity::WkbCapacity;
/// The GeoArrow equivalent to `Vec<Option<Wkb>>`: a mutable collection of Wkb buffers.
///
/// Converting a [`WkbBuilder`] into a [`GenericWkbArray`] is `O(1)`.
///
/// This is a tuple struct with two positional fields:
///
/// - field `0` is the Arrow [`GenericBinaryBuilder`] that accumulates the pushed values and
///   nulls;
/// - field `1` is the [`WkbType`] whose metadata is attached to the array returned by `finish`.
///
/// The offset type `O` is forwarded unchanged to both the inner binary builder and the
/// resulting [`GenericWkbArray`].
#[derive(Debug)]
pub struct WkbBuilder<O: OffsetSizeTrait>(GenericBinaryBuilder<O>, WkbType);
impl<O: OffsetSizeTrait> WkbBuilder<O> {
    /// Constructs an empty [`WkbBuilder`] for the given type, using the default [`WkbCapacity`].
    pub fn new(typ: WkbType) -> Self {
        let capacity = WkbCapacity::default();
        Self::with_capacity(typ, capacity)
    }

    /// Constructs a [`WkbBuilder`] whose underlying binary builder is pre-allocated.
    ///
    /// `capacity.offsets_capacity` and `capacity.buffer_capacity` are forwarded, in that order,
    /// to [`GenericBinaryBuilder::with_capacity`].
    pub fn with_capacity(typ: WkbType, capacity: WkbCapacity) -> Self {
        let slots = capacity.offsets_capacity;
        let value_bytes = capacity.buffer_capacity;
        let inner = GenericBinaryBuilder::with_capacity(slots, value_bytes);
        Self(inner, typ)
    }

    // TODO: a `reserve(&mut self, capacity: WkbCapacity)` method is not offered yet because the
    // upstream builder exposes no API for it. Implementing it without upstream changes would
    // mean managing the `Vec`s manually ourselves.

    /// Appends one geometry (or a null) to the end of this builder.
    ///
    /// `Some(geom)` is serialized as little-endian WKB directly into the underlying binary
    /// builder and then committed as a single value. `None` appends a null slot.
    ///
    /// # Errors
    ///
    /// Returns [`GeoArrowError::Wkb`] carrying the writer's message if serialization fails.
    ///
    /// NOTE(review): serialization streams straight into the underlying builder, so bytes
    /// written before a mid-way failure presumably stay pending there — confirm against the
    /// `std::io::Write` semantics of arrow's `GenericBinaryBuilder`.
    #[inline]
    pub fn push_geometry(
        &mut self,
        geom: Option<&impl GeometryTrait<T = f64>>,
    ) -> GeoArrowResult<()> {
        match geom {
            None => self.0.append_null(),
            Some(geometry) => {
                let options = WriteOptions {
                    endianness: Endianness::LittleEndian,
                };
                write_geometry(&mut self.0, geometry, &options)
                    .map_err(|e| GeoArrowError::Wkb(e.to_string()))?;
                // The WKB bytes were already streamed into the builder above; appending an
                // empty value closes them off as one element.
                self.0.append_value("");
            }
        }
        Ok(())
    }

    /// Appends every item yielded by `geoms`, in order, via [`Self::push_geometry`].
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error reported by [`Self::push_geometry`]. Items pushed
    /// before the failure remain in the builder.
    pub fn extend_from_iter<'a>(
        &mut self,
        geoms: impl Iterator<Item = Option<&'a (impl GeometryTrait<T = f64> + 'a)>>,
    ) -> GeoArrowResult<()> {
        for maybe_geom in geoms {
            self.push_geometry(maybe_geom)?;
        }
        Ok(())
    }

    /// Builds a new [`WkbBuilder`] from a slice of optional geometries.
    ///
    /// The slice is traversed twice: once to compute the [`WkbCapacity`] used for
    /// pre-allocation, and once to serialize each entry.
    ///
    /// # Errors
    ///
    /// Propagates the first error returned while serializing a geometry.
    pub fn from_nullable_geometries(
        geoms: &[Option<impl GeometryTrait<T = f64>>],
        typ: WkbType,
    ) -> GeoArrowResult<Self> {
        let required = WkbCapacity::from_geometries(geoms.iter().map(|g| g.as_ref()));
        let mut builder = Self::with_capacity(typ, required);
        builder.extend_from_iter(geoms.iter().map(|g| g.as_ref()))?;
        Ok(builder)
    }

    /// Appends already-encoded WKB bytes (or a null) to the end of this builder.
    ///
    /// `Some(bytes)` is first checked with [`Wkb::try_new`] and only appended if that succeeds.
    /// `None` appends a null slot.
    ///
    /// # Errors
    ///
    /// Returns [`GeoArrowError::Wkb`] if the input bytes are not accepted as valid WKB; nothing
    /// is appended in that case.
    ///
    /// # Example
    ///
    /// ```
    /// use geoarrow_array::builder::WkbBuilder;
    /// use geoarrow_array::GeoArrowArray;
    /// use geoarrow_schema::WkbType;
    ///
    /// let mut builder = WkbBuilder::<i32>::new(WkbType::default());
    ///
    /// // Valid WKB for a Point(1.0, 2.0) in little-endian
    /// let wkb_bytes = vec![
    ///     0x01, // Little-endian
    ///     0x01, 0x00, 0x00, 0x00, // Point type
    ///     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0
    ///     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0
    /// ];
    ///
    /// builder.push_wkb(Some(&wkb_bytes)).unwrap();
    /// builder.push_wkb(None).unwrap(); // Append null
    ///
    /// let array = builder.finish();
    /// assert_eq!(array.len(), 2);
    /// ```
    #[inline]
    pub fn push_wkb(&mut self, wkb: Option<&[u8]>) -> GeoArrowResult<()> {
        match wkb {
            None => self.0.append_null(),
            Some(bytes) => {
                // Parse purely for validation; the parsed geometry itself is discarded.
                Wkb::try_new(bytes).map_err(|e| GeoArrowError::Wkb(e.to_string()))?;
                self.0.append_value(bytes);
            }
        }
        Ok(())
    }

    /// Appends already-encoded WKB bytes (or a null) to this builder, skipping validation.
    ///
    /// The bytes are copied into the underlying buffer as-is. `None` appends a null slot.
    ///
    /// # Safety
    ///
    /// No check is made that the input bytes are valid WKB. Passing invalid data may result in
    /// undefined behavior when the resulting array is later used by operations that assume
    /// valid WKB.
    ///
    /// The caller must ensure that:
    /// - The bytes represent valid WKB according to the OGC WKB specification
    /// - The byte order (endianness) is correctly specified in the WKB header
    /// - The geometry type and coordinates are properly encoded
    ///
    /// # Example
    ///
    /// ```
    /// use geoarrow_array::builder::WkbBuilder;
    /// use geoarrow_array::GeoArrowArray;
    /// use geoarrow_schema::WkbType;
    ///
    /// let mut builder = WkbBuilder::<i32>::new(WkbType::default());
    ///
    /// // Valid WKB for a Point(1.0, 2.0) in little-endian
    /// let wkb_bytes = vec![
    ///     0x01, // Little-endian
    ///     0x01, 0x00, 0x00, 0x00, // Point type
    ///     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0
    ///     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0
    /// ];
    ///
    /// unsafe {
    ///     builder.push_wkb_unchecked(Some(&wkb_bytes));
    ///     builder.push_wkb_unchecked(None); // Append null
    /// }
    ///
    /// let array = builder.finish();
    /// assert_eq!(array.len(), 2);
    /// ```
    #[inline]
    pub unsafe fn push_wkb_unchecked(&mut self, wkb: Option<&[u8]>) {
        match wkb {
            None => self.0.append_null(),
            Some(bytes) => self.0.append_value(bytes),
        }
    }

    /// Consumes this builder and produces a [`GenericWkbArray`].
    ///
    /// The array wraps the finished binary values together with a clone of the metadata held by
    /// this builder's [`WkbType`].
    ///
    /// This is `O(1)`.
    pub fn finish(mut self) -> GenericWkbArray<O> {
        let values = self.0.finish();
        let metadata = self.1.metadata().clone();
        GenericWkbArray::new(values, metadata)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::trait_::GeoArrowArray;

    /// Serializes `Point(x, y)` to WKB using the writer's default options.
    fn encode_point(x: f64, y: f64) -> Vec<u8> {
        let mut bytes = Vec::new();
        wkb::writer::write_point(&mut bytes, &geo::Point::new(x, y), &Default::default())
            .unwrap();
        bytes
    }

    /// Valid WKB for Point(1.0, 2.0)
    fn point_wkb() -> Vec<u8> {
        encode_point(1.0, 2.0)
    }

    /// Valid WKB for Point(3.0, 4.0)
    fn point_wkb_2() -> Vec<u8> {
        encode_point(3.0, 4.0)
    }

    /// Invalid WKB (too short)
    fn invalid_wkb() -> Vec<u8> {
        vec![0x01, 0x01]
    }

    /// Collects the per-slot null flags of `array`; the vector length equals `array.len()`, so
    /// comparing it against an expected mask checks both the length and every slot's nullness.
    fn null_mask<O: OffsetSizeTrait>(array: &GenericWkbArray<O>) -> Vec<bool> {
        (0..array.len()).map(|slot| array.is_null(slot)).collect()
    }

    #[test]
    fn test_push_raw_valid() {
        let mut builder = WkbBuilder::<i32>::new(WkbType::default());
        let bytes = point_wkb();

        // Valid WKB must be accepted.
        builder.push_wkb(Some(&bytes)).unwrap();

        assert_eq!(null_mask(&builder.finish()), [false]);
    }

    #[test]
    fn test_push_raw_multiple() {
        let mut builder = WkbBuilder::<i32>::new(WkbType::default());
        let first = point_wkb();
        let second = point_wkb_2();

        builder.push_wkb(Some(&first)).unwrap();
        builder.push_wkb(Some(&second)).unwrap();

        assert_eq!(null_mask(&builder.finish()), [false, false]);
    }

    #[test]
    fn test_push_raw_null() {
        let mut builder = WkbBuilder::<i32>::new(WkbType::default());

        // A `None` input becomes a null slot.
        builder.push_wkb(None).unwrap();

        assert_eq!(null_mask(&builder.finish()), [true]);
    }

    #[test]
    fn test_push_raw_mixed_with_nulls() {
        let mut builder = WkbBuilder::<i32>::new(WkbType::default());
        let bytes = point_wkb();

        builder.push_wkb(Some(&bytes)).unwrap();
        builder.push_wkb(None).unwrap();
        builder.push_wkb(Some(&bytes)).unwrap();

        assert_eq!(null_mask(&builder.finish()), [false, true, false]);
    }

    #[test]
    fn test_push_raw_invalid() {
        let mut builder = WkbBuilder::<i32>::new(WkbType::default());
        let bytes = invalid_wkb();

        // Truncated input must be rejected by validation.
        assert!(builder.push_wkb(Some(&bytes)).is_err());
    }

    #[test]
    fn test_push_raw_unchecked_valid() {
        let mut builder = WkbBuilder::<i32>::new(WkbType::default());
        let bytes = point_wkb();

        // SAFETY: `bytes` was produced by the WKB writer, so it is valid WKB.
        unsafe {
            builder.push_wkb_unchecked(Some(&bytes));
        }

        assert_eq!(null_mask(&builder.finish()), [false]);
    }

    #[test]
    fn test_push_raw_unchecked_null() {
        let mut builder = WkbBuilder::<i32>::new(WkbType::default());

        // SAFETY: `None` carries no bytes, so there is nothing to validate.
        unsafe {
            builder.push_wkb_unchecked(None);
        }

        assert_eq!(null_mask(&builder.finish()), [true]);
    }

    #[test]
    fn test_push_raw_unchecked_multiple() {
        let mut builder = WkbBuilder::<i32>::new(WkbType::default());
        let first = point_wkb();
        let second = point_wkb_2();

        // SAFETY: both buffers were produced by the WKB writer, so they are valid WKB.
        unsafe {
            builder.push_wkb_unchecked(Some(&first));
            builder.push_wkb_unchecked(None);
            builder.push_wkb_unchecked(Some(&second));
        }

        assert_eq!(null_mask(&builder.finish()), [false, true, false]);
    }

    #[test]
    fn test_push_raw_with_i64_offset() {
        let mut builder = WkbBuilder::<i64>::new(WkbType::default());
        let bytes = point_wkb();

        builder.push_wkb(Some(&bytes)).unwrap();
        builder.push_wkb(None).unwrap();

        assert_eq!(null_mask(&builder.finish()), [false, true]);
    }
}