geoarrow_array/builder/
geometry.rs

1use std::sync::Arc;
2
3use arrow_array::OffsetSizeTrait;
4use geo_traits::*;
5use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
6use geoarrow_schema::type_id::GeometryTypeId;
7use geoarrow_schema::{
8    Dimension, GeometryCollectionType, GeometryType, LineStringType, Metadata, MultiLineStringType,
9    MultiPointType, MultiPolygonType, PointType, PolygonType,
10};
11
12use crate::GeoArrowArray;
13use crate::array::{DimensionIndex, GenericWkbArray, GeometryArray};
14use crate::builder::geo_trait_wrappers::{LineWrapper, RectWrapper, TriangleWrapper};
15use crate::builder::{
16    GeometryCollectionBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPointBuilder,
17    MultiPolygonBuilder, PointBuilder, PolygonBuilder,
18};
19use crate::capacity::GeometryCapacity;
20use crate::trait_::{GeoArrowArrayAccessor, GeoArrowArrayBuilder};
21
/// Initial value of `GeometryBuilder::prefer_multi` for newly constructed builders: single-part
/// geometries are stored in their single-part child builders unless the caller opts in to the
/// multi-part children.
pub(crate) const DEFAULT_PREFER_MULTI: bool = false;
23
/// The GeoArrow equivalent to a `Vec<Option<Geometry>>`: a mutable collection of Geometries.
///
/// Each Geometry can have a different dimension. All geometries must have the same coordinate
/// type.
///
/// This currently has the caveat that these geometries must be a _primitive_ geometry type. This
/// does not currently support nested GeometryCollection objects.
///
/// Converting a [`GeometryBuilder`] into a [`GeometryArray`] is `O(1)`.
///
/// # Invariants
///
/// - All arrays must have the same coordinate layout (interleaved or separated)
#[derive(Debug)]
pub struct GeometryBuilder {
    /// Array-level metadata, taken from the `GeometryType` the builder was created with.
    metadata: Arc<Metadata>,

    // One entry per pushed element: the type id of the child builder that holds the element.
    // Invariant: every item in `types` is `> 0 && < fields.len()`
    types: Vec<i8>,

    /// An array of `PointBuilder`s, ordered XY, XYZ, XYM, XYZM
    points: [PointBuilder; 4],
    /// An array of `LineStringBuilder`s, indexed by dimension order like `points`.
    line_strings: [LineStringBuilder; 4],
    /// An array of `PolygonBuilder`s, indexed by dimension order like `points`.
    polygons: [PolygonBuilder; 4],
    /// An array of `MultiPointBuilder`s, indexed by dimension order like `points`.
    mpoints: [MultiPointBuilder; 4],
    /// An array of `MultiLineStringBuilder`s, indexed by dimension order like `points`.
    mline_strings: [MultiLineStringBuilder; 4],
    /// An array of `MultiPolygonBuilder`s, indexed by dimension order like `points`.
    mpolygons: [MultiPolygonBuilder; 4],
    /// An array of `GeometryCollectionBuilder`s, indexed by dimension order like `points`.
    gcs: [GeometryCollectionBuilder; 4],

    // One entry per pushed element: the length of the selected child builder at the moment the
    // element was recorded, i.e. the position of the element inside that child.
    // Invariant: `offsets.len() == types.len()`
    offsets: Vec<i32>,

    /// Whether to prefer multi or single arrays for new geometries.
    ///
    /// E.g. if this is `true` and a Point geometry is added, it will be added to the
    /// MultiPointBuilder. If this is `false`, the Point geometry will be added to the
    /// PointBuilder.
    ///
    /// The idea is that always adding multi-geometries will make it easier to downcast later.
    pub(crate) prefer_multi: bool,

    /// The number of nulls that have been deferred and are still to be written.
    ///
    /// Adding nulls is tricky. We often want to use this builder as a generic builder for data
    /// from unknown sources, which then gets downcasted to an array of a specific type.
    ///
    /// In the large majority of cases, this builder will have only data of a single type, which
    /// can then get downcasted to a simple array of a single geometry type and dimension. But in
    /// order for this process to be easy, we want the nulls to be assigned to the same array type
    /// as the actual data.
    ///
    /// When there's a valid geometry pushed before the null, we can add the null to an existing
    /// non-null array type, but if there are no valid geometries yet, we don't know which array to
    /// push the null to. This `deferred_nulls` is the number of initial null values that haven't
    /// yet been written to an array, because we don't know which array to write them to.
    deferred_nulls: usize,
}
81
82impl<'a> GeometryBuilder {
83    /// Creates a new empty [`GeometryBuilder`].
84    pub fn new(typ: GeometryType) -> Self {
85        Self::with_capacity(typ, Default::default())
86    }
87
88    /// Creates a new [`GeometryBuilder`] with given capacity and no validity.
89    pub fn with_capacity(typ: GeometryType, capacity: GeometryCapacity) -> Self {
90        let coord_type = typ.coord_type();
91
92        let points = core::array::from_fn(|i| {
93            let dim = Dimension::from_order(i).unwrap();
94            PointBuilder::with_capacity(
95                PointType::new(dim, Default::default()).with_coord_type(coord_type),
96                capacity.point(dim),
97            )
98        });
99        let line_strings = core::array::from_fn(|i| {
100            let dim = Dimension::from_order(i).unwrap();
101            LineStringBuilder::with_capacity(
102                LineStringType::new(dim, Default::default()).with_coord_type(coord_type),
103                capacity.line_string(dim),
104            )
105        });
106        let polygons = core::array::from_fn(|i| {
107            let dim = Dimension::from_order(i).unwrap();
108            PolygonBuilder::with_capacity(
109                PolygonType::new(dim, Default::default()).with_coord_type(coord_type),
110                capacity.polygon(dim),
111            )
112        });
113        let mpoints = core::array::from_fn(|i| {
114            let dim = Dimension::from_order(i).unwrap();
115            MultiPointBuilder::with_capacity(
116                MultiPointType::new(dim, Default::default()).with_coord_type(coord_type),
117                capacity.multi_point(dim),
118            )
119        });
120        let mline_strings = core::array::from_fn(|i| {
121            let dim = Dimension::from_order(i).unwrap();
122            MultiLineStringBuilder::with_capacity(
123                MultiLineStringType::new(dim, Default::default()).with_coord_type(coord_type),
124                capacity.multi_line_string(dim),
125            )
126        });
127        let mpolygons = core::array::from_fn(|i| {
128            let dim = Dimension::from_order(i).unwrap();
129            MultiPolygonBuilder::with_capacity(
130                MultiPolygonType::new(dim, Default::default()).with_coord_type(coord_type),
131                capacity.multi_polygon(dim),
132            )
133        });
134        let gcs = core::array::from_fn(|i| {
135            let dim = Dimension::from_order(i).unwrap();
136            GeometryCollectionBuilder::with_capacity(
137                GeometryCollectionType::new(dim, Default::default()).with_coord_type(coord_type),
138                capacity.geometry_collection(dim),
139            )
140        });
141
142        // Don't store array metadata on child arrays
143        Self {
144            metadata: typ.metadata().clone(),
145            types: vec![],
146            points,
147            line_strings,
148            polygons,
149            mpoints,
150            mline_strings,
151            mpolygons,
152            gcs,
153            offsets: vec![],
154            deferred_nulls: 0,
155            prefer_multi: DEFAULT_PREFER_MULTI,
156        }
157    }
158
159    /// Change whether to prefer multi or single arrays for new single-part geometries.
160    ///
161    /// If `true`, a new `Point` will be added to the `MultiPointBuilder` child array, a new
162    /// `LineString` will be added to the `MultiLineStringBuilder` child array, and a new `Polygon`
163    /// will be added to the `MultiPolygonBuilder` child array.
164    ///
165    /// This can be desired when the user wants to downcast the array to a single geometry array
166    /// later, as casting to a, say, `MultiPointArray` from a `GeometryArray` could be done
167    /// zero-copy.
168    ///
169    /// Note that only geometries added _after_ this method is called will be affected.
170    pub fn with_prefer_multi(self, prefer_multi: bool) -> Self {
171        Self {
172            prefer_multi,
173            gcs: self.gcs.map(|gc| gc.with_prefer_multi(prefer_multi)),
174            ..self
175        }
176    }
177
178    /// Reserves capacity for at least `additional` more geometries.
179    ///
180    /// The collection may reserve more space to speculatively avoid frequent reallocations. After
181    /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`.
182    /// Does nothing if capacity is already sufficient.
183    pub fn reserve(&mut self, capacity: GeometryCapacity) {
184        let total_num_geoms = capacity.total_num_geoms();
185        self.types.reserve(total_num_geoms);
186        self.offsets.reserve(total_num_geoms);
187
188        capacity.points.iter().enumerate().for_each(|(i, cap)| {
189            self.points[i].reserve(*cap);
190        });
191        capacity
192            .line_strings
193            .iter()
194            .enumerate()
195            .for_each(|(i, cap)| {
196                self.line_strings[i].reserve(*cap);
197            });
198        capacity.polygons.iter().enumerate().for_each(|(i, cap)| {
199            self.polygons[i].reserve(*cap);
200        });
201        capacity.mpoints.iter().enumerate().for_each(|(i, cap)| {
202            self.mpoints[i].reserve(*cap);
203        });
204        capacity
205            .mline_strings
206            .iter()
207            .enumerate()
208            .for_each(|(i, cap)| {
209                self.mline_strings[i].reserve(*cap);
210            });
211        capacity.mpolygons.iter().enumerate().for_each(|(i, cap)| {
212            self.mpolygons[i].reserve(*cap);
213        });
214        capacity.gcs.iter().enumerate().for_each(|(i, cap)| {
215            self.gcs[i].reserve(*cap);
216        });
217    }
218
219    /// Reserves the minimum capacity for at least `additional` more Geometries.
220    ///
221    /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid
222    /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal
223    /// to `self.len() + additional`. Does nothing if the capacity is already sufficient.
224    ///
225    /// Note that the allocator may give the collection more space than it
226    /// requests. Therefore, capacity can not be relied upon to be precisely
227    /// minimal. Prefer [`reserve`] if future insertions are expected.
228    ///
229    /// [`reserve`]: Self::reserve
230    pub fn reserve_exact(&mut self, capacity: GeometryCapacity) {
231        let total_num_geoms = capacity.total_num_geoms();
232
233        self.types.reserve_exact(total_num_geoms);
234        self.offsets.reserve_exact(total_num_geoms);
235
236        capacity.points.iter().enumerate().for_each(|(i, cap)| {
237            self.points[i].reserve_exact(*cap);
238        });
239        capacity
240            .line_strings
241            .iter()
242            .enumerate()
243            .for_each(|(i, cap)| {
244                self.line_strings[i].reserve_exact(*cap);
245            });
246        capacity.polygons.iter().enumerate().for_each(|(i, cap)| {
247            self.polygons[i].reserve_exact(*cap);
248        });
249        capacity.mpoints.iter().enumerate().for_each(|(i, cap)| {
250            self.mpoints[i].reserve_exact(*cap);
251        });
252        capacity
253            .mline_strings
254            .iter()
255            .enumerate()
256            .for_each(|(i, cap)| {
257                self.mline_strings[i].reserve_exact(*cap);
258            });
259        capacity.mpolygons.iter().enumerate().for_each(|(i, cap)| {
260            self.mpolygons[i].reserve_exact(*cap);
261        });
262        capacity.gcs.iter().enumerate().for_each(|(i, cap)| {
263            self.gcs[i].reserve_exact(*cap);
264        });
265    }
266
267    /// Shrinks the capacity of self to fit.
268    pub fn shrink_to_fit(&mut self) {
269        self.points.iter_mut().for_each(PointBuilder::shrink_to_fit);
270        self.line_strings
271            .iter_mut()
272            .for_each(LineStringBuilder::shrink_to_fit);
273        self.polygons
274            .iter_mut()
275            .for_each(PolygonBuilder::shrink_to_fit);
276        self.mpoints
277            .iter_mut()
278            .for_each(MultiPointBuilder::shrink_to_fit);
279        self.mline_strings
280            .iter_mut()
281            .for_each(MultiLineStringBuilder::shrink_to_fit);
282        self.mpolygons
283            .iter_mut()
284            .for_each(MultiPolygonBuilder::shrink_to_fit);
285        self.gcs
286            .iter_mut()
287            .for_each(GeometryCollectionBuilder::shrink_to_fit);
288
289        self.offsets.shrink_to_fit();
290        self.types.shrink_to_fit();
291    }
292
293    /// Consume the builder and convert to an immutable [`GeometryArray`]
294    pub fn finish(mut self) -> GeometryArray {
295        // If there are still deferred nulls to be written, then there aren't any valid geometries
296        // in this array, and just choose a child to write them to.
297        if self.deferred_nulls > 0 {
298            let dim = Dimension::XY;
299            let child = &mut self.points[dim.order()];
300            let type_id = child.geometry_type_id();
301            Self::flush_deferred_nulls(
302                &mut self.deferred_nulls,
303                child,
304                &mut self.offsets,
305                &mut self.types,
306                type_id,
307            );
308        }
309
310        GeometryArray::new(
311            self.types.into(),
312            self.offsets.into(),
313            self.points.map(|arr| arr.finish()),
314            self.line_strings.map(|arr| arr.finish()),
315            self.polygons.map(|arr| arr.finish()),
316            self.mpoints.map(|arr| arr.finish()),
317            self.mline_strings.map(|arr| arr.finish()),
318            self.mpolygons.map(|arr| arr.finish()),
319            self.gcs.map(|arr| arr.finish()),
320            self.metadata,
321        )
322    }
323
324    /// Add a new Point to the end of this array.
325    ///
326    /// If `self.prefer_multi` is `true`, it will be stored in the `MultiPointBuilder` child
327    /// array. Otherwise, it will be stored in the `PointBuilder` child array.
328    #[inline]
329    fn push_point(&mut self, value: Option<&impl PointTrait<T = f64>>) -> GeoArrowResult<()> {
330        if let Some(point) = value {
331            let dim: Dimension = point.dim().try_into().unwrap();
332            let array_idx = dim.order();
333
334            if self.prefer_multi {
335                let child = &mut self.mpoints[array_idx];
336                let type_id = child.geometry_type_id();
337
338                Self::flush_deferred_nulls(
339                    &mut self.deferred_nulls,
340                    child,
341                    &mut self.offsets,
342                    &mut self.types,
343                    type_id,
344                );
345                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
346                child.push_point(Some(point))?;
347            } else {
348                let child = &mut self.points[array_idx];
349                let type_id = child.geometry_type_id();
350
351                Self::flush_deferred_nulls(
352                    &mut self.deferred_nulls,
353                    child,
354                    &mut self.offsets,
355                    &mut self.types,
356                    type_id,
357                );
358                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
359                child.push_point(Some(point));
360            }
361        } else {
362            self.push_null();
363        };
364
365        Ok(())
366    }
367
368    #[inline]
369    fn add_type<B: GeoArrowArrayBuilder>(
370        child: &mut B,
371        offsets: &mut Vec<i32>,
372        types: &mut Vec<i8>,
373        type_id: i8,
374    ) {
375        offsets.push(child.len().try_into().unwrap());
376        types.push(type_id);
377    }
378
379    #[inline]
380    fn add_point_type(&mut self, dim: Dimension) {
381        let child = &self.points[dim.order()];
382        self.offsets.push(child.len().try_into().unwrap());
383        self.types.push(child.geometry_type_id());
384    }
385
386    /// Add a new LineString to the end of this array.
387    ///
388    /// If `self.prefer_multi` is `true`, it will be stored in the `MultiLineStringBuilder` child
389    /// array. Otherwise, it will be stored in the `LineStringBuilder` child array.
390    ///
391    /// # Errors
392    ///
393    /// This function errors iff the new last item is larger than what O supports.
394    #[inline]
395    fn push_line_string(
396        &mut self,
397        value: Option<&impl LineStringTrait<T = f64>>,
398    ) -> GeoArrowResult<()> {
399        if let Some(line_string) = value {
400            let dim: Dimension = line_string.dim().try_into().unwrap();
401            let array_idx = dim.order();
402
403            if self.prefer_multi {
404                let child = &mut self.mline_strings[array_idx];
405                let type_id = child.geometry_type_id();
406
407                Self::flush_deferred_nulls(
408                    &mut self.deferred_nulls,
409                    child,
410                    &mut self.offsets,
411                    &mut self.types,
412                    type_id,
413                );
414                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
415                child.push_line_string(Some(line_string))?;
416            } else {
417                let child = &mut self.line_strings[array_idx];
418                let type_id = child.geometry_type_id();
419
420                Self::flush_deferred_nulls(
421                    &mut self.deferred_nulls,
422                    child,
423                    &mut self.offsets,
424                    &mut self.types,
425                    type_id,
426                );
427                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
428                child.push_line_string(Some(line_string))?;
429            }
430        } else {
431            self.push_null();
432        };
433
434        Ok(())
435    }
436
437    #[inline]
438    fn add_line_string_type(&mut self, dim: Dimension) {
439        let child = &self.line_strings[dim.order()];
440        self.offsets.push(child.len().try_into().unwrap());
441        self.types.push(child.geometry_type_id());
442    }
443
444    /// Add a new Polygon to the end of this array.
445    ///
446    /// If `self.prefer_multi` is `true`, it will be stored in the `MultiPolygonBuilder` child
447    /// array. Otherwise, it will be stored in the `PolygonBuilder` child array.
448    ///
449    /// # Errors
450    ///
451    /// This function errors iff the new last item is larger than what O supports.
452    #[inline]
453    fn push_polygon(&mut self, value: Option<&impl PolygonTrait<T = f64>>) -> GeoArrowResult<()> {
454        if let Some(polygon) = value {
455            let dim: Dimension = polygon.dim().try_into().unwrap();
456            let array_idx = dim.order();
457
458            if self.prefer_multi {
459                let child = &mut self.mpolygons[array_idx];
460                let type_id = child.geometry_type_id();
461
462                Self::flush_deferred_nulls(
463                    &mut self.deferred_nulls,
464                    child,
465                    &mut self.offsets,
466                    &mut self.types,
467                    type_id,
468                );
469                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
470                child.push_polygon(Some(polygon))?;
471            } else {
472                let child = &mut self.polygons[array_idx];
473                let type_id = child.geometry_type_id();
474
475                Self::flush_deferred_nulls(
476                    &mut self.deferred_nulls,
477                    child,
478                    &mut self.offsets,
479                    &mut self.types,
480                    type_id,
481                );
482                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
483                child.push_polygon(Some(polygon))?;
484            }
485        } else {
486            self.push_null();
487        };
488
489        Ok(())
490    }
491
492    #[inline]
493    fn add_polygon_type(&mut self, dim: Dimension) {
494        let child = &self.polygons[dim.order()];
495        self.offsets.push(child.len().try_into().unwrap());
496        self.types.push(child.geometry_type_id());
497    }
498
499    /// Add a new MultiPoint to the end of this array.
500    ///
501    /// # Errors
502    ///
503    /// This function errors iff the new last item is larger than what O supports.
504    #[inline]
505    fn push_multi_point(
506        &mut self,
507        value: Option<&impl MultiPointTrait<T = f64>>,
508    ) -> GeoArrowResult<()> {
509        if let Some(multi_point) = value {
510            let dim: Dimension = multi_point.dim().try_into().unwrap();
511            let array_idx = dim.order();
512
513            let child = &mut self.mpoints[array_idx];
514            let type_id = child.geometry_type_id();
515
516            Self::flush_deferred_nulls(
517                &mut self.deferred_nulls,
518                child,
519                &mut self.offsets,
520                &mut self.types,
521                type_id,
522            );
523            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
524            child.push_multi_point(Some(multi_point))?;
525        } else {
526            self.push_null();
527        };
528
529        Ok(())
530    }
531
532    #[inline]
533    fn add_multi_point_type(&mut self, dim: Dimension) {
534        let child = &self.mpoints[dim.order()];
535        self.offsets.push(child.len().try_into().unwrap());
536        self.types.push(child.geometry_type_id());
537    }
538
539    /// Add a new MultiLineString to the end of this array.
540    ///
541    /// # Errors
542    ///
543    /// This function errors iff the new last item is larger than what O supports.
544    #[inline]
545    fn push_multi_line_string(
546        &mut self,
547        value: Option<&impl MultiLineStringTrait<T = f64>>,
548    ) -> GeoArrowResult<()> {
549        if let Some(multi_line_string) = value {
550            let dim: Dimension = multi_line_string.dim().try_into().unwrap();
551            let array_idx = dim.order();
552
553            let child = &mut self.mline_strings[array_idx];
554            let type_id = child.geometry_type_id();
555
556            Self::flush_deferred_nulls(
557                &mut self.deferred_nulls,
558                child,
559                &mut self.offsets,
560                &mut self.types,
561                type_id,
562            );
563            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
564            child.push_multi_line_string(Some(multi_line_string))?;
565        } else {
566            self.push_null();
567        };
568
569        Ok(())
570    }
571
572    #[inline]
573    fn add_multi_line_string_type(&mut self, dim: Dimension) {
574        let child = &self.mline_strings[dim.order()];
575        self.offsets.push(child.len().try_into().unwrap());
576        self.types.push(child.geometry_type_id());
577    }
578
579    /// Add a new MultiPolygon to the end of this array.
580    ///
581    /// # Errors
582    ///
583    /// This function errors iff the new last item is larger than what O supports.
584    #[inline]
585    fn push_multi_polygon(
586        &mut self,
587        value: Option<&impl MultiPolygonTrait<T = f64>>,
588    ) -> GeoArrowResult<()> {
589        if let Some(multi_polygon) = value {
590            let dim: Dimension = multi_polygon.dim().try_into().unwrap();
591            let array_idx = dim.order();
592
593            let child = &mut self.mpolygons[array_idx];
594            let type_id = child.geometry_type_id();
595
596            Self::flush_deferred_nulls(
597                &mut self.deferred_nulls,
598                child,
599                &mut self.offsets,
600                &mut self.types,
601                type_id,
602            );
603            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
604            child.push_multi_polygon(Some(multi_polygon))?;
605        } else {
606            self.push_null();
607        };
608
609        Ok(())
610    }
611
612    #[inline]
613    fn add_multi_polygon_type(&mut self, dim: Dimension) {
614        let child = &self.mpolygons[dim.order()];
615        self.offsets.push(child.len().try_into().unwrap());
616        self.types.push(child.geometry_type_id());
617    }
618
619    /// Add a new geometry to this builder
620    #[inline]
621    pub fn push_geometry(
622        &mut self,
623        value: Option<&'a impl GeometryTrait<T = f64>>,
624    ) -> GeoArrowResult<()> {
625        use geo_traits::GeometryType::*;
626
627        if let Some(geom) = value {
628            match geom.as_type() {
629                Point(g) => {
630                    self.push_point(Some(g))?;
631                }
632                LineString(g) => {
633                    self.push_line_string(Some(g))?;
634                }
635                Polygon(g) => {
636                    self.push_polygon(Some(g))?;
637                }
638                MultiPoint(p) => self.push_multi_point(Some(p))?,
639                MultiLineString(p) => self.push_multi_line_string(Some(p))?,
640                MultiPolygon(p) => self.push_multi_polygon(Some(p))?,
641                GeometryCollection(gc) => {
642                    if gc.num_geometries() == 1 {
643                        self.push_geometry(Some(&gc.geometry(0).unwrap()))?
644                    } else {
645                        self.push_geometry_collection(Some(gc))?
646                    }
647                }
648                Rect(r) => self.push_polygon(Some(&RectWrapper::try_new(r)?))?,
649                Triangle(tri) => self.push_polygon(Some(&TriangleWrapper(tri)))?,
650                Line(l) => self.push_line_string(Some(&LineWrapper(l)))?,
651            };
652        } else {
653            self.push_null();
654        }
655        Ok(())
656    }
657
658    /// Add a new GeometryCollection to the end of this array.
659    ///
660    /// # Errors
661    ///
662    /// This function errors iff the new last item is larger than what O supports.
663    #[inline]
664    fn push_geometry_collection(
665        &mut self,
666        value: Option<&impl GeometryCollectionTrait<T = f64>>,
667    ) -> GeoArrowResult<()> {
668        if let Some(gc) = value {
669            let dim: Dimension = gc.dim().try_into().unwrap();
670            let array_idx = dim.order();
671
672            let child = &mut self.gcs[array_idx];
673            let type_id = child.geometry_type_id();
674
675            Self::flush_deferred_nulls(
676                &mut self.deferred_nulls,
677                child,
678                &mut self.offsets,
679                &mut self.types,
680                type_id,
681            );
682            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
683            child.push_geometry_collection(Some(gc))?;
684        } else {
685            self.push_null();
686        };
687
688        Ok(())
689    }
690
691    #[inline]
692    fn add_geometry_collection_type(&mut self, dim: Dimension) {
693        let child = &self.gcs[dim.order()];
694        self.offsets.push(child.len().try_into().unwrap());
695        self.types.push(child.geometry_type_id());
696    }
697
698    /// Push a null to this builder.
699    ///
700    /// Adding null values to a union array is tricky, because you don't want to add a null to a
701    /// child that would otherwise be totally empty. Ideally, as few children as possible exist and
702    /// are non-empty.
703    ///
704    /// We handle that by pushing nulls to the first non-empty child we find. If no underlying
705    /// arrays are non-empty, we add to an internal counter instead. Once the first non-empty
706    /// geometry is pushed, then we flush all the "deferred nulls" to that child.
707    #[inline]
708    pub fn push_null(&mut self) {
709        // Iterate through each dimension, then iterate through each child type. If a child exists,
710        // push a null to it.
711        //
712        // Note that we must **also** call `add_*_type` so that the offsets are correct to point
713        // the union array to the child.
714        for dim in [
715            Dimension::XY,
716            Dimension::XYZ,
717            Dimension::XYM,
718            Dimension::XYZM,
719        ] {
720            let dim_idx = dim.order();
721            if !self.points[dim_idx].is_empty() {
722                self.add_point_type(dim);
723                self.points[dim_idx].push_null();
724                return;
725            }
726            if !self.line_strings[dim_idx].is_empty() {
727                self.add_line_string_type(dim);
728                self.line_strings[dim_idx].push_null();
729                return;
730            }
731            if !self.polygons[dim_idx].is_empty() {
732                self.add_polygon_type(dim);
733                self.polygons[dim_idx].push_null();
734                return;
735            }
736            if !self.mpoints[dim_idx].is_empty() {
737                self.add_multi_point_type(dim);
738                self.mpoints[dim_idx].push_null();
739                return;
740            }
741            if !self.mline_strings[dim_idx].is_empty() {
742                self.add_multi_line_string_type(dim);
743                self.mline_strings[dim_idx].push_null();
744                return;
745            }
746            if !self.mpolygons[dim_idx].is_empty() {
747                self.add_multi_polygon_type(dim);
748                self.mpolygons[dim_idx].push_null();
749                return;
750            }
751            if !self.gcs[dim_idx].is_empty() {
752                self.add_geometry_collection_type(dim);
753                self.gcs[dim_idx].push_null();
754                return;
755            }
756        }
757
758        self.deferred_nulls += 1;
759    }
760
761    /// Flush any deferred nulls to the desired array builder.
762    fn flush_deferred_nulls<B: GeoArrowArrayBuilder>(
763        deferred_nulls: &mut usize,
764        child: &mut B,
765        offsets: &mut Vec<i32>,
766        types: &mut Vec<i8>,
767        type_id: i8,
768    ) {
769        let offset = child.len().try_into().unwrap();
770        // For each null we also have to update the offsets and types
771        for _ in 0..*deferred_nulls {
772            offsets.push(offset);
773            types.push(type_id);
774            child.push_null();
775        }
776
777        *deferred_nulls = 0;
778    }
779
780    /// Extend this builder with the given geometries
781    pub fn extend_from_iter(
782        &mut self,
783        geoms: impl Iterator<Item = Option<&'a (impl GeometryTrait<T = f64> + 'a)>>,
784    ) {
785        geoms
786            .into_iter()
787            .try_for_each(|maybe_geom| self.push_geometry(maybe_geom))
788            .unwrap();
789    }
790
791    /// Create this builder from a slice of nullable Geometries.
792    pub fn from_nullable_geometries(
793        geoms: &[Option<impl GeometryTrait<T = f64>>],
794        typ: GeometryType,
795    ) -> GeoArrowResult<Self> {
796        let capacity = GeometryCapacity::from_geometries(geoms.iter().map(|x| x.as_ref()))?;
797        let mut array = Self::with_capacity(typ, capacity);
798        array.extend_from_iter(geoms.iter().map(|x| x.as_ref()));
799        Ok(array)
800    }
801}
802
803impl<O: OffsetSizeTrait> TryFrom<(GenericWkbArray<O>, GeometryType)> for GeometryBuilder {
804    type Error = GeoArrowError;
805
806    fn try_from((value, typ): (GenericWkbArray<O>, GeometryType)) -> GeoArrowResult<Self> {
807        let wkb_objects = value
808            .iter()
809            .map(|x| x.transpose())
810            .collect::<GeoArrowResult<Vec<_>>>()?;
811        Self::from_nullable_geometries(&wkb_objects, typ)
812    }
813}
814
815impl GeoArrowArrayBuilder for GeometryBuilder {
816    fn len(&self) -> usize {
817        self.types.len()
818    }
819
820    fn push_null(&mut self) {
821        self.push_null();
822    }
823
824    fn push_geometry(
825        &mut self,
826        geometry: Option<&impl GeometryTrait<T = f64>>,
827    ) -> GeoArrowResult<()> {
828        self.push_geometry(geometry)
829    }
830
831    fn finish(self) -> Arc<dyn GeoArrowArray> {
832        Arc::new(self.finish())
833    }
834}
835
#[cfg(test)]
mod test {
    use geoarrow_schema::CoordType;
    use wkt::wkt;

    use super::*;
    use crate::GeoArrowArray;

    /// Create an empty builder whose geometry type uses interleaved coordinates.
    fn interleaved_builder() -> GeometryBuilder {
        let typ = GeometryType::new(Default::default()).with_coord_type(CoordType::Interleaved);
        GeometryBuilder::new(typ)
    }

    /// Deferred-null behavior when no valid geometry is ever pushed.
    #[test]
    fn all_items_null() {
        let mut builder = GeometryBuilder::new(GeometryType::new(Default::default()));
        for _ in 0..3 {
            builder.push_null();
        }

        let array = builder.finish();
        assert_eq!(array.logical_null_count(), 3);

        // The nulls are expected to land in (canonically) the first child.
        assert_eq!(array.points[0].logical_null_count(), 3);
    }

    /// Nulls pushed before any geometry end up in the child of the first geometry pushed.
    #[test]
    fn deferred_nulls() {
        let mut builder = interleaved_builder();
        builder.push_null();
        builder.push_null();

        let coord_type = CoordType::Interleaved;
        let linestring_arr = crate::test::linestring::array(coord_type, Dimension::XYZ);
        let fixture_nulls = linestring_arr.logical_null_count();

        // Copy every item of the linestring fixture into the geometry builder.
        for item in linestring_arr.iter() {
            let maybe_ls = item.transpose().unwrap();
            builder.push_geometry(maybe_ls.as_ref()).unwrap();
        }

        let geom_arr = builder.finish();

        // Two nulls were pushed manually; the rest come from the LineString fixture itself.
        let expected_nulls = 2 + fixture_nulls;
        assert_eq!(geom_arr.logical_null_count(), expected_nulls);

        // Every one of those nulls should live in the XYZ linestring child.
        let xyz_line_strings = &geom_arr.line_strings[Dimension::XYZ.order()];
        assert_eq!(xyz_line_strings.logical_null_count(), expected_nulls);
    }

    /// Nulls pushed after the deferred ones have been flushed are written directly to a child.
    #[test]
    fn later_nulls_after_deferred_nulls_pushed_directly() {
        let mut builder = interleaved_builder();
        builder.push_null();
        builder.push_null();

        let point = wkt! { POINT Z (30. 10. 40.) };
        builder.push_point(Some(&point)).unwrap();

        let ls = wkt! { LINESTRING (30. 10., 10. 30., 40. 40.) };
        builder.push_line_string(Some(&ls)).unwrap();

        builder.push_null();
        builder.push_null();

        let geom_arr = builder.finish();
        assert_eq!(geom_arr.logical_null_count(), 4);

        // The two leading nulls were deferred, and Point Z is the first non-null geometry
        // pushed, so they belong to the Point Z child.
        let xyz_points = &geom_arr.points[Dimension::XYZ.order()];
        assert_eq!(xyz_points.logical_null_count(), 2);

        // The two trailing nulls go to the linestring XY child: the current implementation scans
        // all XY children, then all XYZ children, and so on, for the first non-empty one, and
        // the linestring XY child is non-empty at that point.
        let xy_line_strings = &geom_arr.line_strings[Dimension::XY.order()];
        assert_eq!(xy_line_strings.logical_null_count(), 2);
    }

    /// Nulls pushed only after a valid geometry has already been pushed.
    #[test]
    fn nulls_no_deferred() {
        let mut builder = interleaved_builder();

        let point = wkt! { POINT Z (30. 10. 40.) };
        builder.push_point(Some(&point)).unwrap();
        builder.push_null();
        builder.push_null();

        let geom_arr = builder.finish();
        assert_eq!(geom_arr.logical_null_count(), 2);

        // Both nulls should be in the Point XYZ child.
        let xyz_points = &geom_arr.points[Dimension::XYZ.order()];
        assert_eq!(xyz_points.logical_null_count(), 2);
    }
}