geoarrow_array/builder/
geometry.rs

1use std::sync::Arc;
2
3use arrow_array::OffsetSizeTrait;
4use geo_traits::*;
5use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
6use geoarrow_schema::{
7    Dimension, GeometryCollectionType, GeometryType, LineStringType, Metadata, MultiLineStringType,
8    MultiPointType, MultiPolygonType, PointType, PolygonType,
9};
10
11use crate::GeoArrowArray;
12use crate::array::{DimensionIndex, GenericWkbArray, GeometryArray};
13use crate::builder::geo_trait_wrappers::{LineWrapper, RectWrapper, TriangleWrapper};
14use crate::builder::{
15    GeometryCollectionBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPointBuilder,
16    MultiPolygonBuilder, PointBuilder, PolygonBuilder,
17};
18use crate::capacity::GeometryCapacity;
19use crate::trait_::{GeoArrowArrayAccessor, GeoArrowArrayBuilder};
20
/// Initial value of [`GeometryBuilder::prefer_multi`] for newly constructed builders:
/// single-part geometries go to their single-part child builders unless the caller opts in via
/// `with_prefer_multi`.
pub(crate) const DEFAULT_PREFER_MULTI: bool = false;
22
/// The GeoArrow equivalent to a `Vec<Option<Geometry>>`: a mutable collection of Geometries.
///
/// Each Geometry can have a different dimension. All geometries must have the same coordinate
/// type.
///
/// This currently has the caveat that these geometries must be a _primitive_ geometry type. This
/// does not currently support nested GeometryCollection objects.
///
/// Converting a [`GeometryBuilder`] into a [`GeometryArray`] is `O(1)`.
///
/// # Invariants
///
/// - All arrays must have the same coordinate layout (interleaved or separated)
#[derive(Debug)]
pub struct GeometryBuilder {
    /// Array-level metadata. It is kept on this builder only; the child builders are created
    /// with default metadata.
    metadata: Arc<Metadata>,

    // Invariant: every item in `types` is `> 0 && < fields.len()`
    /// One type id per pushed element, identifying which child builder holds it (see `TypeId`).
    types: Vec<i8>,

    /// An array of `PointBuilder`s, ordered XY, XYZ, XYM, XYZM
    points: [PointBuilder; 4],
    /// `LineStringBuilder`s, one per dimension, indexed the same way as `points`.
    line_strings: [LineStringBuilder; 4],
    /// `PolygonBuilder`s, one per dimension, indexed the same way as `points`.
    polygons: [PolygonBuilder; 4],
    /// `MultiPointBuilder`s, one per dimension, indexed the same way as `points`.
    mpoints: [MultiPointBuilder; 4],
    /// `MultiLineStringBuilder`s, one per dimension, indexed the same way as `points`.
    mline_strings: [MultiLineStringBuilder; 4],
    /// `MultiPolygonBuilder`s, one per dimension, indexed the same way as `points`.
    mpolygons: [MultiPolygonBuilder; 4],
    /// `GeometryCollectionBuilder`s, one per dimension, indexed the same way as `points`.
    gcs: [GeometryCollectionBuilder; 4],

    // Invariant: `offsets.len() == types.len()`
    /// One offset per pushed element: the index of that element inside the child builder
    /// selected by the matching entry in `types`.
    offsets: Vec<i32>,

    /// Whether to prefer multi or single arrays for new geometries.
    ///
    /// E.g. if this is `true` and a Point geometry is added, it will be added to the
    /// MultiPointBuilder. If this is `false`, the Point geometry will be added to the
    /// PointBuilder.
    ///
    /// The idea is that always adding multi-geometries will make it easier to downcast later.
    pub(crate) prefer_multi: bool,

    /// The number of nulls that have been deferred and are still to be written.
    ///
    /// Adding nulls is tricky. We often want to use this builder as a generic builder for data
    /// from unknown sources, which then gets downcasted to an array of a specific type.
    ///
    /// In a large majority of the time, this builder will have only data of a single type, which
    /// can then get downcasted to a simple array of a single geometry type and dimension. But in
    /// order for this process to be easy, we want the nulls to be assigned to the same array type
    /// as the actual data.
    ///
    /// When there's a valid geometry pushed before the null, we can add the null to an existing
    /// non-null array type, but if there are no valid geometries yet, we don't know which array to
    /// push the null to. This `deferred_nulls` is the number of initial null values that haven't
    /// yet been written to an array, because we don't know which array to write them to.
    deferred_nulls: usize,
}
80
81impl<'a> GeometryBuilder {
82    /// Creates a new empty [`GeometryBuilder`].
83    pub fn new(typ: GeometryType) -> Self {
84        Self::with_capacity(typ, Default::default())
85    }
86
87    /// Creates a new [`GeometryBuilder`] with given capacity and no validity.
88    pub fn with_capacity(typ: GeometryType, capacity: GeometryCapacity) -> Self {
89        let coord_type = typ.coord_type();
90
91        let points = core::array::from_fn(|i| {
92            let dim = Dimension::from_order(i).unwrap();
93            PointBuilder::with_capacity(
94                PointType::new(dim, Default::default()).with_coord_type(coord_type),
95                capacity.point(dim),
96            )
97        });
98        let line_strings = core::array::from_fn(|i| {
99            let dim = Dimension::from_order(i).unwrap();
100            LineStringBuilder::with_capacity(
101                LineStringType::new(dim, Default::default()).with_coord_type(coord_type),
102                capacity.line_string(dim),
103            )
104        });
105        let polygons = core::array::from_fn(|i| {
106            let dim = Dimension::from_order(i).unwrap();
107            PolygonBuilder::with_capacity(
108                PolygonType::new(dim, Default::default()).with_coord_type(coord_type),
109                capacity.polygon(dim),
110            )
111        });
112        let mpoints = core::array::from_fn(|i| {
113            let dim = Dimension::from_order(i).unwrap();
114            MultiPointBuilder::with_capacity(
115                MultiPointType::new(dim, Default::default()).with_coord_type(coord_type),
116                capacity.multi_point(dim),
117            )
118        });
119        let mline_strings = core::array::from_fn(|i| {
120            let dim = Dimension::from_order(i).unwrap();
121            MultiLineStringBuilder::with_capacity(
122                MultiLineStringType::new(dim, Default::default()).with_coord_type(coord_type),
123                capacity.multi_line_string(dim),
124            )
125        });
126        let mpolygons = core::array::from_fn(|i| {
127            let dim = Dimension::from_order(i).unwrap();
128            MultiPolygonBuilder::with_capacity(
129                MultiPolygonType::new(dim, Default::default()).with_coord_type(coord_type),
130                capacity.multi_polygon(dim),
131            )
132        });
133        let gcs = core::array::from_fn(|i| {
134            let dim = Dimension::from_order(i).unwrap();
135            GeometryCollectionBuilder::with_capacity(
136                GeometryCollectionType::new(dim, Default::default()).with_coord_type(coord_type),
137                capacity.geometry_collection(dim),
138            )
139        });
140
141        // Don't store array metadata on child arrays
142        Self {
143            metadata: typ.metadata().clone(),
144            types: vec![],
145            points,
146            line_strings,
147            polygons,
148            mpoints,
149            mline_strings,
150            mpolygons,
151            gcs,
152            offsets: vec![],
153            deferred_nulls: 0,
154            prefer_multi: DEFAULT_PREFER_MULTI,
155        }
156    }
157
158    /// Change whether to prefer multi or single arrays for new single-part geometries.
159    ///
160    /// If `true`, a new `Point` will be added to the `MultiPointBuilder` child array, a new
161    /// `LineString` will be added to the `MultiLineStringBuilder` child array, and a new `Polygon`
162    /// will be added to the `MultiPolygonBuilder` child array.
163    ///
164    /// This can be desired when the user wants to downcast the array to a single geometry array
165    /// later, as casting to a, say, `MultiPointArray` from a `GeometryArray` could be done
166    /// zero-copy.
167    ///
168    /// Note that only geometries added _after_ this method is called will be affected.
169    pub fn with_prefer_multi(self, prefer_multi: bool) -> Self {
170        Self {
171            prefer_multi,
172            gcs: self.gcs.map(|gc| gc.with_prefer_multi(prefer_multi)),
173            ..self
174        }
175    }
176
177    /// Reserves capacity for at least `additional` more geometries.
178    ///
179    /// The collection may reserve more space to speculatively avoid frequent reallocations. After
180    /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`.
181    /// Does nothing if capacity is already sufficient.
182    pub fn reserve(&mut self, capacity: GeometryCapacity) {
183        let total_num_geoms = capacity.total_num_geoms();
184        self.types.reserve(total_num_geoms);
185        self.offsets.reserve(total_num_geoms);
186
187        capacity.points.iter().enumerate().for_each(|(i, cap)| {
188            self.points[i].reserve(*cap);
189        });
190        capacity
191            .line_strings
192            .iter()
193            .enumerate()
194            .for_each(|(i, cap)| {
195                self.line_strings[i].reserve(*cap);
196            });
197        capacity.polygons.iter().enumerate().for_each(|(i, cap)| {
198            self.polygons[i].reserve(*cap);
199        });
200        capacity.mpoints.iter().enumerate().for_each(|(i, cap)| {
201            self.mpoints[i].reserve(*cap);
202        });
203        capacity
204            .mline_strings
205            .iter()
206            .enumerate()
207            .for_each(|(i, cap)| {
208                self.mline_strings[i].reserve(*cap);
209            });
210        capacity.mpolygons.iter().enumerate().for_each(|(i, cap)| {
211            self.mpolygons[i].reserve(*cap);
212        });
213        capacity.gcs.iter().enumerate().for_each(|(i, cap)| {
214            self.gcs[i].reserve(*cap);
215        });
216    }
217
218    /// Reserves the minimum capacity for at least `additional` more Geometries.
219    ///
220    /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid
221    /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal
222    /// to `self.len() + additional`. Does nothing if the capacity is already sufficient.
223    ///
224    /// Note that the allocator may give the collection more space than it
225    /// requests. Therefore, capacity can not be relied upon to be precisely
226    /// minimal. Prefer [`reserve`] if future insertions are expected.
227    ///
228    /// [`reserve`]: Self::reserve
229    pub fn reserve_exact(&mut self, capacity: GeometryCapacity) {
230        let total_num_geoms = capacity.total_num_geoms();
231
232        self.types.reserve_exact(total_num_geoms);
233        self.offsets.reserve_exact(total_num_geoms);
234
235        capacity.points.iter().enumerate().for_each(|(i, cap)| {
236            self.points[i].reserve_exact(*cap);
237        });
238        capacity
239            .line_strings
240            .iter()
241            .enumerate()
242            .for_each(|(i, cap)| {
243                self.line_strings[i].reserve_exact(*cap);
244            });
245        capacity.polygons.iter().enumerate().for_each(|(i, cap)| {
246            self.polygons[i].reserve_exact(*cap);
247        });
248        capacity.mpoints.iter().enumerate().for_each(|(i, cap)| {
249            self.mpoints[i].reserve_exact(*cap);
250        });
251        capacity
252            .mline_strings
253            .iter()
254            .enumerate()
255            .for_each(|(i, cap)| {
256                self.mline_strings[i].reserve_exact(*cap);
257            });
258        capacity.mpolygons.iter().enumerate().for_each(|(i, cap)| {
259            self.mpolygons[i].reserve_exact(*cap);
260        });
261        capacity.gcs.iter().enumerate().for_each(|(i, cap)| {
262            self.gcs[i].reserve_exact(*cap);
263        });
264    }
265
266    /// Shrinks the capacity of self to fit.
267    pub fn shrink_to_fit(&mut self) {
268        self.points.iter_mut().for_each(PointBuilder::shrink_to_fit);
269        self.line_strings
270            .iter_mut()
271            .for_each(LineStringBuilder::shrink_to_fit);
272        self.polygons
273            .iter_mut()
274            .for_each(PolygonBuilder::shrink_to_fit);
275        self.mpoints
276            .iter_mut()
277            .for_each(MultiPointBuilder::shrink_to_fit);
278        self.mline_strings
279            .iter_mut()
280            .for_each(MultiLineStringBuilder::shrink_to_fit);
281        self.mpolygons
282            .iter_mut()
283            .for_each(MultiPolygonBuilder::shrink_to_fit);
284        self.gcs
285            .iter_mut()
286            .for_each(GeometryCollectionBuilder::shrink_to_fit);
287
288        self.offsets.shrink_to_fit();
289        self.types.shrink_to_fit();
290    }
291
292    /// Consume the builder and convert to an immutable [`GeometryArray`]
293    pub fn finish(mut self) -> GeometryArray {
294        // If there are still deferred nulls to be written, then there aren't any valid geometries
295        // in this array, and just choose a child to write them to.
296        if self.deferred_nulls > 0 {
297            let dim = Dimension::XY;
298            let child = &mut self.points[dim.order()];
299            let type_id = child.type_id(dim);
300            Self::flush_deferred_nulls(
301                &mut self.deferred_nulls,
302                child,
303                &mut self.offsets,
304                &mut self.types,
305                type_id,
306            );
307        }
308
309        GeometryArray::new(
310            self.types.into(),
311            self.offsets.into(),
312            self.points.map(|arr| arr.finish()),
313            self.line_strings.map(|arr| arr.finish()),
314            self.polygons.map(|arr| arr.finish()),
315            self.mpoints.map(|arr| arr.finish()),
316            self.mline_strings.map(|arr| arr.finish()),
317            self.mpolygons.map(|arr| arr.finish()),
318            self.gcs.map(|arr| arr.finish()),
319            self.metadata,
320        )
321    }
322
323    /// Add a new Point to the end of this array.
324    ///
325    /// If `self.prefer_multi` is `true`, it will be stored in the `MultiPointBuilder` child
326    /// array. Otherwise, it will be stored in the `PointBuilder` child array.
327    #[inline]
328    fn push_point(&mut self, value: Option<&impl PointTrait<T = f64>>) -> GeoArrowResult<()> {
329        if let Some(point) = value {
330            let dim: Dimension = point.dim().try_into().unwrap();
331            let array_idx = dim.order();
332
333            if self.prefer_multi {
334                let child = &mut self.mpoints[array_idx];
335                let type_id = child.type_id(dim);
336
337                Self::flush_deferred_nulls(
338                    &mut self.deferred_nulls,
339                    child,
340                    &mut self.offsets,
341                    &mut self.types,
342                    type_id,
343                );
344                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
345                child.push_point(Some(point))?;
346            } else {
347                let child = &mut self.points[array_idx];
348                let type_id = child.type_id(dim);
349
350                Self::flush_deferred_nulls(
351                    &mut self.deferred_nulls,
352                    child,
353                    &mut self.offsets,
354                    &mut self.types,
355                    type_id,
356                );
357                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
358                child.push_point(Some(point));
359            }
360        } else {
361            self.push_null();
362        };
363
364        Ok(())
365    }
366
367    #[inline]
368    fn add_type<B: GeoArrowArrayBuilder>(
369        child: &mut B,
370        offsets: &mut Vec<i32>,
371        types: &mut Vec<i8>,
372        type_id: i8,
373    ) {
374        offsets.push(child.len().try_into().unwrap());
375        types.push(type_id);
376    }
377
378    #[inline]
379    fn add_point_type(&mut self, dim: Dimension) {
380        let child = &self.points[dim.order()];
381        self.offsets.push(child.len().try_into().unwrap());
382        self.types.push(child.type_id(dim));
383    }
384
385    /// Add a new LineString to the end of this array.
386    ///
387    /// If `self.prefer_multi` is `true`, it will be stored in the `MultiLineStringBuilder` child
388    /// array. Otherwise, it will be stored in the `LineStringBuilder` child array.
389    ///
390    /// # Errors
391    ///
392    /// This function errors iff the new last item is larger than what O supports.
393    #[inline]
394    fn push_line_string(
395        &mut self,
396        value: Option<&impl LineStringTrait<T = f64>>,
397    ) -> GeoArrowResult<()> {
398        if let Some(line_string) = value {
399            let dim: Dimension = line_string.dim().try_into().unwrap();
400            let array_idx = dim.order();
401
402            if self.prefer_multi {
403                let child = &mut self.mline_strings[array_idx];
404                let type_id = child.type_id(dim);
405
406                Self::flush_deferred_nulls(
407                    &mut self.deferred_nulls,
408                    child,
409                    &mut self.offsets,
410                    &mut self.types,
411                    type_id,
412                );
413                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
414                child.push_line_string(Some(line_string))?;
415            } else {
416                let child = &mut self.line_strings[array_idx];
417                let type_id = child.type_id(dim);
418
419                Self::flush_deferred_nulls(
420                    &mut self.deferred_nulls,
421                    child,
422                    &mut self.offsets,
423                    &mut self.types,
424                    type_id,
425                );
426                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
427                child.push_line_string(Some(line_string))?;
428            }
429        } else {
430            self.push_null();
431        };
432
433        Ok(())
434    }
435
436    #[inline]
437    fn add_line_string_type(&mut self, dim: Dimension) {
438        let child = &self.line_strings[dim.order()];
439        self.offsets.push(child.len().try_into().unwrap());
440        self.types.push(child.type_id(dim));
441    }
442
443    /// Add a new Polygon to the end of this array.
444    ///
445    /// If `self.prefer_multi` is `true`, it will be stored in the `MultiPolygonBuilder` child
446    /// array. Otherwise, it will be stored in the `PolygonBuilder` child array.
447    ///
448    /// # Errors
449    ///
450    /// This function errors iff the new last item is larger than what O supports.
451    #[inline]
452    fn push_polygon(&mut self, value: Option<&impl PolygonTrait<T = f64>>) -> GeoArrowResult<()> {
453        if let Some(polygon) = value {
454            let dim: Dimension = polygon.dim().try_into().unwrap();
455            let array_idx = dim.order();
456
457            if self.prefer_multi {
458                let child = &mut self.mpolygons[array_idx];
459                let type_id = child.type_id(dim);
460
461                Self::flush_deferred_nulls(
462                    &mut self.deferred_nulls,
463                    child,
464                    &mut self.offsets,
465                    &mut self.types,
466                    type_id,
467                );
468                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
469                child.push_polygon(Some(polygon))?;
470            } else {
471                let child = &mut self.polygons[array_idx];
472                let type_id = child.type_id(dim);
473
474                Self::flush_deferred_nulls(
475                    &mut self.deferred_nulls,
476                    child,
477                    &mut self.offsets,
478                    &mut self.types,
479                    type_id,
480                );
481                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
482                child.push_polygon(Some(polygon))?;
483            }
484        } else {
485            self.push_null();
486        };
487
488        Ok(())
489    }
490
491    #[inline]
492    fn add_polygon_type(&mut self, dim: Dimension) {
493        let child = &self.polygons[dim.order()];
494        self.offsets.push(child.len().try_into().unwrap());
495        self.types.push(child.type_id(dim));
496    }
497
498    /// Add a new MultiPoint to the end of this array.
499    ///
500    /// # Errors
501    ///
502    /// This function errors iff the new last item is larger than what O supports.
503    #[inline]
504    fn push_multi_point(
505        &mut self,
506        value: Option<&impl MultiPointTrait<T = f64>>,
507    ) -> GeoArrowResult<()> {
508        if let Some(multi_point) = value {
509            let dim: Dimension = multi_point.dim().try_into().unwrap();
510            let array_idx = dim.order();
511
512            let child = &mut self.mpoints[array_idx];
513            let type_id = child.type_id(dim);
514
515            Self::flush_deferred_nulls(
516                &mut self.deferred_nulls,
517                child,
518                &mut self.offsets,
519                &mut self.types,
520                type_id,
521            );
522            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
523            child.push_multi_point(Some(multi_point))?;
524        } else {
525            self.push_null();
526        };
527
528        Ok(())
529    }
530
531    #[inline]
532    fn add_multi_point_type(&mut self, dim: Dimension) {
533        let child = &self.mpoints[dim.order()];
534        self.offsets.push(child.len().try_into().unwrap());
535        self.types.push(child.type_id(dim));
536    }
537
538    /// Add a new MultiLineString to the end of this array.
539    ///
540    /// # Errors
541    ///
542    /// This function errors iff the new last item is larger than what O supports.
543    #[inline]
544    fn push_multi_line_string(
545        &mut self,
546        value: Option<&impl MultiLineStringTrait<T = f64>>,
547    ) -> GeoArrowResult<()> {
548        if let Some(multi_line_string) = value {
549            let dim: Dimension = multi_line_string.dim().try_into().unwrap();
550            let array_idx = dim.order();
551
552            let child = &mut self.mline_strings[array_idx];
553            let type_id = child.type_id(dim);
554
555            Self::flush_deferred_nulls(
556                &mut self.deferred_nulls,
557                child,
558                &mut self.offsets,
559                &mut self.types,
560                type_id,
561            );
562            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
563            child.push_multi_line_string(Some(multi_line_string))?;
564        } else {
565            self.push_null();
566        };
567
568        Ok(())
569    }
570
571    #[inline]
572    fn add_multi_line_string_type(&mut self, dim: Dimension) {
573        let child = &self.mline_strings[dim.order()];
574        self.offsets.push(child.len().try_into().unwrap());
575        self.types.push(child.type_id(dim));
576    }
577
578    /// Add a new MultiPolygon to the end of this array.
579    ///
580    /// # Errors
581    ///
582    /// This function errors iff the new last item is larger than what O supports.
583    #[inline]
584    fn push_multi_polygon(
585        &mut self,
586        value: Option<&impl MultiPolygonTrait<T = f64>>,
587    ) -> GeoArrowResult<()> {
588        if let Some(multi_polygon) = value {
589            let dim: Dimension = multi_polygon.dim().try_into().unwrap();
590            let array_idx = dim.order();
591
592            let child = &mut self.mpolygons[array_idx];
593            let type_id = child.type_id(dim);
594
595            Self::flush_deferred_nulls(
596                &mut self.deferred_nulls,
597                child,
598                &mut self.offsets,
599                &mut self.types,
600                type_id,
601            );
602            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
603            child.push_multi_polygon(Some(multi_polygon))?;
604        } else {
605            self.push_null();
606        };
607
608        Ok(())
609    }
610
611    #[inline]
612    fn add_multi_polygon_type(&mut self, dim: Dimension) {
613        let child = &self.mpolygons[dim.order()];
614        self.offsets.push(child.len().try_into().unwrap());
615        self.types.push(child.type_id(dim));
616    }
617
618    /// Add a new geometry to this builder
619    #[inline]
620    pub fn push_geometry(
621        &mut self,
622        value: Option<&'a impl GeometryTrait<T = f64>>,
623    ) -> GeoArrowResult<()> {
624        use geo_traits::GeometryType::*;
625
626        if let Some(geom) = value {
627            match geom.as_type() {
628                Point(g) => {
629                    self.push_point(Some(g))?;
630                }
631                LineString(g) => {
632                    self.push_line_string(Some(g))?;
633                }
634                Polygon(g) => {
635                    self.push_polygon(Some(g))?;
636                }
637                MultiPoint(p) => self.push_multi_point(Some(p))?,
638                MultiLineString(p) => self.push_multi_line_string(Some(p))?,
639                MultiPolygon(p) => self.push_multi_polygon(Some(p))?,
640                GeometryCollection(gc) => {
641                    if gc.num_geometries() == 1 {
642                        self.push_geometry(Some(&gc.geometry(0).unwrap()))?
643                    } else {
644                        self.push_geometry_collection(Some(gc))?
645                    }
646                }
647                Rect(r) => self.push_polygon(Some(&RectWrapper::try_new(r)?))?,
648                Triangle(tri) => self.push_polygon(Some(&TriangleWrapper(tri)))?,
649                Line(l) => self.push_line_string(Some(&LineWrapper(l)))?,
650            };
651        } else {
652            self.push_null();
653        }
654        Ok(())
655    }
656
657    /// Add a new GeometryCollection to the end of this array.
658    ///
659    /// # Errors
660    ///
661    /// This function errors iff the new last item is larger than what O supports.
662    #[inline]
663    fn push_geometry_collection(
664        &mut self,
665        value: Option<&impl GeometryCollectionTrait<T = f64>>,
666    ) -> GeoArrowResult<()> {
667        if let Some(gc) = value {
668            let dim: Dimension = gc.dim().try_into().unwrap();
669            let array_idx = dim.order();
670
671            let child = &mut self.gcs[array_idx];
672            let type_id = child.type_id(dim);
673
674            Self::flush_deferred_nulls(
675                &mut self.deferred_nulls,
676                child,
677                &mut self.offsets,
678                &mut self.types,
679                type_id,
680            );
681            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
682            child.push_geometry_collection(Some(gc))?;
683        } else {
684            self.push_null();
685        };
686
687        Ok(())
688    }
689
690    #[inline]
691    fn add_geometry_collection_type(&mut self, dim: Dimension) {
692        let child = &self.gcs[dim.order()];
693        self.offsets.push(child.len().try_into().unwrap());
694        self.types.push(child.type_id(dim));
695    }
696
697    /// Push a null to this builder.
698    ///
699    /// Adding null values to a union array is tricky, because you don't want to add a null to a
700    /// child that would otherwise be totally empty. Ideally, as few children as possible exist and
701    /// are non-empty.
702    ///
703    /// We handle that by pushing nulls to the first non-empty child we find. If no underlying
704    /// arrays are non-empty, we add to an internal counter instead. Once the first non-empty
705    /// geometry is pushed, then we flush all the "deferred nulls" to that child.
706    #[inline]
707    pub fn push_null(&mut self) {
708        // Iterate through each dimension, then iterate through each child type. If a child exists,
709        // push a null to it.
710        //
711        // Note that we must **also** call `add_*_type` so that the offsets are correct to point
712        // the union array to the child.
713        for dim in [
714            Dimension::XY,
715            Dimension::XYZ,
716            Dimension::XYM,
717            Dimension::XYZM,
718        ] {
719            let dim_idx = dim.order();
720            if !self.points[dim_idx].is_empty() {
721                self.add_point_type(dim);
722                self.points[dim_idx].push_null();
723                return;
724            }
725            if !self.line_strings[dim_idx].is_empty() {
726                self.add_line_string_type(dim);
727                self.line_strings[dim_idx].push_null();
728                return;
729            }
730            if !self.polygons[dim_idx].is_empty() {
731                self.add_polygon_type(dim);
732                self.polygons[dim_idx].push_null();
733                return;
734            }
735            if !self.mpoints[dim_idx].is_empty() {
736                self.add_multi_point_type(dim);
737                self.mpoints[dim_idx].push_null();
738                return;
739            }
740            if !self.mline_strings[dim_idx].is_empty() {
741                self.add_multi_line_string_type(dim);
742                self.mline_strings[dim_idx].push_null();
743                return;
744            }
745            if !self.mpolygons[dim_idx].is_empty() {
746                self.add_multi_polygon_type(dim);
747                self.mpolygons[dim_idx].push_null();
748                return;
749            }
750            if !self.gcs[dim_idx].is_empty() {
751                self.add_geometry_collection_type(dim);
752                self.gcs[dim_idx].push_null();
753                return;
754            }
755        }
756
757        self.deferred_nulls += 1;
758    }
759
760    /// Flush any deferred nulls to the desired array builder.
761    fn flush_deferred_nulls<B: GeoArrowArrayBuilder>(
762        deferred_nulls: &mut usize,
763        child: &mut B,
764        offsets: &mut Vec<i32>,
765        types: &mut Vec<i8>,
766        type_id: i8,
767    ) {
768        let offset = child.len().try_into().unwrap();
769        // For each null we also have to update the offsets and types
770        for _ in 0..*deferred_nulls {
771            offsets.push(offset);
772            types.push(type_id);
773            child.push_null();
774        }
775
776        *deferred_nulls = 0;
777    }
778
779    /// Extend this builder with the given geometries
780    pub fn extend_from_iter(
781        &mut self,
782        geoms: impl Iterator<Item = Option<&'a (impl GeometryTrait<T = f64> + 'a)>>,
783    ) {
784        geoms
785            .into_iter()
786            .try_for_each(|maybe_geom| self.push_geometry(maybe_geom))
787            .unwrap();
788    }
789
790    /// Create this builder from a slice of nullable Geometries.
791    pub fn from_nullable_geometries(
792        geoms: &[Option<impl GeometryTrait<T = f64>>],
793        typ: GeometryType,
794    ) -> GeoArrowResult<Self> {
795        let capacity = GeometryCapacity::from_geometries(geoms.iter().map(|x| x.as_ref()))?;
796        let mut array = Self::with_capacity(typ, capacity);
797        array.extend_from_iter(geoms.iter().map(|x| x.as_ref()));
798        Ok(array)
799    }
800}
801
802impl<O: OffsetSizeTrait> TryFrom<(GenericWkbArray<O>, GeometryType)> for GeometryBuilder {
803    type Error = GeoArrowError;
804
805    fn try_from((value, typ): (GenericWkbArray<O>, GeometryType)) -> GeoArrowResult<Self> {
806        let wkb_objects = value
807            .iter()
808            .map(|x| x.transpose())
809            .collect::<GeoArrowResult<Vec<_>>>()?;
810        Self::from_nullable_geometries(&wkb_objects, typ)
811    }
812}
813
/// Exposes [`GeometryBuilder`] through the generic [`GeoArrowArrayBuilder`] interface.
///
/// `push_null`, `push_geometry` and `finish` share their names with inherent methods on
/// [`GeometryBuilder`]. The `self.…()` calls below are intended to resolve to those inherent
/// methods, which take precedence over trait methods of the same name, so they delegate rather
/// than recurse.
impl GeoArrowArrayBuilder for GeometryBuilder {
    /// Number of elements pushed so far: one entry in `types` per element, nulls included once
    /// they have been written to a child.
    fn len(&self) -> usize {
        self.types.len()
    }

    /// Delegates to the inherent `GeometryBuilder::push_null`.
    fn push_null(&mut self) {
        self.push_null();
    }

    /// Delegates to the inherent `GeometryBuilder::push_geometry`.
    fn push_geometry(
        &mut self,
        geometry: Option<&impl GeometryTrait<T = f64>>,
    ) -> GeoArrowResult<()> {
        self.push_geometry(geometry)
    }

    /// Finishes the builder via the inherent `GeometryBuilder::finish` and wraps the resulting
    /// array in an `Arc`.
    fn finish(self) -> Arc<dyn GeoArrowArray> {
        Arc::new(self.finish())
    }
}
834
835/// Access the type id for an array-dimension combo
836pub(crate) trait TypeId {
837    const ARRAY_TYPE_OFFSET: i8;
838
839    fn type_id(&self, dim: Dimension) -> i8 {
840        (dim.order() as i8 * 10) + Self::ARRAY_TYPE_OFFSET
841    }
842}
843
impl TypeId for PointBuilder {
    /// Point builders use array type offset 1; `TypeId::type_id` adds the dimension component.
    const ARRAY_TYPE_OFFSET: i8 = 1;
}
847
impl TypeId for LineStringBuilder {
    /// LineString builders use array type offset 2; `TypeId::type_id` adds the dimension
    /// component.
    const ARRAY_TYPE_OFFSET: i8 = 2;
}
851
impl TypeId for PolygonBuilder {
    /// Polygon builders use array type offset 3; `TypeId::type_id` adds the dimension
    /// component.
    const ARRAY_TYPE_OFFSET: i8 = 3;
}
impl TypeId for MultiPointBuilder {
    /// MultiPoint builders use array type offset 4; `TypeId::type_id` adds the dimension
    /// component.
    const ARRAY_TYPE_OFFSET: i8 = 4;
}
impl TypeId for MultiLineStringBuilder {
    /// MultiLineString builders use array type offset 5; `TypeId::type_id` adds the dimension
    /// component.
    const ARRAY_TYPE_OFFSET: i8 = 5;
}
impl TypeId for MultiPolygonBuilder {
    /// MultiPolygon builders use array type offset 6; `TypeId::type_id` adds the dimension
    /// component.
    const ARRAY_TYPE_OFFSET: i8 = 6;
}
impl TypeId for GeometryCollectionBuilder {
    /// GeometryCollection builders use array type offset 7; `TypeId::type_id` adds the
    /// dimension component.
    const ARRAY_TYPE_OFFSET: i8 = 7;
}
867
868#[cfg(test)]
869mod test {
870    use geoarrow_schema::CoordType;
871    use wkt::wkt;
872
873    use super::*;
874    use crate::GeoArrowArray;
875
876    #[test]
877    fn all_items_null() {
878        // Testing the behavior of deferred nulls when there are no valid geometries.
879        let typ = GeometryType::new(Default::default());
880        let mut builder = GeometryBuilder::new(typ);
881
882        builder.push_null();
883        builder.push_null();
884        builder.push_null();
885
886        let array = builder.finish();
887        assert_eq!(array.logical_null_count(), 3);
888
889        // We expect the nulls to be placed in (canonically) the first child
890        assert_eq!(array.points[0].logical_null_count(), 3);
891    }
892
893    #[test]
894    fn deferred_nulls() {
895        let coord_type = CoordType::Interleaved;
896        let typ = GeometryType::new(Default::default()).with_coord_type(coord_type);
897
898        let mut builder = GeometryBuilder::new(typ);
899        builder.push_null();
900        builder.push_null();
901
902        let linestring_arr = crate::test::linestring::array(coord_type, Dimension::XYZ);
903        let linestring_arr_null_count = linestring_arr.logical_null_count();
904
905        // Push the geometries from the linestring arr onto the geometry builder
906        for geom in linestring_arr.iter() {
907            builder
908                .push_geometry(geom.transpose().unwrap().as_ref())
909                .unwrap();
910        }
911
912        let geom_arr = builder.finish();
913
914        // Since there are 2 nulls pushed manually and a third from the LineString arr
915        let total_expected_null_count = 2 + linestring_arr_null_count;
916        assert_eq!(geom_arr.logical_null_count(), total_expected_null_count);
917
918        // All nulls should be in the XYZ linestring child
919        assert_eq!(
920            geom_arr.line_strings[Dimension::XYZ.order()].logical_null_count(),
921            total_expected_null_count
922        );
923    }
924
925    #[test]
926    fn later_nulls_after_deferred_nulls_pushed_directly() {
927        let coord_type = CoordType::Interleaved;
928        let typ = GeometryType::new(Default::default()).with_coord_type(coord_type);
929
930        let mut builder = GeometryBuilder::new(typ);
931        builder.push_null();
932        builder.push_null();
933
934        let point = wkt! { POINT Z (30. 10. 40.) };
935        builder.push_point(Some(&point)).unwrap();
936
937        let ls = wkt! { LINESTRING (30. 10., 10. 30., 40. 40.) };
938        builder.push_line_string(Some(&ls)).unwrap();
939
940        builder.push_null();
941        builder.push_null();
942
943        let geom_arr = builder.finish();
944
945        assert_eq!(geom_arr.logical_null_count(), 4);
946
947        // The first two nulls get added to the point z child because those are deferred and the
948        // point z is the first non-null geometry added.
949        assert_eq!(
950            geom_arr.points[Dimension::XYZ.order()].logical_null_count(),
951            2
952        );
953
954        // The last two nulls get added to the linestring XY child because the current
955        // implementation looks through all XY arrays then all XYZ then etc looking for the first
956        // non-empty array. Since the linestring XY child is non-empty, the last nulls get pushed
957        // here.
958        assert_eq!(
959            geom_arr.line_strings[Dimension::XY.order()].logical_null_count(),
960            2
961        );
962    }
963
964    // Test pushing nulls that are added after a valid geometry has been pushed.
965    #[test]
966    fn nulls_no_deferred() {
967        let coord_type = CoordType::Interleaved;
968        let typ = GeometryType::new(Default::default()).with_coord_type(coord_type);
969
970        let mut builder = GeometryBuilder::new(typ);
971        let point = wkt! { POINT Z (30. 10. 40.) };
972        builder.push_point(Some(&point)).unwrap();
973        builder.push_null();
974        builder.push_null();
975
976        let geom_arr = builder.finish();
977        assert_eq!(geom_arr.logical_null_count(), 2);
978        // All nulls should be in point XYZ child.
979        assert_eq!(
980            geom_arr.points[Dimension::XYZ.order()].logical_null_count(),
981            2
982        );
983    }
984}