geoarrow_array/builder/
geometry.rs

1use std::sync::Arc;
2
3use arrow_array::OffsetSizeTrait;
4use geo_traits::*;
5use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
6use geoarrow_schema::{
7    Dimension, GeometryCollectionType, GeometryType, LineStringType, Metadata, MultiLineStringType,
8    MultiPointType, MultiPolygonType, PointType, PolygonType,
9};
10
11use crate::GeoArrowArray;
12use crate::array::{DimensionIndex, GenericWkbArray, GeometryArray};
13use crate::builder::geo_trait_wrappers::{LineWrapper, RectWrapper, TriangleWrapper};
14use crate::builder::{
15    GeometryCollectionBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPointBuilder,
16    MultiPolygonBuilder, PointBuilder, PolygonBuilder,
17};
18use crate::capacity::GeometryCapacity;
19use crate::trait_::{GeoArrowArrayAccessor, GeoArrowArrayBuilder};
20
/// Initial value of `GeometryBuilder::prefer_multi` for newly created builders: single-part
/// geometries go to their single-part child builders unless `with_prefer_multi(true)` is used.
pub(crate) const DEFAULT_PREFER_MULTI: bool = false;
22
/// The GeoArrow equivalent to a `Vec<Option<Geometry>>`: a mutable collection of Geometries.
///
/// Each Geometry can have a different dimension. All geometries must have the same coordinate
/// type.
///
/// This currently has the caveat that these geometries must be a _primitive_ geometry type. This
/// does not currently support nested GeometryCollection objects.
///
/// Converting a [`GeometryBuilder`] into a [`GeometryArray`] is `O(1)`.
///
/// # Invariants
///
/// - All arrays must have the same coordinate layout (interleaved or separated)
#[derive(Debug)]
pub struct GeometryBuilder {
    /// Array-level metadata; handed to the `GeometryArray` in `finish`.
    metadata: Arc<Metadata>,

    // Invariant: every item in `types` is a type id produced by `TypeId::type_id` for one of the
    // child builders below.
    types: Vec<i8>,

    /// One `PointBuilder` per dimension, ordered XY, XYZ, XYM, XYZM (indexed by
    /// `Dimension::order()`). The other child arrays below follow the same layout.
    points: [PointBuilder; 4],
    line_strings: [LineStringBuilder; 4],
    polygons: [PolygonBuilder; 4],
    mpoints: [MultiPointBuilder; 4],
    mline_strings: [MultiLineStringBuilder; 4],
    mpolygons: [MultiPolygonBuilder; 4],
    gcs: [GeometryCollectionBuilder; 4],

    // Invariant: `offsets.len() == types.len()`
    //
    // Each entry is the length of the selected child builder at the time the slot was recorded,
    // i.e. the index inside that child at which the slot's value is stored.
    offsets: Vec<i32>,

    /// Whether to prefer multi or single arrays for new geometries.
    ///
    /// E.g. if this is `true` and a Point geometry is added, it will be added to the
    /// MultiPointBuilder. If this is `false`, the Point geometry will be added to the
    /// PointBuilder.
    ///
    /// The idea is that always adding multi-geometries will make it easier to downcast later.
    pub(crate) prefer_multi: bool,

    /// The number of nulls that have been deferred and are still to be written.
    ///
    /// Adding nulls is tricky. We often want to use this builder as a generic builder for data
    /// from unknown sources, which then gets downcasted to an array of a specific type.
    ///
    /// In a large majority of the time, this builder will have only data of a single type, which
    /// can then get downcasted to a simple array of a single geometry type and dimension. But in
    /// order for this process to be easy, we want the nulls to be assigned to the same array type
    /// as the actual data.
    ///
    /// When there's a valid geometry pushed before the null, we can add the null to an existing
    /// non-null array type, but if there are no valid geometries yet, we don't know which array to
    /// push the null to. This `deferred_nulls` is the number of initial null values that haven't
    /// yet been written to an array, because we don't know which array to write them to.
    deferred_nulls: usize,
}
80
81impl<'a> GeometryBuilder {
82    /// Creates a new empty [`GeometryBuilder`].
83    pub fn new(typ: GeometryType) -> Self {
84        Self::with_capacity(typ, Default::default())
85    }
86
87    /// Creates a new [`GeometryBuilder`] with given capacity and no validity.
88    pub fn with_capacity(typ: GeometryType, capacity: GeometryCapacity) -> Self {
89        let coord_type = typ.coord_type();
90
91        let points = core::array::from_fn(|i| {
92            let dim = Dimension::from_order(i).unwrap();
93            PointBuilder::with_capacity(
94                PointType::new(dim, Default::default()).with_coord_type(coord_type),
95                capacity.point(dim),
96            )
97        });
98        let line_strings = core::array::from_fn(|i| {
99            let dim = Dimension::from_order(i).unwrap();
100            LineStringBuilder::with_capacity(
101                LineStringType::new(dim, Default::default()).with_coord_type(coord_type),
102                capacity.line_string(dim),
103            )
104        });
105        let polygons = core::array::from_fn(|i| {
106            let dim = Dimension::from_order(i).unwrap();
107            PolygonBuilder::with_capacity(
108                PolygonType::new(dim, Default::default()).with_coord_type(coord_type),
109                capacity.polygon(dim),
110            )
111        });
112        let mpoints = core::array::from_fn(|i| {
113            let dim = Dimension::from_order(i).unwrap();
114            MultiPointBuilder::with_capacity(
115                MultiPointType::new(dim, Default::default()).with_coord_type(coord_type),
116                capacity.multi_point(dim),
117            )
118        });
119        let mline_strings = core::array::from_fn(|i| {
120            let dim = Dimension::from_order(i).unwrap();
121            MultiLineStringBuilder::with_capacity(
122                MultiLineStringType::new(dim, Default::default()).with_coord_type(coord_type),
123                capacity.multi_line_string(dim),
124            )
125        });
126        let mpolygons = core::array::from_fn(|i| {
127            let dim = Dimension::from_order(i).unwrap();
128            MultiPolygonBuilder::with_capacity(
129                MultiPolygonType::new(dim, Default::default()).with_coord_type(coord_type),
130                capacity.multi_polygon(dim),
131            )
132        });
133        let gcs = core::array::from_fn(|i| {
134            let dim = Dimension::from_order(i).unwrap();
135            GeometryCollectionBuilder::with_capacity(
136                GeometryCollectionType::new(dim, Default::default()).with_coord_type(coord_type),
137                capacity.geometry_collection(dim),
138            )
139        });
140
141        // Don't store array metadata on child arrays
142        Self {
143            metadata: typ.metadata().clone(),
144            types: vec![],
145            points,
146            line_strings,
147            polygons,
148            mpoints,
149            mline_strings,
150            mpolygons,
151            gcs,
152            offsets: vec![],
153            deferred_nulls: 0,
154            prefer_multi: DEFAULT_PREFER_MULTI,
155        }
156    }
157
158    /// Change whether to prefer multi or single arrays for new single-part geometries.
159    ///
160    /// If `true`, a new `Point` will be added to the `MultiPointBuilder` child array, a new
161    /// `LineString` will be added to the `MultiLineStringBuilder` child array, and a new `Polygon`
162    /// will be added to the `MultiPolygonBuilder` child array.
163    ///
164    /// This can be desired when the user wants to downcast the array to a single geometry array
165    /// later, as casting to a, say, `MultiPointArray` from a `GeometryArray` could be done
166    /// zero-copy.
167    ///
168    /// Note that only geometries added _after_ this method is called will be affected.
169    pub fn with_prefer_multi(self, prefer_multi: bool) -> Self {
170        Self {
171            prefer_multi,
172            gcs: self.gcs.map(|gc| gc.with_prefer_multi(prefer_multi)),
173            ..self
174        }
175    }
176
177    /// Reserves capacity for at least `additional` more geometries.
178    ///
179    /// The collection may reserve more space to speculatively avoid frequent reallocations. After
180    /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`.
181    /// Does nothing if capacity is already sufficient.
182    pub fn reserve(&mut self, capacity: GeometryCapacity) {
183        let total_num_geoms = capacity.total_num_geoms();
184        self.types.reserve(total_num_geoms);
185        self.offsets.reserve(total_num_geoms);
186
187        capacity.points.iter().enumerate().for_each(|(i, cap)| {
188            self.points[i].reserve(*cap);
189        });
190        capacity
191            .line_strings
192            .iter()
193            .enumerate()
194            .for_each(|(i, cap)| {
195                self.line_strings[i].reserve(*cap);
196            });
197        capacity.polygons.iter().enumerate().for_each(|(i, cap)| {
198            self.polygons[i].reserve(*cap);
199        });
200        capacity.mpoints.iter().enumerate().for_each(|(i, cap)| {
201            self.mpoints[i].reserve(*cap);
202        });
203        capacity
204            .mline_strings
205            .iter()
206            .enumerate()
207            .for_each(|(i, cap)| {
208                self.mline_strings[i].reserve(*cap);
209            });
210        capacity.mpolygons.iter().enumerate().for_each(|(i, cap)| {
211            self.mpolygons[i].reserve(*cap);
212        });
213        capacity.gcs.iter().enumerate().for_each(|(i, cap)| {
214            self.gcs[i].reserve(*cap);
215        });
216    }
217
218    /// Reserves the minimum capacity for at least `additional` more Geometries.
219    ///
220    /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid
221    /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal
222    /// to `self.len() + additional`. Does nothing if the capacity is already sufficient.
223    ///
224    /// Note that the allocator may give the collection more space than it
225    /// requests. Therefore, capacity can not be relied upon to be precisely
226    /// minimal. Prefer [`reserve`] if future insertions are expected.
227    ///
228    /// [`reserve`]: Self::reserve
229    pub fn reserve_exact(&mut self, capacity: GeometryCapacity) {
230        let total_num_geoms = capacity.total_num_geoms();
231
232        self.types.reserve_exact(total_num_geoms);
233        self.offsets.reserve_exact(total_num_geoms);
234
235        capacity.points.iter().enumerate().for_each(|(i, cap)| {
236            self.points[i].reserve_exact(*cap);
237        });
238        capacity
239            .line_strings
240            .iter()
241            .enumerate()
242            .for_each(|(i, cap)| {
243                self.line_strings[i].reserve_exact(*cap);
244            });
245        capacity.polygons.iter().enumerate().for_each(|(i, cap)| {
246            self.polygons[i].reserve_exact(*cap);
247        });
248        capacity.mpoints.iter().enumerate().for_each(|(i, cap)| {
249            self.mpoints[i].reserve_exact(*cap);
250        });
251        capacity
252            .mline_strings
253            .iter()
254            .enumerate()
255            .for_each(|(i, cap)| {
256                self.mline_strings[i].reserve_exact(*cap);
257            });
258        capacity.mpolygons.iter().enumerate().for_each(|(i, cap)| {
259            self.mpolygons[i].reserve_exact(*cap);
260        });
261        capacity.gcs.iter().enumerate().for_each(|(i, cap)| {
262            self.gcs[i].reserve_exact(*cap);
263        });
264    }
265
266    /// Consume the builder and convert to an immutable [`GeometryArray`]
267    pub fn finish(mut self) -> GeometryArray {
268        // If there are still deferred nulls to be written, then there aren't any valid geometries
269        // in this array, and just choose a child to write them to.
270        if self.deferred_nulls > 0 {
271            let dim = Dimension::XY;
272            let child = &mut self.points[dim.order()];
273            let type_id = child.type_id(dim);
274            Self::flush_deferred_nulls(
275                &mut self.deferred_nulls,
276                child,
277                &mut self.offsets,
278                &mut self.types,
279                type_id,
280            );
281        }
282
283        GeometryArray::new(
284            self.types.into(),
285            self.offsets.into(),
286            self.points.map(|arr| arr.finish()),
287            self.line_strings.map(|arr| arr.finish()),
288            self.polygons.map(|arr| arr.finish()),
289            self.mpoints.map(|arr| arr.finish()),
290            self.mline_strings.map(|arr| arr.finish()),
291            self.mpolygons.map(|arr| arr.finish()),
292            self.gcs.map(|arr| arr.finish()),
293            self.metadata,
294        )
295    }
296
297    /// Add a new Point to the end of this array.
298    ///
299    /// If `self.prefer_multi` is `true`, it will be stored in the `MultiPointBuilder` child
300    /// array. Otherwise, it will be stored in the `PointBuilder` child array.
301    #[inline]
302    fn push_point(&mut self, value: Option<&impl PointTrait<T = f64>>) -> GeoArrowResult<()> {
303        if let Some(point) = value {
304            let dim: Dimension = point.dim().try_into().unwrap();
305            let array_idx = dim.order();
306
307            if self.prefer_multi {
308                let child = &mut self.mpoints[array_idx];
309                let type_id = child.type_id(dim);
310
311                Self::flush_deferred_nulls(
312                    &mut self.deferred_nulls,
313                    child,
314                    &mut self.offsets,
315                    &mut self.types,
316                    type_id,
317                );
318                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
319                child.push_point(Some(point))?;
320            } else {
321                let child = &mut self.points[array_idx];
322                let type_id = child.type_id(dim);
323
324                Self::flush_deferred_nulls(
325                    &mut self.deferred_nulls,
326                    child,
327                    &mut self.offsets,
328                    &mut self.types,
329                    type_id,
330                );
331                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
332                child.push_point(Some(point));
333            }
334        } else {
335            self.push_null();
336        };
337
338        Ok(())
339    }
340
341    #[inline]
342    fn add_type<B: GeoArrowArrayBuilder>(
343        child: &mut B,
344        offsets: &mut Vec<i32>,
345        types: &mut Vec<i8>,
346        type_id: i8,
347    ) {
348        offsets.push(child.len().try_into().unwrap());
349        types.push(type_id);
350    }
351
352    #[inline]
353    fn add_point_type(&mut self, dim: Dimension) {
354        let child = &self.points[dim.order()];
355        self.offsets.push(child.len().try_into().unwrap());
356        self.types.push(child.type_id(dim));
357    }
358
359    /// Add a new LineString to the end of this array.
360    ///
361    /// If `self.prefer_multi` is `true`, it will be stored in the `MultiLineStringBuilder` child
362    /// array. Otherwise, it will be stored in the `LineStringBuilder` child array.
363    ///
364    /// # Errors
365    ///
366    /// This function errors iff the new last item is larger than what O supports.
367    #[inline]
368    fn push_line_string(
369        &mut self,
370        value: Option<&impl LineStringTrait<T = f64>>,
371    ) -> GeoArrowResult<()> {
372        if let Some(line_string) = value {
373            let dim: Dimension = line_string.dim().try_into().unwrap();
374            let array_idx = dim.order();
375
376            if self.prefer_multi {
377                let child = &mut self.mline_strings[array_idx];
378                let type_id = child.type_id(dim);
379
380                Self::flush_deferred_nulls(
381                    &mut self.deferred_nulls,
382                    child,
383                    &mut self.offsets,
384                    &mut self.types,
385                    type_id,
386                );
387                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
388                child.push_line_string(Some(line_string))?;
389            } else {
390                let child = &mut self.line_strings[array_idx];
391                let type_id = child.type_id(dim);
392
393                Self::flush_deferred_nulls(
394                    &mut self.deferred_nulls,
395                    child,
396                    &mut self.offsets,
397                    &mut self.types,
398                    type_id,
399                );
400                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
401                child.push_line_string(Some(line_string))?;
402            }
403        } else {
404            self.push_null();
405        };
406
407        Ok(())
408    }
409
410    #[inline]
411    fn add_line_string_type(&mut self, dim: Dimension) {
412        let child = &self.line_strings[dim.order()];
413        self.offsets.push(child.len().try_into().unwrap());
414        self.types.push(child.type_id(dim));
415    }
416
417    /// Add a new Polygon to the end of this array.
418    ///
419    /// If `self.prefer_multi` is `true`, it will be stored in the `MultiPolygonBuilder` child
420    /// array. Otherwise, it will be stored in the `PolygonBuilder` child array.
421    ///
422    /// # Errors
423    ///
424    /// This function errors iff the new last item is larger than what O supports.
425    #[inline]
426    fn push_polygon(&mut self, value: Option<&impl PolygonTrait<T = f64>>) -> GeoArrowResult<()> {
427        if let Some(polygon) = value {
428            let dim: Dimension = polygon.dim().try_into().unwrap();
429            let array_idx = dim.order();
430
431            if self.prefer_multi {
432                let child = &mut self.mpolygons[array_idx];
433                let type_id = child.type_id(dim);
434
435                Self::flush_deferred_nulls(
436                    &mut self.deferred_nulls,
437                    child,
438                    &mut self.offsets,
439                    &mut self.types,
440                    type_id,
441                );
442                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
443                child.push_polygon(Some(polygon))?;
444            } else {
445                let child = &mut self.polygons[array_idx];
446                let type_id = child.type_id(dim);
447
448                Self::flush_deferred_nulls(
449                    &mut self.deferred_nulls,
450                    child,
451                    &mut self.offsets,
452                    &mut self.types,
453                    type_id,
454                );
455                Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
456                child.push_polygon(Some(polygon))?;
457            }
458        } else {
459            self.push_null();
460        };
461
462        Ok(())
463    }
464
465    #[inline]
466    fn add_polygon_type(&mut self, dim: Dimension) {
467        let child = &self.polygons[dim.order()];
468        self.offsets.push(child.len().try_into().unwrap());
469        self.types.push(child.type_id(dim));
470    }
471
472    /// Add a new MultiPoint to the end of this array.
473    ///
474    /// # Errors
475    ///
476    /// This function errors iff the new last item is larger than what O supports.
477    #[inline]
478    fn push_multi_point(
479        &mut self,
480        value: Option<&impl MultiPointTrait<T = f64>>,
481    ) -> GeoArrowResult<()> {
482        if let Some(multi_point) = value {
483            let dim: Dimension = multi_point.dim().try_into().unwrap();
484            let array_idx = dim.order();
485
486            let child = &mut self.mpoints[array_idx];
487            let type_id = child.type_id(dim);
488
489            Self::flush_deferred_nulls(
490                &mut self.deferred_nulls,
491                child,
492                &mut self.offsets,
493                &mut self.types,
494                type_id,
495            );
496            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
497            child.push_multi_point(Some(multi_point))?;
498        } else {
499            self.push_null();
500        };
501
502        Ok(())
503    }
504
505    #[inline]
506    fn add_multi_point_type(&mut self, dim: Dimension) {
507        let child = &self.mpoints[dim.order()];
508        self.offsets.push(child.len().try_into().unwrap());
509        self.types.push(child.type_id(dim));
510    }
511
512    /// Add a new MultiLineString to the end of this array.
513    ///
514    /// # Errors
515    ///
516    /// This function errors iff the new last item is larger than what O supports.
517    #[inline]
518    fn push_multi_line_string(
519        &mut self,
520        value: Option<&impl MultiLineStringTrait<T = f64>>,
521    ) -> GeoArrowResult<()> {
522        if let Some(multi_line_string) = value {
523            let dim: Dimension = multi_line_string.dim().try_into().unwrap();
524            let array_idx = dim.order();
525
526            let child = &mut self.mline_strings[array_idx];
527            let type_id = child.type_id(dim);
528
529            Self::flush_deferred_nulls(
530                &mut self.deferred_nulls,
531                child,
532                &mut self.offsets,
533                &mut self.types,
534                type_id,
535            );
536            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
537            child.push_multi_line_string(Some(multi_line_string))?;
538        } else {
539            self.push_null();
540        };
541
542        Ok(())
543    }
544
545    #[inline]
546    fn add_multi_line_string_type(&mut self, dim: Dimension) {
547        let child = &self.mline_strings[dim.order()];
548        self.offsets.push(child.len().try_into().unwrap());
549        self.types.push(child.type_id(dim));
550    }
551
552    /// Add a new MultiPolygon to the end of this array.
553    ///
554    /// # Errors
555    ///
556    /// This function errors iff the new last item is larger than what O supports.
557    #[inline]
558    fn push_multi_polygon(
559        &mut self,
560        value: Option<&impl MultiPolygonTrait<T = f64>>,
561    ) -> GeoArrowResult<()> {
562        if let Some(multi_polygon) = value {
563            let dim: Dimension = multi_polygon.dim().try_into().unwrap();
564            let array_idx = dim.order();
565
566            let child = &mut self.mpolygons[array_idx];
567            let type_id = child.type_id(dim);
568
569            Self::flush_deferred_nulls(
570                &mut self.deferred_nulls,
571                child,
572                &mut self.offsets,
573                &mut self.types,
574                type_id,
575            );
576            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
577            child.push_multi_polygon(Some(multi_polygon))?;
578        } else {
579            self.push_null();
580        };
581
582        Ok(())
583    }
584
585    #[inline]
586    fn add_multi_polygon_type(&mut self, dim: Dimension) {
587        let child = &self.mpolygons[dim.order()];
588        self.offsets.push(child.len().try_into().unwrap());
589        self.types.push(child.type_id(dim));
590    }
591
592    /// Add a new geometry to this builder
593    #[inline]
594    pub fn push_geometry(
595        &mut self,
596        value: Option<&'a impl GeometryTrait<T = f64>>,
597    ) -> GeoArrowResult<()> {
598        use geo_traits::GeometryType::*;
599
600        if let Some(geom) = value {
601            match geom.as_type() {
602                Point(g) => {
603                    self.push_point(Some(g))?;
604                }
605                LineString(g) => {
606                    self.push_line_string(Some(g))?;
607                }
608                Polygon(g) => {
609                    self.push_polygon(Some(g))?;
610                }
611                MultiPoint(p) => self.push_multi_point(Some(p))?,
612                MultiLineString(p) => self.push_multi_line_string(Some(p))?,
613                MultiPolygon(p) => self.push_multi_polygon(Some(p))?,
614                GeometryCollection(gc) => {
615                    if gc.num_geometries() == 1 {
616                        self.push_geometry(Some(&gc.geometry(0).unwrap()))?
617                    } else {
618                        self.push_geometry_collection(Some(gc))?
619                    }
620                }
621                Rect(r) => self.push_polygon(Some(&RectWrapper::try_new(r)?))?,
622                Triangle(tri) => self.push_polygon(Some(&TriangleWrapper(tri)))?,
623                Line(l) => self.push_line_string(Some(&LineWrapper(l)))?,
624            };
625        } else {
626            self.push_null();
627        }
628        Ok(())
629    }
630
631    /// Add a new GeometryCollection to the end of this array.
632    ///
633    /// # Errors
634    ///
635    /// This function errors iff the new last item is larger than what O supports.
636    #[inline]
637    fn push_geometry_collection(
638        &mut self,
639        value: Option<&impl GeometryCollectionTrait<T = f64>>,
640    ) -> GeoArrowResult<()> {
641        if let Some(gc) = value {
642            let dim: Dimension = gc.dim().try_into().unwrap();
643            let array_idx = dim.order();
644
645            let child = &mut self.gcs[array_idx];
646            let type_id = child.type_id(dim);
647
648            Self::flush_deferred_nulls(
649                &mut self.deferred_nulls,
650                child,
651                &mut self.offsets,
652                &mut self.types,
653                type_id,
654            );
655            Self::add_type(child, &mut self.offsets, &mut self.types, type_id);
656            child.push_geometry_collection(Some(gc))?;
657        } else {
658            self.push_null();
659        };
660
661        Ok(())
662    }
663
664    #[inline]
665    fn add_geometry_collection_type(&mut self, dim: Dimension) {
666        let child = &self.gcs[dim.order()];
667        self.offsets.push(child.len().try_into().unwrap());
668        self.types.push(child.type_id(dim));
669    }
670
671    /// Push a null to this builder.
672    ///
673    /// Adding null values to a union array is tricky, because you don't want to add a null to a
674    /// child that would otherwise be totally empty. Ideally, as few children as possible exist and
675    /// are non-empty.
676    ///
677    /// We handle that by pushing nulls to the first non-empty child we find. If no underlying
678    /// arrays are non-empty, we add to an internal counter instead. Once the first non-empty
679    /// geometry is pushed, then we flush all the "deferred nulls" to that child.
680    ///
681    // TODO: test building an array of all nulls. Make sure we flush deferred nulls if we've never
682    // added any valid geometries.
683    #[inline]
684    pub fn push_null(&mut self) {
685        // Iterate through each dimension, then iterate through each child type. If a child exists,
686        // push a null to it.
687        //
688        // Note that we must **also** call `add_*_type` so that the offsets are correct to point
689        // the union array to the child.
690        for dim in [
691            Dimension::XY,
692            Dimension::XYZ,
693            Dimension::XYM,
694            Dimension::XYZM,
695        ] {
696            let dim_idx = dim.order();
697            if !self.points[dim_idx].is_empty() {
698                self.add_point_type(dim);
699                self.points[dim_idx].push_null();
700                return;
701            }
702            if !self.line_strings[dim_idx].is_empty() {
703                self.add_line_string_type(dim);
704                self.line_strings[dim_idx].push_null();
705                return;
706            }
707            if !self.polygons[dim_idx].is_empty() {
708                self.add_polygon_type(dim);
709                self.polygons[dim_idx].push_null();
710                return;
711            }
712            if !self.mpoints[dim_idx].is_empty() {
713                self.add_multi_point_type(dim);
714                self.mpoints[dim_idx].push_null();
715                return;
716            }
717            if !self.mline_strings[dim_idx].is_empty() {
718                self.add_multi_line_string_type(dim);
719                self.mline_strings[dim_idx].push_null();
720                return;
721            }
722            if !self.mpolygons[dim_idx].is_empty() {
723                self.add_multi_polygon_type(dim);
724                self.mpolygons[dim_idx].push_null();
725                return;
726            }
727            if !self.gcs[dim_idx].is_empty() {
728                self.add_geometry_collection_type(dim);
729                self.gcs[dim_idx].push_null();
730                return;
731            }
732        }
733
734        self.deferred_nulls += 1;
735    }
736
737    /// Flush any deferred nulls to the desired array builder.
738    fn flush_deferred_nulls<B: GeoArrowArrayBuilder>(
739        deferred_nulls: &mut usize,
740        child: &mut B,
741        offsets: &mut Vec<i32>,
742        types: &mut Vec<i8>,
743        type_id: i8,
744    ) {
745        let offset = child.len().try_into().unwrap();
746        // For each null we also have to update the offsets and types
747        for _ in 0..*deferred_nulls {
748            offsets.push(offset);
749            types.push(type_id);
750            child.push_null();
751        }
752
753        *deferred_nulls = 0;
754    }
755
756    /// Extend this builder with the given geometries
757    pub fn extend_from_iter(
758        &mut self,
759        geoms: impl Iterator<Item = Option<&'a (impl GeometryTrait<T = f64> + 'a)>>,
760    ) {
761        geoms
762            .into_iter()
763            .try_for_each(|maybe_geom| self.push_geometry(maybe_geom))
764            .unwrap();
765    }
766
767    /// Create this builder from a slice of nullable Geometries.
768    pub fn from_nullable_geometries(
769        geoms: &[Option<impl GeometryTrait<T = f64>>],
770        typ: GeometryType,
771    ) -> GeoArrowResult<Self> {
772        let capacity = GeometryCapacity::from_geometries(geoms.iter().map(|x| x.as_ref()))?;
773        let mut array = Self::with_capacity(typ, capacity);
774        array.extend_from_iter(geoms.iter().map(|x| x.as_ref()));
775        Ok(array)
776    }
777}
778
779impl<O: OffsetSizeTrait> TryFrom<(GenericWkbArray<O>, GeometryType)> for GeometryBuilder {
780    type Error = GeoArrowError;
781
782    fn try_from((value, typ): (GenericWkbArray<O>, GeometryType)) -> GeoArrowResult<Self> {
783        let wkb_objects = value
784            .iter()
785            .map(|x| x.transpose())
786            .collect::<GeoArrowResult<Vec<_>>>()?;
787        Self::from_nullable_geometries(&wkb_objects, typ)
788    }
789}
790
791impl GeoArrowArrayBuilder for GeometryBuilder {
792    fn len(&self) -> usize {
793        self.types.len()
794    }
795
796    fn push_null(&mut self) {
797        self.push_null();
798    }
799
800    fn push_geometry(
801        &mut self,
802        geometry: Option<&impl GeometryTrait<T = f64>>,
803    ) -> GeoArrowResult<()> {
804        self.push_geometry(geometry)
805    }
806
807    fn finish(self) -> Arc<dyn GeoArrowArray> {
808        Arc::new(self.finish())
809    }
810}
811
812/// Access the type id for an array-dimension combo
813pub(crate) trait TypeId {
814    const ARRAY_TYPE_OFFSET: i8;
815
816    fn type_id(&self, dim: Dimension) -> i8 {
817        (dim.order() as i8 * 10) + Self::ARRAY_TYPE_OFFSET
818    }
819}
820
// Per-geometry-type offsets. `TypeId::type_id` adds `dim.order() * 10` to these, so each
// geometry type keeps the same last digit across dimensions.

/// Point children use offset 1.
impl TypeId for PointBuilder {
    const ARRAY_TYPE_OFFSET: i8 = 1;
}

/// LineString children use offset 2.
impl TypeId for LineStringBuilder {
    const ARRAY_TYPE_OFFSET: i8 = 2;
}

/// Polygon children use offset 3.
impl TypeId for PolygonBuilder {
    const ARRAY_TYPE_OFFSET: i8 = 3;
}
/// MultiPoint children use offset 4.
impl TypeId for MultiPointBuilder {
    const ARRAY_TYPE_OFFSET: i8 = 4;
}
/// MultiLineString children use offset 5.
impl TypeId for MultiLineStringBuilder {
    const ARRAY_TYPE_OFFSET: i8 = 5;
}
/// MultiPolygon children use offset 6.
impl TypeId for MultiPolygonBuilder {
    const ARRAY_TYPE_OFFSET: i8 = 6;
}
/// GeometryCollection children use offset 7.
impl TypeId for GeometryCollectionBuilder {
    const ARRAY_TYPE_OFFSET: i8 = 7;
}
844
#[cfg(test)]
mod test {
    use geoarrow_schema::CoordType;
    use wkt::wkt;

    use super::*;
    use crate::GeoArrowArray;

    /// Creates an empty builder whose geometry type uses interleaved coordinates.
    fn interleaved_builder() -> GeometryBuilder {
        let typ = GeometryType::new(Default::default()).with_coord_type(CoordType::Interleaved);
        GeometryBuilder::new(typ)
    }

    /// Pushes `count` null entries onto `builder`.
    fn push_nulls(builder: &mut GeometryBuilder, count: usize) {
        for _ in 0..count {
            builder.push_null();
        }
    }

    // Deferred nulls when no valid geometry is ever pushed.
    #[test]
    fn all_items_null() {
        let mut builder = GeometryBuilder::new(GeometryType::new(Default::default()));
        push_nulls(&mut builder, 3);

        let finished = builder.finish();
        assert_eq!(finished.logical_null_count(), 3);

        // We expect the nulls to be placed in (canonically) the first child.
        let first_child = &finished.points[0];
        assert_eq!(first_child.logical_null_count(), 3);
    }

    // Nulls pushed before any geometry end up in the child of the first geometry pushed.
    #[test]
    fn deferred_nulls() {
        let mut builder = interleaved_builder();
        push_nulls(&mut builder, 2);

        let source = crate::test::linestring::array(CoordType::Interleaved, Dimension::XYZ);
        let source_null_count = source.logical_null_count();

        // Copy every entry of the linestring fixture, nulls included, onto the geometry builder.
        for item in source.iter() {
            let maybe_line_string = item.transpose().unwrap();
            builder.push_geometry(maybe_line_string.as_ref()).unwrap();
        }

        let finished = builder.finish();

        // The two nulls pushed by hand plus however many the linestring fixture contributes.
        let expected_null_count = source_null_count + 2;
        assert_eq!(finished.logical_null_count(), expected_null_count);

        // All nulls should be in the XYZ linestring child.
        let line_string_xyz = &finished.line_strings[Dimension::XYZ.order()];
        assert_eq!(line_string_xyz.logical_null_count(), expected_null_count);
    }

    // Nulls pushed both before and after valid geometries of two different types.
    #[test]
    fn later_nulls_after_deferred_nulls_pushed_directly() {
        let mut builder = interleaved_builder();
        push_nulls(&mut builder, 2);

        let point_z = wkt! { POINT Z (30. 10. 40.) };
        builder.push_point(Some(&point_z)).unwrap();

        let line_string_xy = wkt! { LINESTRING (30. 10., 10. 30., 40. 40.) };
        builder.push_line_string(Some(&line_string_xy)).unwrap();

        push_nulls(&mut builder, 2);

        let finished = builder.finish();
        assert_eq!(finished.logical_null_count(), 4);

        // The first two nulls were deferred, so they land in the point Z child: point Z is the
        // first non-null geometry that was added.
        let point_z_nulls = finished.points[Dimension::XYZ.order()].logical_null_count();
        assert_eq!(point_z_nulls, 2);

        // The last two nulls land in the linestring XY child. The current implementation scans
        // all XY arrays, then all XYZ arrays, and so on, looking for the first non-empty array;
        // the linestring XY child is non-empty, so the trailing nulls are pushed there.
        let line_string_xy_nulls =
            finished.line_strings[Dimension::XY.order()].logical_null_count();
        assert_eq!(line_string_xy_nulls, 2);
    }

    // Nulls that are only pushed after a valid geometry has already been pushed.
    #[test]
    fn nulls_no_deferred() {
        let mut builder = interleaved_builder();

        let point_z = wkt! { POINT Z (30. 10. 40.) };
        builder.push_point(Some(&point_z)).unwrap();
        push_nulls(&mut builder, 2);

        let finished = builder.finish();
        assert_eq!(finished.logical_null_count(), 2);

        // All nulls should be in the point XYZ child.
        let point_z_nulls = finished.points[Dimension::XYZ.order()].logical_null_count();
        assert_eq!(point_z_nulls, 2);
    }
}