geoarrow_array/geozero/import/
geometry.rs

1use std::fmt::Debug;
2use std::sync::Arc;
3
4use geoarrow_schema::GeometryType;
5use geoarrow_schema::error::GeoArrowResult;
6use geozero::error::GeozeroError;
7use geozero::geo_types::GeoWriter;
8use geozero::{GeomProcessor, GeozeroGeometry};
9
10use crate::GeoArrowArray;
11use crate::array::GeometryArray;
12use crate::builder::GeometryBuilder;
13use crate::trait_::GeoArrowArrayBuilder;
14
15/// GeoZero trait to convert to GeoArrow [`GeometryArray`].
16///
17/// **NOTE** only XY dimensions are currently supported here.
18///
19/// (This is because the internal GeoWriter only supports XY dimensions.)
20pub trait ToGeometryArray {
21    /// Convert to GeoArrow [`GeometryArray`]
22    fn to_geometry_array(&self, typ: GeometryType) -> geozero::error::Result<GeometryArray> {
23        Ok(self.to_geometry_builder(typ)?.finish())
24    }
25
26    /// Convert to a GeoArrow [`GeometryBuilder`]
27    fn to_geometry_builder(&self, typ: GeometryType) -> geozero::error::Result<GeometryBuilder>;
28}
29
30impl<T: GeozeroGeometry> ToGeometryArray for T {
31    fn to_geometry_builder(&self, typ: GeometryType) -> geozero::error::Result<GeometryBuilder> {
32        let mut stream_builder = GeometryStreamBuilder::new(typ);
33        self.process_geom(&mut stream_builder)?;
34        Ok(stream_builder.builder)
35    }
36}
37
/// A streaming builder for GeoArrow [`GeometryArray`].
///
/// This is useful in conjunction with [`geozero`] APIs because its coordinate stream requires the
/// consumer to keep track of which geometry type is currently being added to.
///
/// This implementation can be complex because we need to connect the push-based stream of the
/// geozero source (coordinate-by-coordinate) with the pull-based (complete geometry) APIs of the
/// [`GeometryBuilder`]. In particular, [`GeometryBuilder`] requires reading from _whole
/// geometries_.
///
/// This is implemented with an internal [GeoWriter] used to buffer each stream of coordinates.
/// Each incoming geometry is collected into a "current geometry", and then when that geometry's
/// stream is finished, that geometry is propagated on to the [`GeometryBuilder`] and the current
/// geometry is cleared.
///
/// Note that this has some memory overhead because of the buffering, and it requires copying
/// _once_ from the geozero source into the intermediate [geo_types] object, and then _again_ into
/// the GeoArrow array.
///
/// In the future we could use a bump allocator to improve memory performance here.
///
/// Converting a [`GeometryStreamBuilder`] into a [`GeometryArray`] is `O(1)`.
struct GeometryStreamBuilder {
    /// The underlying geometry builder. When each geometry is finished, we add the geometry to
    /// this builder.
    builder: GeometryBuilder,
    /// The current geometry being built. [GeoWriter] implements [GeomProcessor], so incoming
    /// geozero events are forwarded to it until the geometry is complete.
    current_geometry: GeoWriter,
    /// The current nesting level of geometry collections. This is required because geozero
    /// represents an array of geometries as a GeometryCollection. But we don't want to try to
    /// "finish" the `current_geometry` when this only represents the top-level sequence of
    /// geometries we're putting into the array.
    ///
    /// We should only "finish" the `current_geometry` for _nested_ geometry collections beyond the
    /// root level.
    ///
    /// Starts at `0`; incremented on every `geometrycollection_begin` and decremented on every
    /// `geometrycollection_end`.
    geometry_collection_level: usize,
}
75
76impl GeometryStreamBuilder {
77    pub fn new(typ: GeometryType) -> Self {
78        Self {
79            builder: GeometryBuilder::new(typ),
80            current_geometry: GeoWriter::new(),
81            geometry_collection_level: 0,
82        }
83    }
84
85    fn push_current_geometry(&mut self) -> geozero::error::Result<()> {
86        let geom = self
87            .current_geometry
88            .take_geometry()
89            .ok_or(GeozeroError::Geometry("Take geometry failed".to_string()))?;
90        self.builder
91            .push_geometry(Some(&geom))
92            .map_err(|err| GeozeroError::Geometry(err.to_string()))?;
93        self.current_geometry = GeoWriter::new();
94        Ok(())
95    }
96}
97
98impl Debug for GeometryStreamBuilder {
99    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
100        self.builder.fmt(f)
101    }
102}
103
104#[allow(unused_variables)]
105impl GeomProcessor for GeometryStreamBuilder {
106    fn xy(&mut self, x: f64, y: f64, idx: usize) -> geozero::error::Result<()> {
107        self.current_geometry.xy(x, y, idx)
108    }
109
110    fn coordinate(
111        &mut self,
112        x: f64,
113        y: f64,
114        z: Option<f64>,
115        m: Option<f64>,
116        t: Option<f64>,
117        tm: Option<u64>,
118        idx: usize,
119    ) -> geozero::error::Result<()> {
120        self.current_geometry.coordinate(x, y, z, m, t, tm, idx)
121    }
122
123    fn empty_point(&mut self, idx: usize) -> geozero::error::Result<()> {
124        // This needs to be separate because GeoWriter doesn't know how to handle empty points
125        Err(GeozeroError::Geometry(
126            "Empty points not currently supported in ToGeometryArray.".to_string(),
127        ))
128    }
129
130    fn point_begin(&mut self, idx: usize) -> geozero::error::Result<()> {
131        self.current_geometry.point_begin(idx)
132    }
133
134    fn point_end(&mut self, idx: usize) -> geozero::error::Result<()> {
135        self.current_geometry.point_end(idx)?;
136        self.push_current_geometry()
137    }
138
139    fn multipoint_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> {
140        self.current_geometry.multipoint_begin(size, idx)
141    }
142
143    fn multipoint_end(&mut self, idx: usize) -> geozero::error::Result<()> {
144        self.current_geometry.multipoint_end(idx)?;
145        self.push_current_geometry()
146    }
147
148    fn linestring_begin(
149        &mut self,
150        tagged: bool,
151        size: usize,
152        idx: usize,
153    ) -> geozero::error::Result<()> {
154        self.current_geometry.linestring_begin(tagged, size, idx)
155    }
156
157    fn linestring_end(&mut self, tagged: bool, idx: usize) -> geozero::error::Result<()> {
158        self.current_geometry.linestring_end(tagged, idx)?;
159
160        // When tagged is true, that means it's a standalone LineString and not part of a
161        // MultiLineString
162        if tagged {
163            self.push_current_geometry()?;
164        }
165        Ok(())
166    }
167
168    fn multilinestring_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> {
169        self.current_geometry.multilinestring_begin(size, idx)
170    }
171
172    fn multilinestring_end(&mut self, idx: usize) -> geozero::error::Result<()> {
173        self.current_geometry.multilinestring_end(idx)?;
174        self.push_current_geometry()
175    }
176
177    fn polygon_begin(
178        &mut self,
179        tagged: bool,
180        size: usize,
181        idx: usize,
182    ) -> geozero::error::Result<()> {
183        self.current_geometry.polygon_begin(tagged, size, idx)
184    }
185
186    fn polygon_end(&mut self, tagged: bool, idx: usize) -> geozero::error::Result<()> {
187        self.current_geometry.polygon_end(tagged, idx)?;
188
189        // When tagged is true, that means it's a standalone LineString and not part of a
190        // MultiLineString
191        if tagged {
192            self.push_current_geometry()?;
193        }
194
195        Ok(())
196    }
197
198    fn multipolygon_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> {
199        self.current_geometry.multipolygon_begin(size, idx)
200    }
201
202    fn multipolygon_end(&mut self, idx: usize) -> geozero::error::Result<()> {
203        self.current_geometry.multipolygon_end(idx)?;
204        self.push_current_geometry()
205    }
206
207    fn geometrycollection_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> {
208        if self.geometry_collection_level > 0 {
209            self.current_geometry.geometrycollection_begin(size, idx)?;
210        }
211
212        self.geometry_collection_level += 1;
213        Ok(())
214    }
215
216    fn geometrycollection_end(&mut self, idx: usize) -> geozero::error::Result<()> {
217        self.geometry_collection_level -= 1;
218
219        if self.geometry_collection_level > 0 {
220            self.current_geometry.geometrycollection_end(idx)?;
221            self.push_current_geometry()?;
222        }
223
224        Ok(())
225    }
226}
227
228impl GeoArrowArrayBuilder for GeometryStreamBuilder {
229    fn len(&self) -> usize {
230        self.builder.len()
231    }
232
233    fn push_null(&mut self) {
234        self.builder.push_null()
235    }
236
237    fn push_geometry(
238        &mut self,
239        geometry: Option<&impl geo_traits::GeometryTrait<T = f64>>,
240    ) -> GeoArrowResult<()> {
241        self.builder.push_geometry(geometry)
242    }
243
244    fn finish(self) -> Arc<dyn GeoArrowArray> {
245        Arc::new(self.builder.finish())
246    }
247}
248
#[cfg(test)]
mod test {
    use geo_types::{Geometry, GeometryCollection};
    use geozero::error::Result;

    use super::*;
    use crate::test::{linestring, multilinestring, multipoint, multipolygon, point, polygon};

    /// Fixture covering every non-collection geometry kind, in a fixed order.
    fn geoms() -> Vec<geo_types::Geometry> {
        vec![
            Geometry::from(point::p0()),
            Geometry::from(point::p1()),
            Geometry::from(point::p2()),
            Geometry::from(linestring::ls0()),
            Geometry::from(linestring::ls1()),
            Geometry::from(polygon::p0()),
            Geometry::from(polygon::p1()),
            Geometry::from(multipoint::mp0()),
            Geometry::from(multipoint::mp1()),
            Geometry::from(multilinestring::ml0()),
            Geometry::from(multilinestring::ml1()),
            Geometry::from(multipolygon::mp0()),
            Geometry::from(multipolygon::mp1()),
        ]
    }

    /// The geozero streaming path and the direct builder path must yield equal arrays.
    #[test]
    fn from_geo_using_geozero() -> Result<()> {
        let typ = GeometryType::new(Default::default());

        // Path 1: wrap the fixture in a root GeometryCollection and stream it through geozero.
        let collection = Geometry::GeometryCollection(GeometryCollection(geoms()));
        let streamed = collection.to_geometry_array(typ.clone()).unwrap();

        // Path 2: hand the same geometries straight to the builder.
        let nullable: Vec<_> = geoms().into_iter().map(Some).collect();
        let direct = GeometryBuilder::from_nullable_geometries(&nullable, typ)
            .unwrap()
            .finish();

        // These are constructed with two different code paths
        assert_eq!(streamed, direct);
        Ok(())
    }
}