Skip to main content

nodedb_spatial/
wkb.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Well-Known Binary (WKB) serialization for Geometry types.
4//!
5//! WKB is the standard binary format for geometry interchange (ISO 13249).
6//! Used as the Arrow `DataType::Binary` backing for spatial columns —
7//! avoids JSON parse overhead during DataFusion query execution.
8//!
9//! Format (little-endian):
10//! ```text
11//! [byte_order: u8] [type: u32] [coordinates...]
12//! ```
13//!
14//! Byte order: 1 = little-endian (NDR), 0 = big-endian (XDR). We always
15//! write little-endian and accept both on read.
16
17use nodedb_types::geometry::Geometry;
18
// WKB geometry type codes (ISO 13249 / OGC SFA).
// `read_geometry` returns `None` for any type code not listed here.

/// Type code for a single point (one `x, y` pair).
const WKB_POINT: u32 = 1;
/// Type code for a line string (counted sequence of points).
const WKB_LINESTRING: u32 = 2;
/// Type code for a polygon (counted sequence of rings).
const WKB_POLYGON: u32 = 3;
/// Type code for a multi-point (counted sequence of full WKB points).
const WKB_MULTIPOINT: u32 = 4;
/// Type code for a multi-line-string (counted sequence of full WKB line strings).
const WKB_MULTILINESTRING: u32 = 5;
/// Type code for a multi-polygon (counted sequence of full WKB polygons).
const WKB_MULTIPOLYGON: u32 = 6;
/// Type code for a geometry collection (counted sequence of arbitrary geometries).
const WKB_GEOMETRYCOLLECTION: u32 = 7;

/// Byte-order marker emitted at the start of every geometry header
/// (1 = little-endian / NDR).
const BYTE_ORDER_LE: u8 = 1;
29
30/// Serialize a Geometry to WKB (little-endian).
31pub fn geometry_to_wkb(geom: &Geometry) -> Vec<u8> {
32    let mut buf = Vec::with_capacity(64);
33    write_geometry(&mut buf, geom);
34    buf
35}
36
37/// Deserialize a Geometry from WKB bytes.
38///
39/// Returns `None` if the bytes are malformed or truncated.
40pub fn geometry_from_wkb(data: &[u8]) -> Option<Geometry> {
41    let mut cursor = 0;
42    read_geometry(data, &mut cursor)
43}
44
45/// Extract bounding box from WKB without full deserialization.
46///
47/// Scans coordinate values to find min/max. Faster than full deserialize
48/// when only the bbox is needed (e.g., R-tree insertion from Arrow batch).
49pub fn wkb_bbox(data: &[u8]) -> Option<nodedb_types::BoundingBox> {
50    let geom = geometry_from_wkb(data)?;
51    Some(nodedb_types::geometry_bbox(&geom))
52}
53
54// ── Write helpers ──
55
56fn write_geometry(buf: &mut Vec<u8>, geom: &Geometry) {
57    match geom {
58        Geometry::Point { coordinates } => {
59            write_header(buf, WKB_POINT);
60            write_f64(buf, coordinates[0]);
61            write_f64(buf, coordinates[1]);
62        }
63        Geometry::LineString { coordinates } => {
64            write_header(buf, WKB_LINESTRING);
65            write_u32(buf, coordinates.len() as u32);
66            for c in coordinates {
67                write_f64(buf, c[0]);
68                write_f64(buf, c[1]);
69            }
70        }
71        Geometry::Polygon { coordinates } => {
72            write_header(buf, WKB_POLYGON);
73            write_u32(buf, coordinates.len() as u32);
74            for ring in coordinates {
75                write_u32(buf, ring.len() as u32);
76                for c in ring {
77                    write_f64(buf, c[0]);
78                    write_f64(buf, c[1]);
79                }
80            }
81        }
82        Geometry::MultiPoint { coordinates } => {
83            write_header(buf, WKB_MULTIPOINT);
84            write_u32(buf, coordinates.len() as u32);
85            for c in coordinates {
86                // Each point is a full WKB Point.
87                write_header(buf, WKB_POINT);
88                write_f64(buf, c[0]);
89                write_f64(buf, c[1]);
90            }
91        }
92        Geometry::MultiLineString { coordinates } => {
93            write_header(buf, WKB_MULTILINESTRING);
94            write_u32(buf, coordinates.len() as u32);
95            for ls in coordinates {
96                write_geometry(
97                    buf,
98                    &Geometry::LineString {
99                        coordinates: ls.clone(),
100                    },
101                );
102            }
103        }
104        Geometry::MultiPolygon { coordinates } => {
105            write_header(buf, WKB_MULTIPOLYGON);
106            write_u32(buf, coordinates.len() as u32);
107            for poly in coordinates {
108                write_geometry(
109                    buf,
110                    &Geometry::Polygon {
111                        coordinates: poly.clone(),
112                    },
113                );
114            }
115        }
116        Geometry::GeometryCollection { geometries } => {
117            write_header(buf, WKB_GEOMETRYCOLLECTION);
118            write_u32(buf, geometries.len() as u32);
119            for g in geometries {
120                write_geometry(buf, g);
121            }
122        }
123
124        // Unknown future geometry type — write empty geometry collection.
125        _ => {
126            write_header(buf, WKB_GEOMETRYCOLLECTION);
127            write_u32(buf, 0);
128        }
129    }
130}
131
132fn write_header(buf: &mut Vec<u8>, wkb_type: u32) {
133    buf.push(BYTE_ORDER_LE);
134    write_u32(buf, wkb_type);
135}
136
/// Append `val` to `buf` as four little-endian bytes.
fn write_u32(buf: &mut Vec<u8>, val: u32) {
    buf.extend(val.to_le_bytes());
}
140
/// Append `val` to `buf` as eight little-endian bytes (IEEE 754 bit pattern).
fn write_f64(buf: &mut Vec<u8>, val: f64) {
    buf.extend(val.to_le_bytes());
}
144
145// ── Read helpers ──
146
147fn read_geometry(data: &[u8], cursor: &mut usize) -> Option<Geometry> {
148    let byte_order = read_u8(data, cursor)?;
149    let is_le = byte_order == 1;
150    let wkb_type = read_u32(data, cursor, is_le)?;
151
152    match wkb_type {
153        WKB_POINT => {
154            let x = read_f64(data, cursor, is_le)?;
155            let y = read_f64(data, cursor, is_le)?;
156            Some(Geometry::Point {
157                coordinates: [x, y],
158            })
159        }
160        WKB_LINESTRING => {
161            let n = read_u32(data, cursor, is_le)? as usize;
162            let coords = read_coords(data, cursor, n, is_le)?;
163            Some(Geometry::LineString {
164                coordinates: coords,
165            })
166        }
167        WKB_POLYGON => {
168            let num_rings = read_u32(data, cursor, is_le)? as usize;
169            let mut rings = Vec::with_capacity(num_rings);
170            for _ in 0..num_rings {
171                let n = read_u32(data, cursor, is_le)? as usize;
172                let ring = read_coords(data, cursor, n, is_le)?;
173                rings.push(ring);
174            }
175            Some(Geometry::Polygon { coordinates: rings })
176        }
177        WKB_MULTIPOINT => {
178            let count = read_u32(data, cursor, is_le)? as usize;
179            let mut coords = Vec::with_capacity(count);
180            for _ in 0..count {
181                let inner = read_geometry(data, cursor)?;
182                if let Geometry::Point { coordinates } = inner {
183                    coords.push(coordinates);
184                } else {
185                    return None;
186                }
187            }
188            Some(Geometry::MultiPoint {
189                coordinates: coords,
190            })
191        }
192        WKB_MULTILINESTRING => {
193            let count = read_u32(data, cursor, is_le)? as usize;
194            let mut lines = Vec::with_capacity(count);
195            for _ in 0..count {
196                let inner = read_geometry(data, cursor)?;
197                if let Geometry::LineString { coordinates } = inner {
198                    lines.push(coordinates);
199                } else {
200                    return None;
201                }
202            }
203            Some(Geometry::MultiLineString { coordinates: lines })
204        }
205        WKB_MULTIPOLYGON => {
206            let count = read_u32(data, cursor, is_le)? as usize;
207            let mut polys = Vec::with_capacity(count);
208            for _ in 0..count {
209                let inner = read_geometry(data, cursor)?;
210                if let Geometry::Polygon { coordinates } = inner {
211                    polys.push(coordinates);
212                } else {
213                    return None;
214                }
215            }
216            Some(Geometry::MultiPolygon { coordinates: polys })
217        }
218        WKB_GEOMETRYCOLLECTION => {
219            let count = read_u32(data, cursor, is_le)? as usize;
220            let mut geoms = Vec::with_capacity(count);
221            for _ in 0..count {
222                geoms.push(read_geometry(data, cursor)?);
223            }
224            Some(Geometry::GeometryCollection { geometries: geoms })
225        }
226        _ => None,
227    }
228}
229
/// Read the byte at `*cursor` and advance the cursor by one.
///
/// Returns `None` (leaving the cursor untouched) when `*cursor` is at or
/// past the end of `data`.
fn read_u8(data: &[u8], cursor: &mut usize) -> Option<u8> {
    let byte = data.get(*cursor).copied()?;
    *cursor += 1;
    Some(byte)
}
238
/// Read a `u32` from the four bytes at `*cursor` and advance the cursor by 4.
///
/// `is_le` selects little-endian (`true`) or big-endian (`false`) decoding.
/// Returns `None` (leaving the cursor untouched) when fewer than four bytes
/// remain.
fn read_u32(data: &[u8], cursor: &mut usize, is_le: bool) -> Option<u32> {
    let start = *cursor;
    let end = start + 4;
    let window = data.get(start..end)?;

    let mut raw = [0u8; 4];
    raw.copy_from_slice(window);
    *cursor = end;

    Some(match is_le {
        true => u32::from_le_bytes(raw),
        false => u32::from_be_bytes(raw),
    })
}
256
/// Read an `f64` from the eight bytes at `*cursor` and advance the cursor by 8.
///
/// `is_le` selects little-endian (`true`) or big-endian (`false`) decoding.
/// Returns `None` (leaving the cursor untouched) when fewer than eight bytes
/// remain.
fn read_f64(data: &[u8], cursor: &mut usize, is_le: bool) -> Option<f64> {
    let start = *cursor;
    let end = start + 8;
    let window = data.get(start..end)?;

    let mut raw = [0u8; 8];
    raw.copy_from_slice(window);
    *cursor = end;

    Some(match is_le {
        true => f64::from_le_bytes(raw),
        false => f64::from_be_bytes(raw),
    })
}
278
/// Read `count` consecutive `x, y` pairs (16 bytes each) starting at `*cursor`.
///
/// `is_le` selects little-endian (`true`) or big-endian (`false`) decoding of
/// every value. On success the cursor is advanced past all pairs.
///
/// Returns `None` when `data` does not contain `count` full pairs from the
/// cursor onward. The required byte length is validated — with overflow-checked
/// arithmetic — *before* anything is allocated, so an absurd `count` taken from
/// untrusted input cannot cause a capacity-overflow panic or a huge
/// allocation. The cursor is left untouched on failure.
fn read_coords(
    data: &[u8],
    cursor: &mut usize,
    count: usize,
    is_le: bool,
) -> Option<Vec<[f64; 2]>> {
    const PAIR_BYTES: usize = 16;

    let byte_len = count.checked_mul(PAIR_BYTES)?;
    let end = (*cursor).checked_add(byte_len)?;
    let raw = data.get(*cursor..end)?;

    // Decodes one 8-byte field using the requested byte order.
    let decode = |field: &[u8]| -> f64 {
        let mut word = [0u8; 8];
        word.copy_from_slice(field);
        if is_le {
            f64::from_le_bytes(word)
        } else {
            f64::from_be_bytes(word)
        }
    };

    let coords: Vec<[f64; 2]> = raw
        .chunks_exact(PAIR_BYTES)
        .map(|pair| [decode(&pair[..8]), decode(&pair[8..])])
        .collect();

    *cursor = end;
    Some(coords)
}
293
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode `geom` to WKB and decode it straight back.
    fn roundtrip(geom: &Geometry) -> Geometry {
        let wkb = geometry_to_wkb(geom);
        geometry_from_wkb(&wkb).unwrap()
    }

    /// Closed rectangular ring, starting and ending at `(min_x, min_y)`.
    fn rect(min_x: f64, min_y: f64, max_x: f64, max_y: f64) -> Vec<[f64; 2]> {
        vec![
            [min_x, min_y],
            [max_x, min_y],
            [max_x, max_y],
            [min_x, max_y],
            [min_x, min_y],
        ]
    }

    #[test]
    fn point_roundtrip() {
        let original = Geometry::point(-73.9857, 40.7484);
        assert_eq!(original, roundtrip(&original));
    }

    #[test]
    fn linestring_roundtrip() {
        let original = Geometry::line_string(vec![[0.0, 0.0], [1.0, 1.0], [2.0, 0.0]]);
        assert_eq!(original, roundtrip(&original));
    }

    #[test]
    fn polygon_roundtrip() {
        // Outer shell plus one interior hole.
        let shell = rect(0.0, 0.0, 10.0, 10.0);
        let hole = rect(2.0, 2.0, 3.0, 3.0);
        let original = Geometry::polygon(vec![shell, hole]);
        assert_eq!(original, roundtrip(&original));
    }

    #[test]
    fn multipoint_roundtrip() {
        let original = Geometry::MultiPoint {
            coordinates: vec![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
        };
        assert_eq!(original, roundtrip(&original));
    }

    #[test]
    fn multilinestring_roundtrip() {
        let first = vec![[0.0, 0.0], [1.0, 1.0]];
        let second = vec![[2.0, 2.0], [3.0, 3.0], [4.0, 2.0]];
        let original = Geometry::MultiLineString {
            coordinates: vec![first, second],
        };
        assert_eq!(original, roundtrip(&original));
    }

    #[test]
    fn multipolygon_roundtrip() {
        let original = Geometry::MultiPolygon {
            coordinates: vec![
                vec![rect(0.0, 0.0, 1.0, 1.0)],
                vec![rect(5.0, 5.0, 6.0, 6.0)],
            ],
        };
        assert_eq!(original, roundtrip(&original));
    }

    #[test]
    fn geometry_collection_roundtrip() {
        let members = vec![
            Geometry::point(1.0, 2.0),
            Geometry::line_string(vec![[0.0, 0.0], [1.0, 1.0]]),
        ];
        let original = Geometry::GeometryCollection {
            geometries: members,
        };
        assert_eq!(original, roundtrip(&original));
    }

    #[test]
    fn truncated_data_returns_none() {
        let encoded = geometry_to_wkb(&Geometry::point(1.0, 2.0));
        // Cut off in the middle of the type code.
        assert!(geometry_from_wkb(&encoded[..3]).is_none());
        // No bytes at all.
        assert!(geometry_from_wkb(&[]).is_none());
    }

    #[test]
    fn invalid_type_returns_none() {
        let mut encoded = geometry_to_wkb(&Geometry::point(1.0, 2.0));
        encoded[1] = 99; // invalid WKB type
        assert!(geometry_from_wkb(&encoded).is_none());
    }

    #[test]
    fn wkb_bbox_extraction() {
        let polygon = Geometry::polygon(vec![rect(-10.0, -5.0, 10.0, 5.0)]);
        let encoded = geometry_to_wkb(&polygon);
        let bb = wkb_bbox(&encoded).unwrap();
        assert_eq!(bb.min_lng, -10.0);
        assert_eq!(bb.max_lng, 10.0);
        assert_eq!(bb.min_lat, -5.0);
        assert_eq!(bb.max_lat, 5.0);
    }

    #[test]
    fn point_wkb_size() {
        // 1 (byte order) + 4 (type) + 8 (x) + 8 (y) = 21 bytes
        let encoded = geometry_to_wkb(&Geometry::point(0.0, 0.0));
        assert_eq!(encoded.len(), 21);
    }
}