Skip to main content

nodedb_spatial/
wkb.rs

1//! Well-Known Binary (WKB) serialization for Geometry types.
2//!
3//! WKB is the standard binary format for geometry interchange (ISO 13249).
4//! Used as the Arrow `DataType::Binary` backing for spatial columns —
5//! avoids JSON parse overhead during DataFusion query execution.
6//!
7//! Format (little-endian):
8//! ```text
9//! [byte_order: u8] [type: u32] [coordinates...]
10//! ```
11//!
12//! Byte order: 1 = little-endian (NDR), 0 = big-endian (XDR). We always
13//! write little-endian and accept both on read.
14
15use nodedb_types::geometry::Geometry;
16
// Geometry type codes stored in the WKB header (ISO 13249 / OGC SFA).

/// Type code for a single point.
const WKB_POINT: u32 = 1;
/// Type code for a line string (ordered point sequence).
const WKB_LINESTRING: u32 = 2;
/// Type code for a polygon (list of rings).
const WKB_POLYGON: u32 = 3;
/// Type code for a collection of points.
const WKB_MULTIPOINT: u32 = 4;
/// Type code for a collection of line strings.
const WKB_MULTILINESTRING: u32 = 5;
/// Type code for a collection of polygons.
const WKB_MULTIPOLYGON: u32 = 6;
/// Type code for a heterogeneous collection of geometries.
const WKB_GEOMETRYCOLLECTION: u32 = 7;

/// Byte-order marker written in front of every geometry: 1 = little-endian (NDR).
const BYTE_ORDER_LE: u8 = 1;
27
28/// Serialize a Geometry to WKB (little-endian).
29pub fn geometry_to_wkb(geom: &Geometry) -> Vec<u8> {
30    let mut buf = Vec::with_capacity(64);
31    write_geometry(&mut buf, geom);
32    buf
33}
34
35/// Deserialize a Geometry from WKB bytes.
36///
37/// Returns `None` if the bytes are malformed or truncated.
38pub fn geometry_from_wkb(data: &[u8]) -> Option<Geometry> {
39    let mut cursor = 0;
40    read_geometry(data, &mut cursor)
41}
42
43/// Extract bounding box from WKB without full deserialization.
44///
45/// Scans coordinate values to find min/max. Faster than full deserialize
46/// when only the bbox is needed (e.g., R-tree insertion from Arrow batch).
47pub fn wkb_bbox(data: &[u8]) -> Option<nodedb_types::BoundingBox> {
48    let geom = geometry_from_wkb(data)?;
49    Some(nodedb_types::geometry_bbox(&geom))
50}
51
52// ── Write helpers ──
53
54fn write_geometry(buf: &mut Vec<u8>, geom: &Geometry) {
55    match geom {
56        Geometry::Point { coordinates } => {
57            write_header(buf, WKB_POINT);
58            write_f64(buf, coordinates[0]);
59            write_f64(buf, coordinates[1]);
60        }
61        Geometry::LineString { coordinates } => {
62            write_header(buf, WKB_LINESTRING);
63            write_u32(buf, coordinates.len() as u32);
64            for c in coordinates {
65                write_f64(buf, c[0]);
66                write_f64(buf, c[1]);
67            }
68        }
69        Geometry::Polygon { coordinates } => {
70            write_header(buf, WKB_POLYGON);
71            write_u32(buf, coordinates.len() as u32);
72            for ring in coordinates {
73                write_u32(buf, ring.len() as u32);
74                for c in ring {
75                    write_f64(buf, c[0]);
76                    write_f64(buf, c[1]);
77                }
78            }
79        }
80        Geometry::MultiPoint { coordinates } => {
81            write_header(buf, WKB_MULTIPOINT);
82            write_u32(buf, coordinates.len() as u32);
83            for c in coordinates {
84                // Each point is a full WKB Point.
85                write_header(buf, WKB_POINT);
86                write_f64(buf, c[0]);
87                write_f64(buf, c[1]);
88            }
89        }
90        Geometry::MultiLineString { coordinates } => {
91            write_header(buf, WKB_MULTILINESTRING);
92            write_u32(buf, coordinates.len() as u32);
93            for ls in coordinates {
94                write_geometry(
95                    buf,
96                    &Geometry::LineString {
97                        coordinates: ls.clone(),
98                    },
99                );
100            }
101        }
102        Geometry::MultiPolygon { coordinates } => {
103            write_header(buf, WKB_MULTIPOLYGON);
104            write_u32(buf, coordinates.len() as u32);
105            for poly in coordinates {
106                write_geometry(
107                    buf,
108                    &Geometry::Polygon {
109                        coordinates: poly.clone(),
110                    },
111                );
112            }
113        }
114        Geometry::GeometryCollection { geometries } => {
115            write_header(buf, WKB_GEOMETRYCOLLECTION);
116            write_u32(buf, geometries.len() as u32);
117            for g in geometries {
118                write_geometry(buf, g);
119            }
120        }
121    }
122}
123
124fn write_header(buf: &mut Vec<u8>, wkb_type: u32) {
125    buf.push(BYTE_ORDER_LE);
126    write_u32(buf, wkb_type);
127}
128
/// Append `val` to `buf` as four little-endian bytes.
fn write_u32(buf: &mut Vec<u8>, val: u32) {
    let encoded: [u8; 4] = u32::to_le_bytes(val);
    buf.extend_from_slice(&encoded);
}
132
/// Append `val` to `buf` as eight little-endian bytes (IEEE 754 double).
fn write_f64(buf: &mut Vec<u8>, val: f64) {
    let encoded: [u8; 8] = f64::to_le_bytes(val);
    buf.extend_from_slice(&encoded);
}
136
137// ── Read helpers ──
138
139fn read_geometry(data: &[u8], cursor: &mut usize) -> Option<Geometry> {
140    let byte_order = read_u8(data, cursor)?;
141    let is_le = byte_order == 1;
142    let wkb_type = read_u32(data, cursor, is_le)?;
143
144    match wkb_type {
145        WKB_POINT => {
146            let x = read_f64(data, cursor, is_le)?;
147            let y = read_f64(data, cursor, is_le)?;
148            Some(Geometry::Point {
149                coordinates: [x, y],
150            })
151        }
152        WKB_LINESTRING => {
153            let n = read_u32(data, cursor, is_le)? as usize;
154            let coords = read_coords(data, cursor, n, is_le)?;
155            Some(Geometry::LineString {
156                coordinates: coords,
157            })
158        }
159        WKB_POLYGON => {
160            let num_rings = read_u32(data, cursor, is_le)? as usize;
161            let mut rings = Vec::with_capacity(num_rings);
162            for _ in 0..num_rings {
163                let n = read_u32(data, cursor, is_le)? as usize;
164                let ring = read_coords(data, cursor, n, is_le)?;
165                rings.push(ring);
166            }
167            Some(Geometry::Polygon { coordinates: rings })
168        }
169        WKB_MULTIPOINT => {
170            let count = read_u32(data, cursor, is_le)? as usize;
171            let mut coords = Vec::with_capacity(count);
172            for _ in 0..count {
173                let inner = read_geometry(data, cursor)?;
174                if let Geometry::Point { coordinates } = inner {
175                    coords.push(coordinates);
176                } else {
177                    return None;
178                }
179            }
180            Some(Geometry::MultiPoint {
181                coordinates: coords,
182            })
183        }
184        WKB_MULTILINESTRING => {
185            let count = read_u32(data, cursor, is_le)? as usize;
186            let mut lines = Vec::with_capacity(count);
187            for _ in 0..count {
188                let inner = read_geometry(data, cursor)?;
189                if let Geometry::LineString { coordinates } = inner {
190                    lines.push(coordinates);
191                } else {
192                    return None;
193                }
194            }
195            Some(Geometry::MultiLineString { coordinates: lines })
196        }
197        WKB_MULTIPOLYGON => {
198            let count = read_u32(data, cursor, is_le)? as usize;
199            let mut polys = Vec::with_capacity(count);
200            for _ in 0..count {
201                let inner = read_geometry(data, cursor)?;
202                if let Geometry::Polygon { coordinates } = inner {
203                    polys.push(coordinates);
204                } else {
205                    return None;
206                }
207            }
208            Some(Geometry::MultiPolygon { coordinates: polys })
209        }
210        WKB_GEOMETRYCOLLECTION => {
211            let count = read_u32(data, cursor, is_le)? as usize;
212            let mut geoms = Vec::with_capacity(count);
213            for _ in 0..count {
214                geoms.push(read_geometry(data, cursor)?);
215            }
216            Some(Geometry::GeometryCollection { geometries: geoms })
217        }
218        _ => None,
219    }
220}
221
/// Read the byte at `*cursor` and step the cursor forward by one.
///
/// Returns `None`, leaving the cursor untouched, when the cursor is at or
/// past the end of `data`.
fn read_u8(data: &[u8], cursor: &mut usize) -> Option<u8> {
    let byte = data.get(*cursor).copied()?;
    *cursor += 1;
    Some(byte)
}
230
/// Read a 4-byte unsigned integer at `*cursor` in the requested byte order
/// and advance the cursor by four.
///
/// Returns `None`, leaving the cursor untouched, when fewer than four bytes
/// remain.
fn read_u32(data: &[u8], cursor: &mut usize, is_le: bool) -> Option<u32> {
    let start = *cursor;
    let window = data.get(start..start + 4)?;

    let mut raw = [0u8; 4];
    raw.copy_from_slice(window);
    *cursor = start + 4;

    let value = if is_le {
        u32::from_le_bytes(raw)
    } else {
        u32::from_be_bytes(raw)
    };
    Some(value)
}
248
/// Read an 8-byte IEEE 754 double at `*cursor` in the requested byte order
/// and advance the cursor by eight.
///
/// Returns `None`, leaving the cursor untouched, when fewer than eight bytes
/// remain.
fn read_f64(data: &[u8], cursor: &mut usize, is_le: bool) -> Option<f64> {
    let start = *cursor;
    let window = data.get(start..start + 8)?;

    let mut raw = [0u8; 8];
    raw.copy_from_slice(window);
    *cursor = start + 8;

    let value = if is_le {
        f64::from_le_bytes(raw)
    } else {
        f64::from_be_bytes(raw)
    };
    Some(value)
}
270
/// Read `count` consecutive x/y pairs (16 bytes each) starting at `*cursor`.
///
/// The whole span is bounds-checked before anything is allocated, so a bogus
/// `count` taken from untrusted input yields `None` instead of a huge
/// allocation or a capacity-overflow panic. On success the cursor is advanced
/// past the pairs; on failure it is left untouched.
fn read_coords(
    data: &[u8],
    cursor: &mut usize,
    count: usize,
    is_le: bool,
) -> Option<Vec<[f64; 2]>> {
    const COORD_BYTES: usize = 16;

    // Checked arithmetic: `count` may be arbitrarily large.
    let byte_len = count.checked_mul(COORD_BYTES)?;
    let end = cursor.checked_add(byte_len)?;
    let payload = data.get(*cursor..end)?;

    let decode = |raw: &[u8]| -> f64 {
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(raw);
        if is_le {
            f64::from_le_bytes(bytes)
        } else {
            f64::from_be_bytes(bytes)
        }
    };

    // `payload.len()` is an exact multiple of COORD_BYTES, so there is no
    // remainder to handle.
    let coords: Vec<[f64; 2]> = payload
        .chunks_exact(COORD_BYTES)
        .map(|pair| [decode(&pair[..8]), decode(&pair[8..])])
        .collect();

    *cursor = end;
    Some(coords)
}
285
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode `geom`, decode the bytes again, and require an identical value.
    fn assert_roundtrip(geom: Geometry) {
        let wkb = geometry_to_wkb(&geom);
        let decoded = geometry_from_wkb(&wkb).expect("freshly encoded WKB must decode");
        assert_eq!(geom, decoded);
    }

    #[test]
    fn point_roundtrip() {
        assert_roundtrip(Geometry::point(-73.9857, 40.7484));
    }

    #[test]
    fn linestring_roundtrip() {
        assert_roundtrip(Geometry::line_string(vec![
            [0.0, 0.0],
            [1.0, 1.0],
            [2.0, 0.0],
        ]));
    }

    #[test]
    fn polygon_roundtrip() {
        assert_roundtrip(Geometry::polygon(vec![
            vec![
                [0.0, 0.0],
                [10.0, 0.0],
                [10.0, 10.0],
                [0.0, 10.0],
                [0.0, 0.0],
            ],
            vec![[2.0, 2.0], [3.0, 2.0], [3.0, 3.0], [2.0, 3.0], [2.0, 2.0]], // hole
        ]));
    }

    #[test]
    fn multipoint_roundtrip() {
        assert_roundtrip(Geometry::MultiPoint {
            coordinates: vec![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
        });
    }

    #[test]
    fn multilinestring_roundtrip() {
        assert_roundtrip(Geometry::MultiLineString {
            coordinates: vec![
                vec![[0.0, 0.0], [1.0, 1.0]],
                vec![[2.0, 2.0], [3.0, 3.0], [4.0, 2.0]],
            ],
        });
    }

    #[test]
    fn multipolygon_roundtrip() {
        assert_roundtrip(Geometry::MultiPolygon {
            coordinates: vec![
                vec![vec![
                    [0.0, 0.0],
                    [1.0, 0.0],
                    [1.0, 1.0],
                    [0.0, 1.0],
                    [0.0, 0.0],
                ]],
                vec![vec![
                    [5.0, 5.0],
                    [6.0, 5.0],
                    [6.0, 6.0],
                    [5.0, 6.0],
                    [5.0, 5.0],
                ]],
            ],
        });
    }

    #[test]
    fn geometry_collection_roundtrip() {
        assert_roundtrip(Geometry::GeometryCollection {
            geometries: vec![
                Geometry::point(1.0, 2.0),
                Geometry::line_string(vec![[0.0, 0.0], [1.0, 1.0]]),
            ],
        });
    }

    #[test]
    fn big_endian_point_is_decoded() {
        // Hand-built XDR (big-endian) point: marker 0, type 1, then x and y.
        let mut wkb = vec![0u8];
        wkb.extend_from_slice(&WKB_POINT.to_be_bytes());
        wkb.extend_from_slice(&1.5f64.to_be_bytes());
        wkb.extend_from_slice(&(-2.25f64).to_be_bytes());
        assert_eq!(
            geometry_from_wkb(&wkb),
            Some(Geometry::Point {
                coordinates: [1.5, -2.25],
            })
        );
    }

    #[test]
    fn truncated_data_returns_none() {
        let wkb = geometry_to_wkb(&Geometry::point(1.0, 2.0));
        assert!(geometry_from_wkb(&wkb[..3]).is_none());
        assert!(geometry_from_wkb(&[]).is_none());
    }

    #[test]
    fn linestring_shorter_than_declared_count_returns_none() {
        let mut wkb = geometry_to_wkb(&Geometry::line_string(vec![[0.0, 0.0], [1.0, 1.0]]));
        // Drop the last y value: the header still announces two points.
        wkb.truncate(wkb.len() - 8);
        assert!(geometry_from_wkb(&wkb).is_none());
    }

    #[test]
    fn invalid_type_returns_none() {
        let mut wkb = geometry_to_wkb(&Geometry::point(1.0, 2.0));
        wkb[1] = 99; // invalid WKB type
        assert!(geometry_from_wkb(&wkb).is_none());
    }

    #[test]
    fn wkb_bbox_extraction() {
        let geom = Geometry::polygon(vec![vec![
            [-10.0, -5.0],
            [10.0, -5.0],
            [10.0, 5.0],
            [-10.0, 5.0],
            [-10.0, -5.0],
        ]]);
        let wkb = geometry_to_wkb(&geom);
        let bb = wkb_bbox(&wkb).unwrap();
        assert_eq!(bb.min_lng, -10.0);
        assert_eq!(bb.max_lng, 10.0);
        assert_eq!(bb.min_lat, -5.0);
        assert_eq!(bb.max_lat, 5.0);
    }

    #[test]
    fn point_wkb_size() {
        let wkb = geometry_to_wkb(&Geometry::point(0.0, 0.0));
        // 1 (byte order) + 4 (type) + 8 (x) + 8 (y) = 21 bytes
        assert_eq!(wkb.len(), 21);
    }
}