Skip to main content

nodedb_spatial/
geo_meta.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! GeoParquet and GeoArrow metadata for geometry columns.
4//!
5//! GeoParquet: JSON metadata in Parquet file key-value metadata that tells
6//! external tools (DuckDB, QGIS, GeoPandas) which columns contain geometry
7//! and what encoding/CRS is used.
8//!
9//! GeoArrow: Arrow extension type metadata on Binary columns so Arrow-native
10//! tools can recognize NodeDB's spatial columns.
11//!
12//! References:
13//! - GeoParquet spec: https://geoparquet.org/releases/v1.1.0/
14//! - GeoArrow spec: https://geoarrow.org/extension-types
15
16use serde::{Deserialize, Serialize};
17use std::collections::HashMap;
18
19/// GeoParquet metadata for a Parquet file containing geometry columns.
20///
21/// Stored as JSON in the Parquet file's key-value metadata under the key "geo".
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct GeoParquetMetadata {
24    /// GeoParquet spec version.
25    pub version: String,
26    /// Primary geometry column name.
27    pub primary_column: String,
28    /// Per-column geometry metadata.
29    pub columns: HashMap<String, GeoParquetColumnMeta>,
30}
31
32/// Metadata for a single geometry column in GeoParquet.
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct GeoParquetColumnMeta {
35    /// Encoding: "WKB" (default).
36    pub encoding: String,
37    /// Geometry types present in this column.
38    pub geometry_types: Vec<String>,
39    /// Coordinate Reference System. "EPSG:4326" for WGS-84.
40    pub crs: serde_json::Value,
41    /// Bounding box of all geometries: [min_lng, min_lat, max_lng, max_lat].
42    #[serde(skip_serializing_if = "Option::is_none")]
43    pub bbox: Option<[f64; 4]>,
44}
45
46impl GeoParquetMetadata {
47    /// Create metadata for a single geometry column.
48    pub fn single_column(
49        column_name: &str,
50        geometry_types: Vec<String>,
51        bbox: Option<[f64; 4]>,
52    ) -> Self {
53        let mut columns = HashMap::new();
54        columns.insert(
55            column_name.to_string(),
56            GeoParquetColumnMeta {
57                encoding: "WKB".to_string(),
58                geometry_types,
59                crs: serde_json::json!({
60                    "type": "GeographicCRS",
61                    "name": "WGS 84",
62                    "id": { "authority": "EPSG", "code": 4326 }
63                }),
64                bbox,
65            },
66        );
67        Self {
68            version: "1.1.0".to_string(),
69            primary_column: column_name.to_string(),
70            columns,
71        }
72    }
73
74    /// Serialize to JSON string for Parquet file metadata.
75    pub fn to_json(&self) -> Result<String, sonic_rs::Error> {
76        sonic_rs::to_string(self)
77    }
78
79    /// The Parquet metadata key for GeoParquet.
80    pub const PARQUET_KEY: &'static str = "geo";
81}
82
/// Name of the GeoArrow extension type used for WKB-encoded geometry.
///
/// Attach it to Arrow `DataType::Binary` columns so that Arrow-native tools
/// (DuckDB, GeoPolars) treat those columns as geometry.
pub const GEOARROW_EXTENSION_NAME: &str = "geoarrow.wkb";
88
89/// GeoArrow extension metadata (JSON).
90///
91/// Stored in Arrow schema's field metadata under the key
92/// `ARROW:extension:metadata`.
93pub fn geoarrow_extension_metadata(crs_epsg: u32) -> String {
94    serde_json::json!({
95        "crs": {
96            "type": "GeographicCRS",
97            "name": "WGS 84",
98            "id": { "authority": "EPSG", "code": crs_epsg }
99        }
100    })
101    .to_string()
102}
103
/// Arrow field metadata key that holds the extension type's name.
pub const ARROW_EXTENSION_NAME_KEY: &str = "ARROW:extension:name";
/// Arrow field metadata key that holds the extension type's metadata.
pub const ARROW_EXTENSION_METADATA_KEY: &str = "ARROW:extension:metadata";
107
108/// Build Arrow field metadata for a WKB geometry column.
109///
110/// Returns a HashMap to set on the Arrow Field's metadata.
111pub fn geoarrow_field_metadata() -> HashMap<String, String> {
112    let mut meta = HashMap::new();
113    meta.insert(
114        ARROW_EXTENSION_NAME_KEY.to_string(),
115        GEOARROW_EXTENSION_NAME.to_string(),
116    );
117    meta.insert(
118        ARROW_EXTENSION_METADATA_KEY.to_string(),
119        geoarrow_extension_metadata(4326),
120    );
121    meta
122}
123
#[cfg(test)]
mod tests {
    use super::*;

    /// The serialized document carries the version, primary column,
    /// encoding and an EPSG reference.
    #[test]
    fn geoparquet_metadata_json() {
        let geometry_types = vec!["Point".to_string(), "Polygon".to_string()];
        let world_bbox = Some([-180.0, -90.0, 180.0, 90.0]);
        let document = GeoParquetMetadata::single_column("geom", geometry_types, world_bbox);

        let rendered = document.to_json().unwrap();
        for expected in [
            "\"version\":\"1.1.0\"",
            "\"primary_column\":\"geom\"",
            "\"encoding\":\"WKB\"",
            "EPSG",
        ] {
            assert!(rendered.contains(expected));
        }
    }

    /// The Parquet metadata key is fixed to "geo".
    #[test]
    fn geoparquet_key() {
        assert_eq!(GeoParquetMetadata::PARQUET_KEY, "geo");
    }

    /// Field metadata names the extension and references EPSG in its CRS.
    #[test]
    fn geoarrow_field_meta() {
        let field_meta = geoarrow_field_metadata();
        let extension_name = &field_meta[ARROW_EXTENSION_NAME_KEY];
        let extension_meta = &field_meta[ARROW_EXTENSION_METADATA_KEY];
        assert_eq!(extension_name, "geoarrow.wkb");
        assert!(extension_meta.contains("EPSG"));
    }

    /// The extension name constant has the expected value.
    #[test]
    fn geoarrow_extension_name() {
        assert_eq!(GEOARROW_EXTENSION_NAME, "geoarrow.wkb");
    }

    /// A document without a bbox survives a serialize/deserialize cycle.
    #[test]
    fn roundtrip_parquet_metadata() {
        let original = GeoParquetMetadata::single_column("location", vec!["Point".into()], None);
        let rendered = original.to_json().unwrap();
        let decoded: GeoParquetMetadata = sonic_rs::from_str(&rendered).unwrap();
        assert_eq!(decoded.primary_column, "location");
        assert_eq!(decoded.columns["location"].encoding, "WKB");
    }
}