Skip to main content

nodedb_spatial/
geo_meta.rs

1//! GeoParquet and GeoArrow metadata for geometry columns.
2//!
3//! GeoParquet: JSON metadata in Parquet file key-value metadata that tells
4//! external tools (DuckDB, QGIS, GeoPandas) which columns contain geometry
5//! and what encoding/CRS is used.
6//!
7//! GeoArrow: Arrow extension type metadata on Binary columns so Arrow-native
8//! tools can recognize NodeDB's spatial columns.
9//!
10//! References:
11//! - GeoParquet spec: https://geoparquet.org/releases/v1.1.0/
12//! - GeoArrow spec: https://geoarrow.org/extension-types
13
14use serde::{Deserialize, Serialize};
15use std::collections::HashMap;
16
/// GeoParquet metadata for a Parquet file containing geometry columns.
///
/// Stored as JSON in the Parquet file's key-value metadata under the key "geo"
/// (exposed as [`GeoParquetMetadata::PARQUET_KEY`]). The fields are serialized
/// under their Rust names: `version`, `primary_column` and `columns`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeoParquetMetadata {
    /// GeoParquet spec version string; [`GeoParquetMetadata::single_column`]
    /// writes "1.1.0".
    pub version: String,
    /// Primary geometry column name. [`GeoParquetMetadata::single_column`]
    /// also inserts this name as the only key of `columns`.
    pub primary_column: String,
    /// Per-column geometry metadata, keyed by column name.
    pub columns: HashMap<String, GeoParquetColumnMeta>,
}
29
/// Metadata for a single geometry column in GeoParquet.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeoParquetColumnMeta {
    /// Geometry encoding name; [`GeoParquetMetadata::single_column`] always
    /// writes "WKB".
    pub encoding: String,
    /// Geometry types present in this column (e.g. "Point", "Polygon").
    pub geometry_types: Vec<String>,
    /// Coordinate Reference System, held as an arbitrary JSON value.
    ///
    /// [`GeoParquetMetadata::single_column`] stores a JSON object describing
    /// WGS 84 with an `EPSG` / `4326` identifier here, not the plain string
    /// "EPSG:4326".
    pub crs: serde_json::Value,
    /// Bounding box of all geometries: [min_lng, min_lat, max_lng, max_lat].
    ///
    /// When this is `None` the `bbox` key is left out of the serialized
    /// output entirely rather than being written as `null`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bbox: Option<[f64; 4]>,
}
43
44impl GeoParquetMetadata {
45    /// Create metadata for a single geometry column.
46    pub fn single_column(
47        column_name: &str,
48        geometry_types: Vec<String>,
49        bbox: Option<[f64; 4]>,
50    ) -> Self {
51        let mut columns = HashMap::new();
52        columns.insert(
53            column_name.to_string(),
54            GeoParquetColumnMeta {
55                encoding: "WKB".to_string(),
56                geometry_types,
57                crs: serde_json::json!({
58                    "type": "GeographicCRS",
59                    "name": "WGS 84",
60                    "id": { "authority": "EPSG", "code": 4326 }
61                }),
62                bbox,
63            },
64        );
65        Self {
66            version: "1.1.0".to_string(),
67            primary_column: column_name.to_string(),
68            columns,
69        }
70    }
71
72    /// Serialize to JSON string for Parquet file metadata.
73    pub fn to_json(&self) -> Result<String, sonic_rs::Error> {
74        sonic_rs::to_string(self)
75    }
76
77    /// The Parquet metadata key for GeoParquet.
78    pub const PARQUET_KEY: &'static str = "geo";
79}
80
/// GeoArrow extension type name for WKB-encoded geometry columns.
///
/// Register this on Arrow `DataType::Binary` columns so Arrow-native tools
/// (DuckDB, GeoPolars) recognize them as geometry. `geoarrow_field_metadata`
/// stores it under the `ARROW:extension:name` field-metadata key.
pub const GEOARROW_EXTENSION_NAME: &str = "geoarrow.wkb";
86
87/// GeoArrow extension metadata (JSON).
88///
89/// Stored in Arrow schema's field metadata under the key
90/// `ARROW:extension:metadata`.
91pub fn geoarrow_extension_metadata(crs_epsg: u32) -> String {
92    serde_json::json!({
93        "crs": {
94            "type": "GeographicCRS",
95            "name": "WGS 84",
96            "id": { "authority": "EPSG", "code": crs_epsg }
97        }
98    })
99    .to_string()
100}
101
/// Arrow field metadata key holding the extension type name
/// (`geoarrow_field_metadata` stores `GEOARROW_EXTENSION_NAME` under it).
pub const ARROW_EXTENSION_NAME_KEY: &str = "ARROW:extension:name";
/// Arrow field metadata key holding the extension type's JSON metadata
/// (`geoarrow_field_metadata` stores the output of
/// `geoarrow_extension_metadata` under it).
pub const ARROW_EXTENSION_METADATA_KEY: &str = "ARROW:extension:metadata";
105
106/// Build Arrow field metadata for a WKB geometry column.
107///
108/// Returns a HashMap to set on the Arrow Field's metadata.
109pub fn geoarrow_field_metadata() -> HashMap<String, String> {
110    let mut meta = HashMap::new();
111    meta.insert(
112        ARROW_EXTENSION_NAME_KEY.to_string(),
113        GEOARROW_EXTENSION_NAME.to_string(),
114    );
115    meta.insert(
116        ARROW_EXTENSION_METADATA_KEY.to_string(),
117        geoarrow_extension_metadata(4326),
118    );
119    meta
120}
121
#[cfg(test)]
mod tests {
    use super::*;

    /// Whole-world extent in [min_lng, min_lat, max_lng, max_lat] order, used
    /// as a representative bounding box. All values are exactly representable
    /// as `f64`, so equality checks after a round trip are safe.
    const WORLD_BBOX: [f64; 4] = [-180.0, -90.0, 180.0, 90.0];

    #[test]
    fn geoparquet_metadata_json() {
        let meta = GeoParquetMetadata::single_column(
            "geom",
            vec!["Point".to_string(), "Polygon".to_string()],
            Some(WORLD_BBOX),
        );
        let json = meta.to_json().unwrap();
        assert!(json.contains("\"version\":\"1.1.0\""));
        assert!(json.contains("\"primary_column\":\"geom\""));
        assert!(json.contains("\"encoding\":\"WKB\""));
        assert!(json.contains("EPSG"));
        // A supplied bounding box must appear in the output.
        assert!(json.contains("\"bbox\""));
    }

    #[test]
    fn geoparquet_omits_missing_bbox() {
        // `skip_serializing_if` must drop the key instead of writing `null`.
        let meta = GeoParquetMetadata::single_column("geom", vec!["Point".into()], None);
        let json = meta.to_json().unwrap();
        assert!(!json.contains("\"bbox\""));
    }

    #[test]
    fn geoparquet_key() {
        assert_eq!(GeoParquetMetadata::PARQUET_KEY, "geo");
    }

    #[test]
    fn geoarrow_field_meta() {
        let meta = geoarrow_field_metadata();
        assert_eq!(meta.len(), 2);
        assert_eq!(meta[ARROW_EXTENSION_NAME_KEY], "geoarrow.wkb");
        assert!(meta[ARROW_EXTENSION_METADATA_KEY].contains("EPSG"));
        // The field metadata is always built for EPSG code 4326.
        assert!(meta[ARROW_EXTENSION_METADATA_KEY].contains("4326"));
    }

    #[test]
    fn geoarrow_extension_name() {
        assert_eq!(GEOARROW_EXTENSION_NAME, "geoarrow.wkb");
    }

    #[test]
    fn roundtrip_parquet_metadata() {
        let meta = GeoParquetMetadata::single_column("location", vec!["Point".into()], None);
        let json = meta.to_json().unwrap();
        let parsed: GeoParquetMetadata = sonic_rs::from_str(&json).unwrap();
        assert_eq!(parsed.version, "1.1.0");
        assert_eq!(parsed.primary_column, "location");
        assert_eq!(parsed.columns.len(), 1);
        assert_eq!(parsed.columns["location"].encoding, "WKB");
        assert_eq!(parsed.columns["location"].geometry_types, ["Point"]);
        // An absent `bbox` key must deserialize back to `None`.
        assert_eq!(parsed.columns["location"].bbox, None);
    }

    #[test]
    fn roundtrip_preserves_bbox() {
        let meta =
            GeoParquetMetadata::single_column("location", vec!["Point".into()], Some(WORLD_BBOX));
        let json = meta.to_json().unwrap();
        let parsed: GeoParquetMetadata = sonic_rs::from_str(&json).unwrap();
        assert_eq!(parsed.columns["location"].bbox, Some(WORLD_BBOX));
    }
}