Skip to main content

cityjson_arrow/
schema.rs

1use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef};
2use arrow::record_batch::RecordBatch;
3use serde::{Deserialize, Serialize};
4use std::fmt::{self, Display, Formatter};
5use std::sync::Arc;
6
/// Schema identifier string of the package format.
///
/// This is the value returned by `CityArrowPackageVersion::as_str` and the same text used as
/// the serde name of `CityArrowPackageVersion::V3Alpha3`.
pub const PACKAGE_SCHEMA_ID: &str = "cityjson-arrow.package.v3alpha3";
8
/// Version tag of the package schema.
///
/// Each variant is (de)serialized by serde under the string given in its `rename` attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CityArrowPackageVersion {
    /// Serialized as `"cityjson-arrow.package.v3alpha3"`.
    #[serde(rename = "cityjson-arrow.package.v3alpha3")]
    V3Alpha3,
}
14
15impl CityArrowPackageVersion {
16    #[must_use]
17    pub const fn as_str(self) -> &'static str {
18        PACKAGE_SCHEMA_ID
19    }
20}
21
22impl Display for CityArrowPackageVersion {
23    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
24        f.write_str(self.as_str())
25    }
26}
27
/// Header values of a package: the package schema version plus two identifying strings.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CityArrowHeader {
    /// Package schema version.
    pub package_version: CityArrowPackageVersion,
    /// Identifier of the city model.
    pub citymodel_id: String,
    /// CityJSON version string.
    pub cityjson_version: String,
}
34
35impl CityArrowHeader {
36    #[must_use]
37    pub fn new(
38        package_version: CityArrowPackageVersion,
39        citymodel_id: impl Into<String>,
40        cityjson_version: impl Into<String>,
41    ) -> Self {
42        Self {
43            package_version,
44            citymodel_id: citymodel_id.into(),
45            cityjson_version: cityjson_version.into(),
46        }
47    }
48}
49
/// Value type of a projected field.
///
/// `to_arrow_type` converts each variant into the Arrow `DataType` noted on the variant.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ProjectedValueSpec {
    /// Maps to Arrow `Null`.
    Null,
    /// Maps to Arrow `Boolean`.
    Boolean,
    /// Maps to Arrow `UInt64`.
    UInt64,
    /// Maps to Arrow `Int64`.
    Int64,
    /// Maps to Arrow `Float64`.
    Float64,
    /// Maps to Arrow `LargeUtf8` (not `Utf8`).
    Utf8,
    /// JSON-serialized fallback for attributes whose values are heterogeneous across rows or
    /// within a list. The wire type is `LargeUtf8`; values are round-tripped through JSON.
    Json,
    /// Maps to Arrow `UInt64`, the same wire type as the `UInt64` variant.
    GeometryRef,
    /// Maps to an Arrow `List` whose item type is derived from `item` and whose item
    /// nullability is `item_nullable`.
    List {
        item_nullable: bool,
        item: Box<ProjectedValueSpec>,
    },
    /// Maps to an Arrow `Struct` built from the nested field specs.
    Struct(ProjectedStructSpec),
}
68
69impl ProjectedValueSpec {
70    #[must_use]
71    pub fn to_arrow_type(&self) -> DataType {
72        match self {
73            Self::Null => DataType::Null,
74            Self::Boolean => DataType::Boolean,
75            Self::UInt64 | Self::GeometryRef => DataType::UInt64,
76            Self::Int64 => DataType::Int64,
77            Self::Float64 => DataType::Float64,
78            Self::Utf8 | Self::Json => DataType::LargeUtf8,
79            Self::List {
80                item_nullable,
81                item,
82            } => DataType::List(Arc::new(Field::new_list_field(
83                item.to_arrow_type(),
84                *item_nullable,
85            ))),
86            Self::Struct(fields) => DataType::Struct(fields.to_arrow_fields()),
87        }
88    }
89}
90
/// Ordered list of field specs describing a projected struct.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProjectedStructSpec {
    /// Field specs, in the order they are converted to Arrow fields.
    pub fields: Vec<ProjectedFieldSpec>,
}
95
96impl ProjectedStructSpec {
97    #[must_use]
98    pub fn new(fields: Vec<ProjectedFieldSpec>) -> Self {
99        Self { fields }
100    }
101
102    #[must_use]
103    pub fn is_empty(&self) -> bool {
104        self.fields.is_empty()
105    }
106
107    #[must_use]
108    pub fn to_arrow_fields(&self) -> Fields {
109        self.fields
110            .iter()
111            .map(ProjectedFieldSpec::to_arrow_field)
112            .map(Arc::new)
113            .collect()
114    }
115}
116
/// Name, value type and nullability of one projected field.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProjectedFieldSpec {
    /// Field name, used as the Arrow field name.
    pub name: String,
    /// Value type, converted through `ProjectedValueSpec::to_arrow_type`.
    pub value: ProjectedValueSpec,
    /// Whether the resulting Arrow field is nullable.
    pub nullable: bool,
}
123
124impl ProjectedFieldSpec {
125    #[must_use]
126    pub fn new(name: impl Into<String>, value: ProjectedValueSpec, nullable: bool) -> Self {
127        Self {
128            name: name.into(),
129            value,
130            nullable,
131        }
132    }
133
134    #[must_use]
135    pub fn to_arrow_field(&self) -> Field {
136        Field::new(self.name.clone(), self.value.to_arrow_type(), self.nullable)
137    }
138}
139
/// Optional struct specs that extend the canonical table schemas.
///
/// Every entry defaults to `None`; a `None` entry adds no column to the corresponding table.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProjectionLayout {
    /// Struct column `root_extra` of the `metadata` table.
    pub root_extra: Option<ProjectedStructSpec>,
    /// Struct column `metadata_extra` of the `metadata` table.
    pub metadata_extra: Option<ProjectedStructSpec>,
    /// Struct member `address` inside the `point_of_contact` column of the `metadata` table.
    pub metadata_point_of_contact_address: Option<ProjectedStructSpec>,
    /// Struct column `attributes` of the `cityobjects` table.
    pub cityobject_attributes: Option<ProjectedStructSpec>,
    /// Struct column `extra` of the `cityobjects` table.
    pub cityobject_extra: Option<ProjectedStructSpec>,
    /// Struct column `extra` of the `geometries`, `geometry_instances` and
    /// `template_geometries` tables.
    pub geometry_extra: Option<ProjectedStructSpec>,
    /// Struct column `attributes` of the `semantics` table.
    pub semantic_attributes: Option<ProjectedStructSpec>,
    /// Fields appended as individual columns after `material_id` in the `materials` table.
    pub material_payload: Option<ProjectedStructSpec>,
    /// Fields appended as individual columns after `image_uri` in the `textures` table.
    pub texture_payload: Option<ProjectedStructSpec>,
}
152
/// Header, projection layout and one Arrow `RecordBatch` per table.
///
/// The table field names match those of `CanonicalSchemaSet`. Tables typed
/// `Option<RecordBatch>` may be `None`.
#[doc(hidden)]
#[derive(Debug, Clone)]
pub struct CityModelArrowParts {
    pub header: CityArrowHeader,
    pub projection: ProjectionLayout,
    pub metadata: RecordBatch,
    pub extensions: Option<RecordBatch>,
    pub vertices: RecordBatch,
    pub cityobjects: RecordBatch,
    pub cityobject_children: Option<RecordBatch>,
    pub geometries: RecordBatch,
    pub geometry_boundaries: RecordBatch,
    pub geometry_instances: Option<RecordBatch>,
    pub template_vertices: Option<RecordBatch>,
    pub template_geometries: Option<RecordBatch>,
    pub template_geometry_boundaries: Option<RecordBatch>,
    pub semantics: Option<RecordBatch>,
    pub semantic_children: Option<RecordBatch>,
    pub geometry_surface_semantics: Option<RecordBatch>,
    pub geometry_point_semantics: Option<RecordBatch>,
    pub geometry_linestring_semantics: Option<RecordBatch>,
    pub template_geometry_semantics: Option<RecordBatch>,
    pub materials: Option<RecordBatch>,
    pub geometry_surface_materials: Option<RecordBatch>,
    pub template_geometry_materials: Option<RecordBatch>,
    pub textures: Option<RecordBatch>,
    pub texture_vertices: Option<RecordBatch>,
    pub geometry_ring_textures: Option<RecordBatch>,
    pub template_geometry_ring_textures: Option<RecordBatch>,
}
183
/// Manifest entry describing one table of a package.
///
/// NOTE(review): `offset` and `length` presumably describe the table's byte range inside the
/// package file — confirm against the package writer/reader.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PackageTableRef {
    /// Table name.
    pub name: String,
    pub offset: u64,
    pub length: u64,
    /// Row count of the table.
    pub rows: usize,
}
191
/// Serializable manifest of a package: schema version, identifying strings, projection layout
/// and the list of table entries.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PackageManifest {
    /// Package schema version.
    pub package_schema: CityArrowPackageVersion,
    /// CityJSON version string.
    pub cityjson_version: String,
    /// Identifier of the city model.
    pub citymodel_id: String,
    /// Projection layout of the package tables.
    pub projection: ProjectionLayout,
    /// Table entries; empty when the manifest is created through `PackageManifest::new`.
    pub tables: Vec<PackageTableRef>,
}
200
201impl PackageManifest {
202    #[must_use]
203    pub fn new(
204        citymodel_id: impl Into<String>,
205        cityjson_version: impl Into<String>,
206        projection: ProjectionLayout,
207    ) -> Self {
208        Self {
209            package_schema: CityArrowPackageVersion::V3Alpha3,
210            cityjson_version: cityjson_version.into(),
211            citymodel_id: citymodel_id.into(),
212            projection,
213            tables: Vec::new(),
214        }
215    }
216}
217
218impl From<&PackageManifest> for CityArrowHeader {
219    fn from(value: &PackageManifest) -> Self {
220        Self::new(
221            value.package_schema,
222            value.citymodel_id.clone(),
223            value.cityjson_version.clone(),
224        )
225    }
226}
227
/// One Arrow schema per package table, as produced by `canonical_schema_set`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CanonicalSchemaSet {
    pub metadata: SchemaRef,
    pub extensions: SchemaRef,
    pub vertices: SchemaRef,
    pub cityobjects: SchemaRef,
    pub cityobject_children: SchemaRef,
    pub geometries: SchemaRef,
    pub geometry_boundaries: SchemaRef,
    pub geometry_instances: SchemaRef,
    pub template_vertices: SchemaRef,
    pub template_geometries: SchemaRef,
    pub template_geometry_boundaries: SchemaRef,
    pub semantics: SchemaRef,
    pub semantic_children: SchemaRef,
    pub geometry_surface_semantics: SchemaRef,
    pub geometry_point_semantics: SchemaRef,
    pub geometry_linestring_semantics: SchemaRef,
    pub template_geometry_semantics: SchemaRef,
    pub materials: SchemaRef,
    pub geometry_surface_materials: SchemaRef,
    pub template_geometry_materials: SchemaRef,
    pub textures: SchemaRef,
    pub texture_vertices: SchemaRef,
    pub geometry_ring_textures: SchemaRef,
    pub template_geometry_ring_textures: SchemaRef,
}
255
256#[must_use]
257pub fn canonical_schema_set(layout: &ProjectionLayout) -> CanonicalSchemaSet {
258    CanonicalSchemaSet {
259        metadata: schema_ref(metadata_fields(layout)),
260        extensions: schema_ref(extensions_fields()),
261        vertices: schema_ref(vertices_fields()),
262        cityobjects: schema_ref(cityobjects_fields(layout)),
263        cityobject_children: schema_ref(cityobject_children_fields()),
264        geometries: schema_ref(geometries_fields(layout)),
265        geometry_boundaries: schema_ref(geometry_boundaries_fields()),
266        geometry_instances: schema_ref(geometry_instances_fields(layout)),
267        template_vertices: schema_ref(template_vertices_fields()),
268        template_geometries: schema_ref(template_geometries_fields(layout)),
269        template_geometry_boundaries: schema_ref(template_geometry_boundaries_fields()),
270        semantics: schema_ref(semantics_fields(layout)),
271        semantic_children: schema_ref(semantic_children_fields()),
272        geometry_surface_semantics: schema_ref(geometry_surface_semantics_fields()),
273        geometry_point_semantics: schema_ref(geometry_point_semantics_fields()),
274        geometry_linestring_semantics: schema_ref(geometry_linestring_semantics_fields()),
275        template_geometry_semantics: schema_ref(template_geometry_semantics_fields()),
276        materials: schema_ref(materials_fields(layout)),
277        geometry_surface_materials: schema_ref(geometry_surface_materials_fields()),
278        template_geometry_materials: schema_ref(template_geometry_materials_fields()),
279        textures: schema_ref(textures_fields(layout)),
280        texture_vertices: schema_ref(texture_vertices_fields()),
281        geometry_ring_textures: schema_ref(geometry_ring_textures_fields()),
282        template_geometry_ring_textures: schema_ref(template_geometry_ring_textures_fields()),
283    }
284}
285
286fn schema_ref(fields: Vec<Field>) -> SchemaRef {
287    Arc::new(Schema::new(fields))
288}
289
290fn fixed_size_list_field(
291    name: &str,
292    item_type: DataType,
293    item_nullable: bool,
294    size: i32,
295    nullable: bool,
296) -> Field {
297    Field::new(
298        name,
299        DataType::FixedSizeList(
300            Arc::new(Field::new_list_field(item_type, item_nullable)),
301            size,
302        ),
303        nullable,
304    )
305}
306
307fn list_field(name: &str, item_type: DataType, item_nullable: bool, nullable: bool) -> Field {
308    Field::new(
309        name,
310        DataType::List(Arc::new(Field::new_list_field(item_type, item_nullable))),
311        nullable,
312    )
313}
314
315fn projected_struct_field(name: &str, layout: Option<&ProjectedStructSpec>) -> Option<Field> {
316    layout.map(|layout| Field::new(name, DataType::Struct(layout.to_arrow_fields()), true))
317}
318
319fn point_of_contact_field(layout: &ProjectionLayout) -> Field {
320    let mut fields = vec![
321        Field::new("contact_name", DataType::LargeUtf8, false),
322        Field::new("email_address", DataType::LargeUtf8, false),
323        Field::new("role", DataType::Utf8, true),
324        Field::new("website", DataType::LargeUtf8, true),
325        Field::new("contact_type", DataType::Utf8, true),
326        Field::new("phone", DataType::LargeUtf8, true),
327        Field::new("organization", DataType::LargeUtf8, true),
328    ];
329    if let Some(field) =
330        projected_struct_field("address", layout.metadata_point_of_contact_address.as_ref())
331    {
332        fields.push(field);
333    }
334    Field::new(
335        "point_of_contact",
336        DataType::Struct(fields.into_iter().map(Arc::new).collect()),
337        true,
338    )
339}
340
341fn projected_payload_fields(layout: Option<&ProjectedStructSpec>) -> Vec<Field> {
342    layout
343        .map(|layout| {
344            layout
345                .fields
346                .iter()
347                .map(ProjectedFieldSpec::to_arrow_field)
348                .collect()
349        })
350        .unwrap_or_default()
351}
352
353fn metadata_fields(layout: &ProjectionLayout) -> Vec<Field> {
354    let mut fields = vec![
355        Field::new("citymodel_id", DataType::LargeUtf8, false),
356        Field::new("cityjson_version", DataType::Utf8, false),
357        Field::new("citymodel_kind", DataType::Utf8, false),
358        Field::new("feature_root_id", DataType::LargeUtf8, true),
359        Field::new("identifier", DataType::LargeUtf8, true),
360        Field::new("title", DataType::LargeUtf8, true),
361        Field::new("reference_system", DataType::LargeUtf8, true),
362        fixed_size_list_field("geographical_extent", DataType::Float64, false, 6, true),
363        Field::new("reference_date", DataType::Utf8, true),
364        Field::new("default_material_theme", DataType::Utf8, true),
365        Field::new("default_texture_theme", DataType::Utf8, true),
366        point_of_contact_field(layout),
367    ];
368    if let Some(field) = projected_struct_field("root_extra", layout.root_extra.as_ref()) {
369        fields.push(field);
370    }
371    if let Some(field) = projected_struct_field("metadata_extra", layout.metadata_extra.as_ref()) {
372        fields.push(field);
373    }
374    fields
375}
376
377fn extensions_fields() -> Vec<Field> {
378    vec![
379        Field::new("extension_name", DataType::Utf8, false),
380        Field::new("uri", DataType::LargeUtf8, false),
381        Field::new("version", DataType::Utf8, true),
382    ]
383}
384
385fn vertices_fields() -> Vec<Field> {
386    vec![
387        Field::new("vertex_id", DataType::UInt64, false),
388        Field::new("x", DataType::Float64, false),
389        Field::new("y", DataType::Float64, false),
390        Field::new("z", DataType::Float64, false),
391    ]
392}
393
394fn cityobjects_fields(layout: &ProjectionLayout) -> Vec<Field> {
395    let mut fields = vec![
396        Field::new("cityobject_id", DataType::LargeUtf8, false),
397        Field::new("cityobject_ix", DataType::UInt64, false),
398        Field::new("object_type", DataType::Utf8, false),
399        fixed_size_list_field("geographical_extent", DataType::Float64, false, 6, true),
400    ];
401    if let Some(field) = projected_struct_field("attributes", layout.cityobject_attributes.as_ref())
402    {
403        fields.push(field);
404    }
405    if let Some(field) = projected_struct_field("extra", layout.cityobject_extra.as_ref()) {
406        fields.push(field);
407    }
408    fields
409}
410
411fn cityobject_children_fields() -> Vec<Field> {
412    vec![
413        Field::new("parent_cityobject_ix", DataType::UInt64, false),
414        Field::new("child_ordinal", DataType::UInt32, false),
415        Field::new("child_cityobject_ix", DataType::UInt64, false),
416    ]
417}
418
419fn geometries_fields(layout: &ProjectionLayout) -> Vec<Field> {
420    let mut fields = vec![
421        Field::new("geometry_id", DataType::UInt64, false),
422        Field::new("cityobject_ix", DataType::UInt64, false),
423        Field::new("geometry_ordinal", DataType::UInt32, false),
424        Field::new("geometry_type", DataType::Utf8, false),
425        Field::new("lod", DataType::Utf8, true),
426    ];
427    if let Some(field) = projected_struct_field("extra", layout.geometry_extra.as_ref()) {
428        fields.push(field);
429    }
430    fields
431}
432
433fn geometry_boundaries_fields() -> Vec<Field> {
434    vec![
435        Field::new("geometry_id", DataType::UInt64, false),
436        list_field("vertex_indices", DataType::UInt32, false, false),
437        list_field("line_offsets", DataType::UInt32, false, true),
438        list_field("ring_offsets", DataType::UInt32, false, true),
439        list_field("surface_offsets", DataType::UInt32, false, true),
440        list_field("shell_offsets", DataType::UInt32, false, true),
441        list_field("solid_offsets", DataType::UInt32, false, true),
442    ]
443}
444
445fn geometry_instances_fields(layout: &ProjectionLayout) -> Vec<Field> {
446    let mut fields = vec![
447        Field::new("geometry_id", DataType::UInt64, false),
448        Field::new("cityobject_ix", DataType::UInt64, false),
449        Field::new("geometry_ordinal", DataType::UInt32, false),
450        Field::new("lod", DataType::Utf8, true),
451        Field::new("template_geometry_id", DataType::UInt64, false),
452        Field::new("reference_point_vertex_id", DataType::UInt64, false),
453        fixed_size_list_field("transform_matrix", DataType::Float64, false, 16, true),
454    ];
455    if let Some(field) = projected_struct_field("extra", layout.geometry_extra.as_ref()) {
456        fields.push(field);
457    }
458    fields
459}
460
461fn template_vertices_fields() -> Vec<Field> {
462    vec![
463        Field::new("template_vertex_id", DataType::UInt64, false),
464        Field::new("x", DataType::Float64, false),
465        Field::new("y", DataType::Float64, false),
466        Field::new("z", DataType::Float64, false),
467    ]
468}
469
470fn template_geometries_fields(layout: &ProjectionLayout) -> Vec<Field> {
471    let mut fields = vec![
472        Field::new("template_geometry_id", DataType::UInt64, false),
473        Field::new("geometry_type", DataType::Utf8, false),
474        Field::new("lod", DataType::Utf8, true),
475    ];
476    if let Some(field) = projected_struct_field("extra", layout.geometry_extra.as_ref()) {
477        fields.push(field);
478    }
479    fields
480}
481
482fn template_geometry_boundaries_fields() -> Vec<Field> {
483    vec![
484        Field::new("template_geometry_id", DataType::UInt64, false),
485        list_field("vertex_indices", DataType::UInt32, false, false),
486        list_field("line_offsets", DataType::UInt32, false, true),
487        list_field("ring_offsets", DataType::UInt32, false, true),
488        list_field("surface_offsets", DataType::UInt32, false, true),
489        list_field("shell_offsets", DataType::UInt32, false, true),
490        list_field("solid_offsets", DataType::UInt32, false, true),
491    ]
492}
493
494fn semantics_fields(layout: &ProjectionLayout) -> Vec<Field> {
495    let mut fields = vec![
496        Field::new("semantic_id", DataType::UInt64, false),
497        Field::new("semantic_type", DataType::Utf8, false),
498        Field::new("parent_semantic_id", DataType::UInt64, true),
499    ];
500    if let Some(field) = projected_struct_field("attributes", layout.semantic_attributes.as_ref()) {
501        fields.push(field);
502    }
503    fields
504}
505
506fn semantic_children_fields() -> Vec<Field> {
507    vec![
508        Field::new("parent_semantic_id", DataType::UInt64, false),
509        Field::new("child_ordinal", DataType::UInt32, false),
510        Field::new("child_semantic_id", DataType::UInt64, false),
511    ]
512}
513
514fn geometry_surface_semantics_fields() -> Vec<Field> {
515    vec![
516        Field::new("geometry_id", DataType::UInt64, false),
517        Field::new("surface_ordinal", DataType::UInt32, false),
518        Field::new("semantic_id", DataType::UInt64, true),
519    ]
520}
521
522fn geometry_point_semantics_fields() -> Vec<Field> {
523    vec![
524        Field::new("geometry_id", DataType::UInt64, false),
525        Field::new("point_ordinal", DataType::UInt32, false),
526        Field::new("semantic_id", DataType::UInt64, true),
527    ]
528}
529
530fn geometry_linestring_semantics_fields() -> Vec<Field> {
531    vec![
532        Field::new("geometry_id", DataType::UInt64, false),
533        Field::new("linestring_ordinal", DataType::UInt32, false),
534        Field::new("semantic_id", DataType::UInt64, true),
535    ]
536}
537
538fn template_geometry_semantics_fields() -> Vec<Field> {
539    vec![
540        Field::new("template_geometry_id", DataType::UInt64, false),
541        Field::new("primitive_type", DataType::Utf8, false),
542        Field::new("primitive_ordinal", DataType::UInt32, false),
543        Field::new("semantic_id", DataType::UInt64, true),
544    ]
545}
546
547fn materials_fields(layout: &ProjectionLayout) -> Vec<Field> {
548    let mut fields = vec![Field::new("material_id", DataType::UInt64, false)];
549    fields.extend(projected_payload_fields(layout.material_payload.as_ref()));
550    fields
551}
552
553fn geometry_surface_materials_fields() -> Vec<Field> {
554    vec![
555        Field::new("geometry_id", DataType::UInt64, false),
556        Field::new("surface_ordinal", DataType::UInt32, false),
557        Field::new("theme", DataType::Utf8, false),
558        Field::new("material_id", DataType::UInt64, false),
559    ]
560}
561
562fn template_geometry_materials_fields() -> Vec<Field> {
563    vec![
564        Field::new("template_geometry_id", DataType::UInt64, false),
565        Field::new("primitive_type", DataType::Utf8, false),
566        Field::new("primitive_ordinal", DataType::UInt32, false),
567        Field::new("theme", DataType::Utf8, false),
568        Field::new("material_id", DataType::UInt64, false),
569    ]
570}
571
572fn textures_fields(layout: &ProjectionLayout) -> Vec<Field> {
573    let mut fields = vec![
574        Field::new("texture_id", DataType::UInt64, false),
575        Field::new("image_uri", DataType::LargeUtf8, false),
576    ];
577    fields.extend(projected_payload_fields(layout.texture_payload.as_ref()));
578    fields
579}
580
581fn texture_vertices_fields() -> Vec<Field> {
582    vec![
583        Field::new("uv_id", DataType::UInt64, false),
584        Field::new("u", DataType::Float32, false),
585        Field::new("v", DataType::Float32, false),
586    ]
587}
588
589fn geometry_ring_textures_fields() -> Vec<Field> {
590    vec![
591        Field::new("geometry_id", DataType::UInt64, false),
592        Field::new("surface_ordinal", DataType::UInt32, false),
593        Field::new("ring_ordinal", DataType::UInt32, false),
594        Field::new("theme", DataType::Utf8, false),
595        Field::new("texture_id", DataType::UInt64, false),
596        list_field("uv_indices", DataType::UInt64, false, false),
597    ]
598}
599
600fn template_geometry_ring_textures_fields() -> Vec<Field> {
601    vec![
602        Field::new("template_geometry_id", DataType::UInt64, false),
603        Field::new("surface_ordinal", DataType::UInt32, false),
604        Field::new("ring_ordinal", DataType::UInt32, false),
605        Field::new("theme", DataType::Utf8, false),
606        Field::new("texture_id", DataType::UInt64, false),
607        list_field("uv_indices", DataType::UInt64, false, false),
608    ]
609}