1pub mod benchmark;
2pub mod profile;
3
4use std::collections::{BTreeMap, BTreeSet, HashMap};
5use std::fs;
6use std::io::{ErrorKind, Read, Seek, SeekFrom};
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Mutex};
9use std::time::UNIX_EPOCH;
10
11use cityjson_lib::json::staged;
12use cityjson_lib::{CityModel, Error, Result};
13use globset::GlobMatcher;
14use ignore::WalkBuilder;
15use lru::LruCache;
16use rusqlite::{OptionalExtension, params};
17use serde::de::DeserializeOwned;
18use serde::{Deserialize, Serialize};
19use serde_json::value::RawValue;
20use serde_json::{Map, Number, Value};
21
/// Two-dimensional bounding box described by minimum and maximum x/y values.
///
/// Produced by [`FeatureBounds::bbox_2d`] and accepted by the `CityIndex`
/// bounding-box query methods.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct BBox {
    /// Lower bound along x.
    pub min_x: f64,
    /// Upper bound along x.
    pub max_x: f64,
    /// Lower bound along y.
    pub min_y: f64,
    /// Upper bound along y.
    pub max_y: f64,
}
29
/// Three-dimensional bounds of a feature: minimum and maximum per axis.
///
/// Use [`FeatureBounds::bbox_2d`] to drop the z range.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct FeatureBounds {
    /// Lower bound along x.
    pub min_x: f64,
    /// Upper bound along x.
    pub max_x: f64,
    /// Lower bound along y.
    pub min_y: f64,
    /// Upper bound along y.
    pub max_y: f64,
    /// Lower bound along z.
    pub min_z: f64,
    /// Upper bound along z.
    pub max_z: f64,
}
39
/// A set of bounds together with a feature count.
///
/// NOTE(review): presumably the combined bounds of `feature_count` features;
/// the producer is not visible in this section — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct FeatureBoundsSummary {
    /// The summarized bounds.
    pub bounds: FeatureBounds,
    /// Number of features associated with `bounds`.
    pub feature_count: usize,
}
45
46impl FeatureBounds {
47 #[must_use]
48 pub fn bbox_2d(self) -> BBox {
49 BBox {
50 min_x: self.min_x,
51 max_x: self.max_x,
52 min_y: self.min_y,
53 max_y: self.max_y,
54 }
55 }
56}
57
/// Pairs an `Index` with the `StorageBackend` chosen for a dataset layout.
///
/// `CityIndex::open` picks the backend from a `StorageLayout` and opens the
/// index at the given path.
pub struct CityIndex {
    /// Index opened from the index path passed to `CityIndex::open`.
    index: Index,
    /// Backend that scans sources during reindexing and reads features.
    backend: Box<dyn StorageBackend>,
}
62
/// Name of the `CITYJSON_INDEX_WORKERS` environment variable.
///
/// NOTE(review): presumably read by `configured_worker_count` to size the scan
/// worker pool; the reader is not visible in this section — confirm.
pub const WORKER_COUNT_ENV: &str = "CITYJSON_INDEX_WORKERS";
// NOTE(review): presumably a default page size for paged scans; no use site is
// visible in this section — confirm.
const DEFAULT_SCAN_PAGE_SIZE: usize = 512;
65
/// Reference to one indexed feature: where it lives in a source and its bounds.
///
/// `to_location` copies every field except `row_id` and `bounds` into a
/// `FeatureLocation`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexedFeatureRef {
    /// Row id of the feature (see `CityIndex::lookup_feature_ref_by_rowid`).
    pub row_id: i64,
    /// Identifier of the feature.
    pub feature_id: String,
    /// Identifier of the source the feature belongs to; also used to look up
    /// cached source metadata.
    pub source_id: i64,
    /// Path of the source file.
    pub source_path: PathBuf,
    /// Offset of the feature within the source (presumably in bytes — confirm).
    pub offset: u64,
    /// Length of the feature within the source (presumably in bytes — confirm).
    pub length: u64,
    /// Optional offset of the feature's vertices, when stored separately.
    pub vertices_offset: Option<u64>,
    /// Optional length of the feature's vertices, when stored separately.
    pub vertices_length: Option<u64>,
    /// Optional JSON-encoded member ranges (format not visible here).
    pub member_ranges_json: Option<String>,
    /// Three-dimensional bounds of the feature.
    pub bounds: FeatureBounds,
}
79
/// An index reference together with the model for that feature.
pub struct IndexedFeature {
    /// Index entry describing the feature.
    pub reference: IndexedFeatureRef,
    /// The feature's `CityModel`.
    pub model: CityModel,
}
84
/// Rule deciding which geometries of a CityObject are kept, based on their LoD.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum LodSelection {
    /// Keep every geometry, including geometries without a LoD.
    #[default]
    All,
    /// Keep only geometries whose LoD equals the highest LoD found on the same
    /// CityObject.
    Highest,
    /// Keep only geometries whose LoD, rendered as a string, equals this value.
    Exact(String),
}
92
/// Filter over CityObject types and geometry LoDs, applied per feature.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct FeatureFilter {
    /// When `Some`, only CityObjects of these types (and their descendants) are
    /// retained; when `None`, every CityObject is retained.
    pub cityobject_types: Option<BTreeSet<String>>,
    /// LoD rule for CityObject types that have no entry in `lods_by_type`.
    pub default_lod: LodSelection,
    /// Per-type LoD rules, keyed by CityObject type name.
    pub lods_by_type: BTreeMap<String, LodSelection>,
}
99
/// Records an exact LoD request that could not be satisfied for a type.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MissingLodSelection {
    /// CityObject type the request was made for.
    pub cityobject_type: String,
    /// The LoD that was requested via `LodSelection::Exact`.
    pub requested_lod: String,
    /// LoDs that were observed for the type instead (possibly empty).
    pub available_lods: BTreeSet<String>,
}
106
/// Per-feature report of what a [`FeatureFilter`] saw and kept.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct FeatureFilterDiagnostics {
    /// Types of all CityObjects in the model.
    pub available_types: BTreeSet<String>,
    /// Types of the CityObjects that were retained.
    pub retained_types: BTreeSet<String>,
    /// Types of the CityObjects that were not retained.
    pub ignored_types: BTreeSet<String>,
    /// LoDs found on geometries of retained CityObjects, keyed by type.
    pub available_lods: BTreeMap<String, BTreeSet<String>>,
    /// LoDs of geometries that matched the LoD rule, keyed by type.
    pub retained_lods: BTreeMap<String, BTreeSet<String>>,
    /// Exact per-type LoD requests for retained types whose LoD was not found.
    pub missing_lods: Vec<MissingLodSelection>,
    /// Number of geometries (with a LoD) that matched the LoD rule.
    pub retained_geometry_count: usize,
}
117
/// Result of [`FeatureFilter::apply`]: the filtered model plus diagnostics.
#[derive(Debug, Clone)]
pub struct FilteredFeature {
    /// The model after filtering (a clone of the input when nothing applied).
    pub model: CityModel,
    /// What the filter observed and retained for this feature.
    pub diagnostics: FeatureFilterDiagnostics,
}
123
/// Accumulation of [`FeatureFilterDiagnostics`] over many features.
///
/// Populated by calling `add` once per feature.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct FeatureFilterSummary {
    /// Union of `available_types` over all added diagnostics.
    pub available_types: BTreeSet<String>,
    /// Union of `retained_types` over all added diagnostics.
    pub retained_types: BTreeSet<String>,
    /// Union of `ignored_types` over all added diagnostics.
    pub ignored_types: BTreeSet<String>,
    /// Union of available LoDs per type over all added diagnostics.
    pub available_lods: BTreeMap<String, BTreeSet<String>>,
    /// Union of retained LoDs per type over all added diagnostics.
    pub retained_lods: BTreeMap<String, BTreeSet<String>>,
    /// First missing-LoD record seen for each CityObject type.
    pub missing_lods: BTreeMap<String, MissingLodSelection>,
    /// Number of added diagnostics with at least one retained geometry.
    pub retained_feature_count: usize,
    /// Number of added diagnostics with no retained geometry.
    pub ignored_feature_count: usize,
}
135
136impl IndexedFeatureRef {
137 fn to_location(&self) -> FeatureLocation {
138 FeatureLocation {
139 feature_id: self.feature_id.clone(),
140 source_id: self.source_id,
141 source_path: self.source_path.clone(),
142 offset: self.offset,
143 length: self.length,
144 vertices_offset: self.vertices_offset,
145 vertices_length: self.vertices_length,
146 member_ranges_json: self.member_ranges_json.clone(),
147 }
148 }
149}
150
151impl FeatureFilter {
152 #[must_use]
153 pub fn is_active(&self) -> bool {
154 self.cityobject_types.is_some()
155 || self.default_lod != LodSelection::All
156 || self
157 .lods_by_type
158 .values()
159 .any(|selection| *selection != LodSelection::All)
160 }
161
162 pub fn apply(&self, model: &CityModel) -> Result<FilteredFeature> {
173 let retained_handles = retained_cityobject_handles(model, self)?;
174 let diagnostics = filter_diagnostics(model, &retained_handles, self);
175
176 if !self.is_active() {
177 return Ok(FilteredFeature {
178 model: model.clone(),
179 diagnostics,
180 });
181 }
182
183 let type_selection = self
184 .cityobject_types
185 .as_ref()
186 .map(|_| {
187 cityjson_lib::ops::select_cityobjects(model, |ctx| {
188 retained_handles.contains(&ctx.handle())
189 })
190 })
191 .transpose()?;
192
193 let lod_selection = lod_selection(model, &retained_handles, self)?;
194 let selection = match (type_selection, lod_selection) {
195 (Some(types), Some(lods)) => types.intersection(&lods),
196 (Some(types), None) => types,
197 (None, Some(lods)) => lods,
198 (None, None) => {
199 return Ok(FilteredFeature {
200 model: model.clone(),
201 diagnostics,
202 });
203 }
204 };
205
206 let filtered = extract_or_empty_feature(model, &selection)?;
207 Ok(FilteredFeature {
208 model: filtered,
209 diagnostics,
210 })
211 }
212}
213
214impl FeatureFilterSummary {
215 pub fn add(&mut self, diagnostics: &FeatureFilterDiagnostics) {
216 self.available_types
217 .extend(diagnostics.available_types.iter().cloned());
218 self.retained_types
219 .extend(diagnostics.retained_types.iter().cloned());
220 self.ignored_types
221 .extend(diagnostics.ignored_types.iter().cloned());
222 merge_lod_sets(&mut self.available_lods, &diagnostics.available_lods);
223 merge_lod_sets(&mut self.retained_lods, &diagnostics.retained_lods);
224 for missing in &diagnostics.missing_lods {
225 self.missing_lods
226 .entry(missing.cityobject_type.clone())
227 .or_insert_with(|| missing.clone());
228 }
229 if diagnostics.retained_geometry_count == 0 {
230 self.ignored_feature_count += 1;
231 } else {
232 self.retained_feature_count += 1;
233 }
234 }
235
236 #[must_use]
237 pub fn requested_lod_failures(&self, filter: &FeatureFilter) -> Vec<MissingLodSelection> {
238 filter
239 .lods_by_type
240 .iter()
241 .filter_map(|(cityobject_type, selection)| {
242 let LodSelection::Exact(requested_lod) = selection else {
243 return None;
244 };
245 let eligible = self.available_lods.contains_key(cityobject_type)
246 || self.retained_types.contains(cityobject_type)
247 || filter
248 .cityobject_types
249 .as_ref()
250 .is_none_or(|types| types.contains(cityobject_type));
251 if !eligible {
252 return None;
253 }
254 let available_lods = self
255 .available_lods
256 .get(cityobject_type)
257 .cloned()
258 .unwrap_or_default();
259 if available_lods.contains(requested_lod) {
260 return None;
261 }
262 Some(MissingLodSelection {
263 cityobject_type: cityobject_type.clone(),
264 requested_lod: requested_lod.clone(),
265 available_lods,
266 })
267 })
268 .collect()
269 }
270
271 pub fn ensure_requested_lods_available(&self, filter: &FeatureFilter) -> Result<()> {
276 let failures = self.requested_lod_failures(filter);
277 if failures.is_empty() {
278 return Ok(());
279 }
280
281 let details = failures
282 .iter()
283 .map(|missing| {
284 let available = if missing.available_lods.is_empty() {
285 "none".to_owned()
286 } else {
287 missing
288 .available_lods
289 .iter()
290 .cloned()
291 .collect::<Vec<_>>()
292 .join(", ")
293 };
294 format!(
295 "{} requested LoD '{}' but available LoDs are: {}",
296 missing.cityobject_type, missing.requested_lod, available
297 )
298 })
299 .collect::<Vec<_>>()
300 .join("; ");
301 Err(import_error(format!(
302 "requested LoD selector matched no geometry: {details}"
303 )))
304 }
305}
306
/// Unions every per-type LoD set of `source` into `target`.
///
/// Types missing from `target` are added, even when their set is empty.
fn merge_lod_sets(
    target: &mut BTreeMap<String, BTreeSet<String>>,
    source: &BTreeMap<String, BTreeSet<String>>,
) {
    for (cityobject_type, lods) in source {
        match target.get_mut(cityobject_type) {
            Some(existing) => existing.extend(lods.iter().cloned()),
            None => {
                target.insert(cityobject_type.clone(), lods.clone());
            }
        }
    }
}
318
// Local shorthands for the handle types from `cityjson_types::prelude`.
type CityObjectHandle = cityjson_types::prelude::CityObjectHandle;
type GeometryHandle = cityjson_types::prelude::GeometryHandle;
321
322fn retained_cityobject_handles(
323 model: &CityModel,
324 filter: &FeatureFilter,
325) -> Result<BTreeSet<CityObjectHandle>> {
326 let Some(selected_types) = filter.cityobject_types.as_ref() else {
327 return Ok(model
328 .cityobjects()
329 .iter()
330 .map(|(handle, _)| handle)
331 .collect());
332 };
333
334 let mut retained = BTreeSet::new();
335 for (handle, cityobject) in model.cityobjects().iter() {
336 if selected_types.contains(&cityobject.type_cityobject().to_string()) {
337 collect_cityobject_descendants(model, handle, &mut retained)?;
338 }
339 }
340 Ok(retained)
341}
342
343fn collect_cityobject_descendants(
344 model: &CityModel,
345 handle: CityObjectHandle,
346 retained: &mut BTreeSet<CityObjectHandle>,
347) -> Result<()> {
348 if !retained.insert(handle) {
349 return Ok(());
350 }
351 let cityobject = model.cityobjects().get(handle).ok_or_else(|| {
352 import_error(format!(
353 "missing CityObject handle in filter traversal: {handle:?}"
354 ))
355 })?;
356 if let Some(children) = cityobject.children() {
357 for child in children {
358 collect_cityobject_descendants(model, *child, retained)?;
359 }
360 }
361 Ok(())
362}
363
364fn lod_selection(
365 model: &CityModel,
366 retained_handles: &BTreeSet<CityObjectHandle>,
367 filter: &FeatureFilter,
368) -> Result<Option<cityjson_lib::ops::ModelSelection>> {
369 if filter.default_lod == LodSelection::All
370 && filter
371 .lods_by_type
372 .values()
373 .all(|selection| *selection == LodSelection::All)
374 {
375 return Ok(None);
376 }
377
378 let highest_lods = highest_lods_by_cityobject(model, retained_handles);
379 cityjson_lib::ops::select_geometries(model, |ctx| {
380 if !retained_handles.contains(&ctx.cityobject_handle()) {
381 return false;
382 }
383 let cityobject_type = ctx.cityobject().type_cityobject().to_string();
384 let selection = filter
385 .lods_by_type
386 .get(&cityobject_type)
387 .unwrap_or(&filter.default_lod);
388 geometry_matches_lod_selection(
389 ctx.geometry().lod(),
390 highest_lods.get(&ctx.cityobject_handle()),
391 selection,
392 )
393 })
394 .map(Some)
395}
396
397fn geometry_matches_lod_selection(
398 geometry_lod: Option<&cityjson_types::v2_0::LoD>,
399 highest_lod: Option<&String>,
400 selection: &LodSelection,
401) -> bool {
402 match selection {
403 LodSelection::All => true,
404 LodSelection::Highest => geometry_lod
405 .is_some_and(|lod| highest_lod.is_some_and(|highest| lod.to_string() == *highest)),
406 LodSelection::Exact(selected_lod) => {
407 geometry_lod.is_some_and(|lod| lod.to_string() == *selected_lod)
408 }
409 }
410}
411
412fn highest_lods_by_cityobject(
413 model: &CityModel,
414 retained_handles: &BTreeSet<CityObjectHandle>,
415) -> BTreeMap<CityObjectHandle, String> {
416 let mut highest = BTreeMap::new();
417 for handle in retained_handles {
418 let Some(cityobject) = model.cityobjects().get(*handle) else {
419 continue;
420 };
421 let Some(geometry_handles) = cityobject.geometry() else {
422 continue;
423 };
424 if let Some(lod) = highest_lod(model, geometry_handles) {
425 highest.insert(*handle, lod);
426 }
427 }
428 highest
429}
430
431fn highest_lod(model: &CityModel, geometries: &[GeometryHandle]) -> Option<String> {
432 geometries
433 .iter()
434 .filter_map(|geometry_handle| {
435 model
436 .get_geometry(*geometry_handle)
437 .and_then(|geometry| geometry.lod())
438 .map(std::string::ToString::to_string)
439 })
440 .max_by(|lhs, rhs| compare_lod_strings(lhs, rhs))
441}
442
/// Orders two LoD strings.
///
/// When both parse as `f64` they are compared numerically (incomparable
/// values, such as NaN, count as equal); otherwise the raw strings are
/// compared lexicographically.
fn compare_lod_strings(lhs: &str, rhs: &str) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    let numeric = lhs.parse::<f64>().ok().zip(rhs.parse::<f64>().ok());
    if let Some((left, right)) = numeric {
        left.partial_cmp(&right).unwrap_or(Ordering::Equal)
    } else {
        lhs.cmp(rhs)
    }
}
449
450fn filter_diagnostics(
451 model: &CityModel,
452 retained_handles: &BTreeSet<CityObjectHandle>,
453 filter: &FeatureFilter,
454) -> FeatureFilterDiagnostics {
455 let highest_lods = highest_lods_by_cityobject(model, retained_handles);
456 let mut diagnostics = FeatureFilterDiagnostics::default();
457
458 for (handle, cityobject) in model.cityobjects().iter() {
459 let cityobject_type = cityobject.type_cityobject().to_string();
460 diagnostics.available_types.insert(cityobject_type.clone());
461 if retained_handles.contains(&handle) {
462 diagnostics.retained_types.insert(cityobject_type.clone());
463 } else {
464 diagnostics.ignored_types.insert(cityobject_type.clone());
465 continue;
466 }
467
468 let Some(geometry_handles) = cityobject.geometry() else {
469 continue;
470 };
471 let selection = filter
472 .lods_by_type
473 .get(&cityobject_type)
474 .unwrap_or(&filter.default_lod);
475 for geometry_handle in geometry_handles {
476 let Some(geometry_lod) = model
477 .get_geometry(*geometry_handle)
478 .and_then(|geometry| geometry.lod())
479 else {
480 continue;
481 };
482 let lod = geometry_lod.to_string();
483 diagnostics
484 .available_lods
485 .entry(cityobject_type.clone())
486 .or_default()
487 .insert(lod.clone());
488 if geometry_matches_lod_selection(
489 Some(geometry_lod),
490 highest_lods.get(&handle),
491 selection,
492 ) {
493 diagnostics
494 .retained_lods
495 .entry(cityobject_type.clone())
496 .or_default()
497 .insert(lod);
498 diagnostics.retained_geometry_count += 1;
499 }
500 }
501 }
502
503 for (cityobject_type, selection) in &filter.lods_by_type {
504 let LodSelection::Exact(requested_lod) = selection else {
505 continue;
506 };
507 if !diagnostics.retained_types.contains(cityobject_type) {
508 continue;
509 }
510 let available_lods = diagnostics
511 .available_lods
512 .get(cityobject_type)
513 .cloned()
514 .unwrap_or_default();
515 if !available_lods.contains(requested_lod) {
516 diagnostics.missing_lods.push(MissingLodSelection {
517 cityobject_type: cityobject_type.clone(),
518 requested_lod: requested_lod.clone(),
519 available_lods,
520 });
521 }
522 }
523
524 diagnostics
525}
526
527fn extract_or_empty_feature(
528 model: &CityModel,
529 selection: &cityjson_lib::ops::ModelSelection,
530) -> Result<CityModel> {
531 if !selection.is_empty() {
532 return cityjson_lib::ops::extract(model, selection);
533 }
534
535 let mut empty = model.clone();
536 empty.clear_cityobjects();
537 empty.set_id(None);
538 Ok(empty)
539}
540
/// The on-disk layout kinds a dataset directory can be resolved to.
///
/// The serde names match the strings returned by `DatasetLayoutKind::as_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum DatasetLayoutKind {
    /// Serialized as `"ndjson"`.
    #[serde(rename = "ndjson")]
    Ndjson,
    /// Serialized as `"cityjson"`.
    #[serde(rename = "cityjson")]
    CityJson,
    /// Serialized as `"feature-files"`.
    #[serde(rename = "feature-files")]
    FeatureFiles,
}
550
551impl DatasetLayoutKind {
552 #[must_use]
553 pub fn as_str(self) -> &'static str {
554 match self {
555 Self::Ndjson => "ndjson",
556 Self::CityJson => "cityjson",
557 Self::FeatureFiles => "feature-files",
558 }
559 }
560}
561
/// Counts read from a dataset's `manifest.json`.
///
/// Each count is `None` when the corresponding manifest key is absent or does
/// not have the expected JSON type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestSummary {
    /// Path of the manifest file that was read.
    pub path: PathBuf,
    /// Number of entries in the manifest's `selected_tiles` array.
    pub selected_tile_count: Option<usize>,
    /// Value of the manifest's `total_features` key.
    pub total_features: Option<usize>,
    /// Value of the manifest's `total_cityobjects` key.
    pub total_cityobjects: Option<usize>,
}
569
/// A dataset directory whose layout has been detected by `resolve_dataset`.
#[derive(Debug, Clone)]
pub struct ResolvedDataset {
    /// Canonicalized dataset directory.
    pub dataset_root: PathBuf,
    /// Path of the index file: the caller's override, or
    /// `.cityjson-index.sqlite` inside `dataset_root`.
    pub index_path: PathBuf,
    /// The single layout kind the directory matched.
    pub layout: DatasetLayoutKind,
    /// Summary of a `manifest.json` found in or next to the dataset, if any.
    pub manifest: Option<ManifestSummary>,
    /// Storage layout description matching `layout`.
    storage_layout: StorageLayout,
    /// Source files detected for the layout (metadata files for the
    /// feature-files layout).
    source_paths: Vec<PathBuf>,
    /// Feature files detected for the feature-files layout; empty otherwise.
    feature_file_paths: Vec<PathBuf>,
}
580
/// State of a dataset's index as determined by inspection.
///
/// The `Option` fields stay `None` when the index file does not exist.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexStatus {
    /// Path of the index file.
    pub path: PathBuf,
    /// Whether the index file exists.
    pub exists: bool,
    /// Modification time of the index file, in nanoseconds.
    pub index_mtime_ns: Option<i64>,
    /// Number of sources recorded in the index.
    pub indexed_source_count: Option<usize>,
    /// Number of features recorded in the index.
    pub indexed_feature_count: Option<usize>,
    /// Number of CityObjects recorded in the index.
    pub indexed_cityobject_count: Option<usize>,
    /// `Some(true)` when the index is covered, nothing changed on disk, and no
    /// reindex is needed.
    pub fresh: Option<bool>,
    /// `Some(true)` when no path is missing on disk or missing from the index.
    pub covered: Option<bool>,
    /// Whether the index lacks persisted data and must be rebuilt.
    pub needs_reindex: bool,
    /// Indexed source files that no longer exist among the detected sources.
    pub missing_source_paths: Vec<PathBuf>,
    /// Detected source files that are not in the index.
    pub unindexed_source_paths: Vec<PathBuf>,
    /// Indexed source files whose size or mtime differs from the index.
    pub changed_source_paths: Vec<PathBuf>,
    /// Indexed feature files that no longer exist among the detected files.
    pub missing_feature_paths: Vec<PathBuf>,
    /// Detected feature files that are not in the index.
    pub unindexed_feature_paths: Vec<PathBuf>,
    /// Indexed feature files whose size or mtime differs from the index.
    pub changed_feature_paths: Vec<PathBuf>,
    /// Human-readable descriptions of every problem found.
    pub issues: Vec<String>,
}
600
/// Result of inspecting a resolved dataset and its index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetInspection {
    /// Canonicalized dataset directory.
    pub dataset_root: PathBuf,
    /// Detected layout kind.
    pub layout: DatasetLayoutKind,
    /// Manifest summary, when a manifest was found.
    pub manifest: Option<ManifestSummary>,
    /// Number of source files detected on disk.
    pub detected_source_count: usize,
    /// Number of feature files detected on disk.
    pub detected_feature_file_count: usize,
    /// State of the index.
    pub index: IndexStatus,
}
610
/// Outcome of `ResolvedDataset::validate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationReport {
    /// `true` when the inspection recorded no index issues.
    pub ok: bool,
    /// The inspection the verdict is based on.
    pub inspection: DatasetInspection,
}
616
/// Describes where a dataset's data lives; `CityIndex::open` uses it to choose
/// a storage backend.
#[derive(Debug, Clone)]
pub enum StorageLayout {
    /// Data served by the NDJSON backend.
    Ndjson {
        /// Paths handed to the NDJSON backend.
        paths: Vec<PathBuf>,
    },
    /// Data served by the CityJSON backend.
    CityJson {
        /// Paths handed to the CityJSON backend.
        paths: Vec<PathBuf>,
    },
    /// Data served by the feature-files backend.
    FeatureFiles {
        /// Root directory of the dataset.
        root: PathBuf,
        /// Glob pattern for metadata files.
        metadata_glob: String,
        /// Glob pattern for feature files.
        feature_glob: String,
    },
}
631
632impl StorageLayout {
633 #[must_use]
634 pub fn layout_kind(&self) -> DatasetLayoutKind {
635 match self {
636 Self::Ndjson { .. } => DatasetLayoutKind::Ndjson,
637 Self::CityJson { .. } => DatasetLayoutKind::CityJson,
638 Self::FeatureFiles { .. } => DatasetLayoutKind::FeatureFiles,
639 }
640 }
641}
642
643impl ResolvedDataset {
644 #[must_use]
645 pub fn storage_layout(&self) -> StorageLayout {
646 self.storage_layout.clone()
647 }
648
649 #[must_use]
650 pub fn source_paths(&self) -> &[PathBuf] {
651 &self.source_paths
652 }
653
654 #[must_use]
655 pub fn feature_file_paths(&self) -> &[PathBuf] {
656 &self.feature_file_paths
657 }
658
659 pub fn inspect(&self) -> Result<DatasetInspection> {
665 inspect_resolved_dataset(self)
666 }
667
668 pub fn validate(&self) -> Result<ValidationReport> {
674 let inspection = self.inspect()?;
675 let ok = inspection.index.issues.is_empty();
676 Ok(ValidationReport { ok, inspection })
677 }
678}
679
680#[allow(clippy::too_many_lines)]
681fn inspect_resolved_dataset(resolved: &ResolvedDataset) -> Result<DatasetInspection> {
682 let mut status = IndexStatus {
683 path: resolved.index_path.clone(),
684 exists: resolved.index_path.exists(),
685 index_mtime_ns: None,
686 indexed_source_count: None,
687 indexed_feature_count: None,
688 indexed_cityobject_count: None,
689 fresh: None,
690 covered: None,
691 needs_reindex: false,
692 missing_source_paths: Vec::new(),
693 unindexed_source_paths: Vec::new(),
694 changed_source_paths: Vec::new(),
695 missing_feature_paths: Vec::new(),
696 unindexed_feature_paths: Vec::new(),
697 changed_feature_paths: Vec::new(),
698 issues: Vec::new(),
699 };
700
701 if status.exists {
702 let (_, mtime_ns) = file_status(&resolved.index_path)?;
703 status.index_mtime_ns = Some(mtime_ns);
704
705 let index = Index::open(&resolved.index_path)?;
706 status.indexed_source_count = Some(index.source_count()?);
707 status.indexed_feature_count = Some(index.feature_count()?);
708 status.indexed_cityobject_count = Some(index.cityobject_count()?);
709 if !index.feature_bounds_complete()? {
710 status.needs_reindex = true;
711 status
712 .issues
713 .push("index is missing persisted z bounds; run cjindex reindex".to_owned());
714 }
715
716 let indexed_sources = index.indexed_sources()?;
717 let current_sources = collect_current_file_statuses(&resolved.source_paths)?;
718 compare_path_statuses(
719 ¤t_sources,
720 &indexed_sources,
721 &mut status.missing_source_paths,
722 &mut status.unindexed_source_paths,
723 &mut status.changed_source_paths,
724 &mut status.needs_reindex,
725 );
726
727 if resolved.layout == DatasetLayoutKind::FeatureFiles {
728 let indexed_features = index.indexed_feature_paths()?;
729 let current_features = collect_current_file_statuses(&resolved.feature_file_paths)?;
730 compare_feature_statuses(
731 ¤t_features,
732 &indexed_features,
733 &mut status.missing_feature_paths,
734 &mut status.unindexed_feature_paths,
735 &mut status.changed_feature_paths,
736 &mut status.needs_reindex,
737 );
738 }
739
740 if let Some(manifest) = &resolved.manifest {
741 if let Some(expected_features) = manifest.total_features
742 && status.indexed_feature_count != Some(expected_features)
743 {
744 status.issues.push(format!(
745 "indexed feature count {} does not match manifest count {}",
746 status.indexed_feature_count.unwrap_or(0),
747 expected_features
748 ));
749 }
750 if let Some(expected_cityobjects) = manifest.total_cityobjects
751 && status.indexed_cityobject_count != Some(expected_cityobjects)
752 {
753 status.issues.push(format!(
754 "indexed CityObject count {} does not match manifest count {}",
755 status.indexed_cityobject_count.unwrap_or(0),
756 expected_cityobjects
757 ));
758 }
759 if let Some(expected_sources) = manifest.selected_tile_count
760 && resolved.layout != DatasetLayoutKind::FeatureFiles
761 && status.indexed_source_count != Some(expected_sources)
762 {
763 status.issues.push(format!(
764 "indexed source count {} does not match manifest tile count {}",
765 status.indexed_source_count.unwrap_or(0),
766 expected_sources
767 ));
768 }
769 }
770
771 if let Some(source_count) = status.indexed_source_count
772 && source_count != resolved.source_paths.len()
773 {
774 status.issues.push(format!(
775 "indexed source count {} does not match detected source count {}",
776 source_count,
777 resolved.source_paths.len()
778 ));
779 }
780
781 if !status.missing_source_paths.is_empty() {
782 status.issues.push(format!(
783 "{} indexed source files are missing on disk",
784 status.missing_source_paths.len()
785 ));
786 }
787 if !status.unindexed_source_paths.is_empty() {
788 status.issues.push(format!(
789 "{} detected source files are missing from the index",
790 status.unindexed_source_paths.len()
791 ));
792 }
793 if !status.changed_source_paths.is_empty() {
794 status.issues.push(format!(
795 "{} indexed source files changed size or mtime",
796 status.changed_source_paths.len()
797 ));
798 }
799 if !status.missing_feature_paths.is_empty() {
800 status.issues.push(format!(
801 "{} indexed feature files are missing on disk",
802 status.missing_feature_paths.len()
803 ));
804 }
805 if !status.unindexed_feature_paths.is_empty() {
806 status.issues.push(format!(
807 "{} detected feature files are missing from the index",
808 status.unindexed_feature_paths.len()
809 ));
810 }
811 if !status.changed_feature_paths.is_empty() {
812 status.issues.push(format!(
813 "{} indexed feature files changed size or mtime",
814 status.changed_feature_paths.len()
815 ));
816 }
817 if status.needs_reindex {
818 status.issues.push(
819 "index is missing persisted freshness metadata; run cjindex reindex".to_owned(),
820 );
821 }
822
823 status.covered = Some(
824 status.missing_source_paths.is_empty()
825 && status.unindexed_source_paths.is_empty()
826 && status.missing_feature_paths.is_empty()
827 && status.unindexed_feature_paths.is_empty(),
828 );
829 status.fresh = Some(
830 status.covered == Some(true)
831 && status.changed_source_paths.is_empty()
832 && status.changed_feature_paths.is_empty()
833 && !status.needs_reindex,
834 );
835 } else {
836 status.issues.push(format!(
837 "index {} does not exist",
838 resolved.index_path.display()
839 ));
840 }
841
842 Ok(DatasetInspection {
843 dataset_root: resolved.dataset_root.clone(),
844 layout: resolved.layout,
845 manifest: resolved.manifest.clone(),
846 detected_source_count: resolved.source_paths.len(),
847 detected_feature_file_count: resolved.feature_file_paths.len(),
848 index: status,
849 })
850}
851
852fn resolve_manifest_summary(dataset_root: &Path) -> Result<Option<ManifestSummary>> {
853 let candidates = [
854 dataset_root.join("manifest.json"),
855 dataset_root.parent().map_or_else(
856 || dataset_root.join("manifest.json"),
857 |parent| parent.join("manifest.json"),
858 ),
859 ];
860 for candidate in candidates {
861 if !candidate.exists() {
862 continue;
863 }
864 let manifest: Value = read_json(&candidate)?;
865 let selected_tile_count = manifest
866 .get("selected_tiles")
867 .and_then(Value::as_array)
868 .map(Vec::len);
869 let total_features = manifest
870 .get("total_features")
871 .and_then(Value::as_u64)
872 .map(usize::try_from)
873 .transpose()
874 .map_err(|_| import_error("manifest total_features does not fit in usize"))?;
875 let total_cityobjects = manifest
876 .get("total_cityobjects")
877 .and_then(Value::as_u64)
878 .map(usize::try_from)
879 .transpose()
880 .map_err(|_| import_error("manifest total_cityobjects does not fit in usize"))?;
881 return Ok(Some(ManifestSummary {
882 path: candidate,
883 selected_tile_count,
884 total_features,
885 total_cityobjects,
886 }));
887 }
888 Ok(None)
889}
890
891fn collect_current_file_statuses(paths: &[PathBuf]) -> Result<BTreeMap<PathBuf, (u64, i64)>> {
892 paths
893 .iter()
894 .map(|path| file_status(path).map(|status| (path.clone(), status)))
895 .collect()
896}
897
898fn compare_path_statuses(
899 current: &BTreeMap<PathBuf, (u64, i64)>,
900 indexed: &[IndexedSourceRecord],
901 missing_on_disk: &mut Vec<PathBuf>,
902 missing_from_index: &mut Vec<PathBuf>,
903 changed: &mut Vec<PathBuf>,
904 needs_reindex: &mut bool,
905) {
906 let indexed_by_path = indexed
907 .iter()
908 .map(|record| {
909 (
910 record.path.clone(),
911 (record.source_size, record.source_mtime_ns),
912 )
913 })
914 .collect::<BTreeMap<_, _>>();
915
916 for path in current.keys() {
917 if !indexed_by_path.contains_key(path) {
918 missing_from_index.push(path.clone());
919 }
920 }
921
922 for (path, (expected_size, expected_mtime_ns)) in indexed_by_path {
923 let Some((current_size, current_mtime_ns)) = current.get(&path) else {
924 missing_on_disk.push(path);
925 continue;
926 };
927 let Some(expected_size) = expected_size else {
928 *needs_reindex = true;
929 continue;
930 };
931 let Some(expected_mtime_ns) = expected_mtime_ns else {
932 *needs_reindex = true;
933 continue;
934 };
935 if expected_size != *current_size || expected_mtime_ns != *current_mtime_ns {
936 changed.push(path);
937 }
938 }
939}
940
941fn compare_feature_statuses(
942 current: &BTreeMap<PathBuf, (u64, i64)>,
943 indexed: &[IndexedFeaturePathRecord],
944 missing_on_disk: &mut Vec<PathBuf>,
945 missing_from_index: &mut Vec<PathBuf>,
946 changed: &mut Vec<PathBuf>,
947 needs_reindex: &mut bool,
948) {
949 let indexed_by_path = indexed
950 .iter()
951 .map(|record| {
952 (
953 record.path.clone(),
954 (record.file_size, record.file_mtime_ns),
955 )
956 })
957 .collect::<BTreeMap<_, _>>();
958
959 for path in current.keys() {
960 if !indexed_by_path.contains_key(path) {
961 missing_from_index.push(path.clone());
962 }
963 }
964
965 for (path, (expected_size, expected_mtime_ns)) in indexed_by_path {
966 let Some((current_size, current_mtime_ns)) = current.get(&path) else {
967 missing_on_disk.push(path);
968 continue;
969 };
970 let Some(expected_size) = expected_size else {
971 *needs_reindex = true;
972 continue;
973 };
974 let Some(expected_mtime_ns) = expected_mtime_ns else {
975 *needs_reindex = true;
976 continue;
977 };
978 if expected_size != *current_size || expected_mtime_ns != *current_mtime_ns {
979 changed.push(path);
980 }
981 }
982}
983
984pub fn resolve_dataset(
992 dataset_dir: &Path,
993 index_override: Option<PathBuf>,
994) -> Result<ResolvedDataset> {
995 let dataset_root = fs::canonicalize(dataset_dir).map_err(|error| {
996 import_error(format!(
997 "failed to resolve dataset directory {}: {error}",
998 dataset_dir.display()
999 ))
1000 })?;
1001 if !dataset_root.is_dir() {
1002 return Err(import_error(format!(
1003 "dataset path {} is not a directory",
1004 dataset_root.display()
1005 )));
1006 }
1007
1008 let roots = vec![dataset_root.clone()];
1009 let ndjson_paths = collect_layout_files(&roots, ".city.jsonl")?;
1010 let cityjson_paths = collect_layout_files(&roots, ".city.json")?;
1011 let metadata_paths = collect_layout_files(&roots, "metadata.json")?;
1012 let feature_file_paths = if metadata_paths.is_empty() {
1013 Vec::new()
1014 } else {
1015 ndjson_paths.clone()
1016 };
1017
1018 let feature_files_match = !metadata_paths.is_empty() && !feature_file_paths.is_empty();
1019 let ndjson_match = !ndjson_paths.is_empty() && !feature_files_match;
1020 let cityjson_match = !cityjson_paths.is_empty();
1021
1022 let mut matches = Vec::new();
1023 if ndjson_match {
1024 matches.push(DatasetLayoutKind::Ndjson);
1025 }
1026 if cityjson_match {
1027 matches.push(DatasetLayoutKind::CityJson);
1028 }
1029 if feature_files_match {
1030 matches.push(DatasetLayoutKind::FeatureFiles);
1031 }
1032
1033 if matches.is_empty() {
1034 return Err(import_error(format!(
1035 "dataset directory {} does not match ndjson, cityjson, or feature-files layouts",
1036 dataset_root.display()
1037 )));
1038 }
1039 if matches.len() > 1 {
1040 let matched_layouts = matches
1041 .into_iter()
1042 .map(DatasetLayoutKind::as_str)
1043 .collect::<Vec<_>>()
1044 .join(", ");
1045 return Err(import_error(format!(
1046 "dataset directory {} matches multiple layouts ({matched_layouts}); use explicit CLI flags instead",
1047 dataset_root.display(),
1048 )));
1049 }
1050
1051 let layout = matches[0];
1052 let storage_layout = match layout {
1053 DatasetLayoutKind::Ndjson => StorageLayout::Ndjson {
1054 paths: vec![dataset_root.clone()],
1055 },
1056 DatasetLayoutKind::CityJson => StorageLayout::CityJson {
1057 paths: vec![dataset_root.clone()],
1058 },
1059 DatasetLayoutKind::FeatureFiles => StorageLayout::FeatureFiles {
1060 root: dataset_root.clone(),
1061 metadata_glob: "**/metadata.json".to_owned(),
1062 feature_glob: "**/*.city.jsonl".to_owned(),
1063 },
1064 };
1065 let source_paths = match layout {
1066 DatasetLayoutKind::Ndjson => ndjson_paths,
1067 DatasetLayoutKind::CityJson => cityjson_paths,
1068 DatasetLayoutKind::FeatureFiles => metadata_paths,
1069 };
1070 let feature_file_paths = match layout {
1071 DatasetLayoutKind::FeatureFiles => feature_file_paths,
1072 _ => Vec::new(),
1073 };
1074
1075 Ok(ResolvedDataset {
1076 dataset_root: dataset_root.clone(),
1077 index_path: index_override.unwrap_or_else(|| dataset_root.join(".cityjson-index.sqlite")),
1078 layout,
1079 manifest: resolve_manifest_summary(&dataset_root)?,
1080 storage_layout,
1081 source_paths,
1082 feature_file_paths,
1083 })
1084}
1085
1086impl CityIndex {
1087 pub fn open(layout: StorageLayout, index_path: &Path) -> Result<Self> {
1094 let backend: Box<dyn StorageBackend> = match layout {
1095 StorageLayout::Ndjson { paths } => Box::new(NdjsonBackend { paths }),
1096 StorageLayout::CityJson { paths } => Box::new(CityJsonBackend::new(paths)),
1097 StorageLayout::FeatureFiles {
1098 root,
1099 metadata_glob,
1100 feature_glob,
1101 } => Box::new(FeatureFilesBackend::new(
1102 root,
1103 metadata_glob.as_str(),
1104 feature_glob.as_str(),
1105 )),
1106 };
1107
1108 Ok(Self {
1109 index: Index::open(index_path)?,
1110 backend,
1111 })
1112 }
1113
1114 pub fn reindex(&mut self) -> Result<()> {
1120 let worker_count = configured_worker_count()?;
1121 let scans = self.backend.scan(worker_count)?;
1122 self.index.rebuild(&scans)
1123 }
1124
1125 pub fn get(&self, id: &str) -> Result<Option<CityModel>> {
1131 self.get_with_metadata(id)
1132 .map(|maybe| maybe.map(|(_, model)| model))
1133 }
1134
1135 pub fn get_with_metadata(&self, id: &str) -> Result<Option<(Arc<Meta>, CityModel)>> {
1142 let Some(loc) = self.index.lookup_id(id)? else {
1143 return Ok(None);
1144 };
1145 let metadata = self.index.get_cached_metadata(loc.source_id)?;
1146 let model = self.backend.read_one(&loc, Arc::clone(&metadata.bytes))?;
1147 Ok(Some((metadata.value, model)))
1148 }
1149
1150 pub fn lookup_feature_ref(&self, id: &str) -> Result<Option<IndexedFeatureRef>> {
1156 self.index.lookup_feature_ref(id)
1157 }
1158
1159 pub fn lookup_feature_refs(&self, id: &str) -> Result<Vec<IndexedFeatureRef>> {
1167 self.index.lookup_feature_refs(id)
1168 }
1169
1170 pub fn lookup_feature_ref_by_rowid(&self, row_id: i64) -> Result<Option<IndexedFeatureRef>> {
1176 self.index.lookup_feature_ref_by_rowid(row_id)
1177 }
1178
1179 pub fn metadata_for_source(&self, source_id: i64) -> Result<Arc<Meta>> {
1185 self.index
1186 .get_cached_metadata(source_id)
1187 .map(|metadata| metadata.value)
1188 }
1189
1190 pub fn query(&self, bbox: &BBox) -> Result<Vec<CityModel>> {
1196 self.query_iter(bbox)?
1197 .collect::<std::result::Result<Vec<_>, _>>()
1198 }
1199
1200 pub fn query_with_metadata(&self, bbox: &BBox) -> Result<Vec<(Arc<Meta>, CityModel)>> {
1207 self.query_iter_with_metadata(bbox)?
1208 .collect::<std::result::Result<Vec<_>, _>>()
1209 }
1210
1211 pub fn query_iter(&self, bbox: &BBox) -> Result<impl Iterator<Item = Result<CityModel>> + '_> {
1217 let iter = self.query_iter_with_metadata(bbox)?;
1218 Ok(iter.map(|item| item.map(|(_, model)| model)))
1219 }
1220
1221 pub fn query_iter_with_ids(
1228 &self,
1229 bbox: &BBox,
1230 ) -> Result<impl Iterator<Item = Result<(String, CityModel)>> + '_> {
1231 let locations = self.index.lookup_bbox_iter(*bbox);
1232 Ok(locations.map(move |loc| {
1233 let loc = loc?;
1234 let feature_id = loc.feature_id.clone();
1235 let metadata = self.index.get_cached_metadata(loc.source_id)?;
1236 let model = self.backend.read_one(&loc, Arc::clone(&metadata.bytes))?;
1237 Ok((feature_id, model))
1238 }))
1239 }
1240
1241 pub fn query_iter_with_metadata(
1248 &self,
1249 bbox: &BBox,
1250 ) -> Result<impl Iterator<Item = Result<(Arc<Meta>, CityModel)>> + '_> {
1251 let locations = self.index.lookup_bbox_iter(*bbox);
1252 Ok(locations.map(move |loc| {
1253 let loc = loc?;
1254 let metadata = self.index.get_cached_metadata(loc.source_id)?;
1255 let model = self.backend.read_one(&loc, Arc::clone(&metadata.bytes))?;
1256 Ok((metadata.value, model))
1257 }))
1258 }
1259
1260 pub fn iter_all(&self) -> Result<impl Iterator<Item = Result<CityModel>> + '_> {
1266 let iter = self.iter_all_with_metadata()?;
1267 Ok(iter.map(|item| item.map(|(_, model)| model)))
1268 }
1269
1270 pub fn iter_all_with_ids(
1276 &self,
1277 ) -> Result<impl Iterator<Item = Result<(String, CityModel)>> + '_> {
1278 let iter = self.index.lookup_all_iter();
1279 Ok(iter.map(move |loc| {
1280 let loc = loc?;
1281 let feature_id = loc.location.feature_id.clone();
1282 let metadata = self.index.get_cached_metadata(loc.location.source_id)?;
1283 let model = self
1284 .backend
1285 .read_one(&loc.location, Arc::clone(&metadata.bytes))?;
1286 Ok((feature_id, model))
1287 }))
1288 }
1289
1290 pub fn iter_all_with_metadata(
1297 &self,
1298 ) -> Result<impl Iterator<Item = Result<(Arc<Meta>, CityModel)>> + '_> {
1299 let iter = self.index.lookup_all_iter();
1300 Ok(iter.map(move |loc| {
1301 let loc = loc?;
1302 let metadata = self.index.get_cached_metadata(loc.location.source_id)?;
1303 let model = self
1304 .backend
1305 .read_one(&loc.location, Arc::clone(&metadata.bytes))?;
1306 Ok((metadata.value, model))
1307 }))
1308 }
1309
1310 pub fn iter_all_feature_ref_pages(
1320 &self,
1321 page_size: usize,
1322 ) -> Result<impl Iterator<Item = Result<Vec<IndexedFeatureRef>>> + '_> {
1323 self.index.lookup_all_ref_page_iter(page_size)
1324 }
1325
1326 pub fn iter_all_bbox_pages(
1336 &self,
1337 page_size: usize,
1338 ) -> Result<impl Iterator<Item = Result<Vec<IndexedFeatureRef>>> + '_> {
1339 self.index.lookup_all_ref_page_iter(page_size)
1340 }
1341
1342 pub fn scan_features(&self) -> Result<impl Iterator<Item = Result<IndexedFeature>> + '_> {
1349 let ref_pages = self
1350 .index
1351 .lookup_all_ref_page_iter(DEFAULT_SCAN_PAGE_SIZE)?;
1352 Ok(AllIndexedFeatureIter::new(AllIndexedFeaturePageIter {
1353 city_index: self,
1354 ref_pages,
1355 }))
1356 }
1357
1358 pub fn scan_feature_pages(
1370 &self,
1371 page_size: usize,
1372 ) -> Result<impl Iterator<Item = Result<Vec<IndexedFeature>>> + '_> {
1373 let ref_pages = self.index.lookup_all_ref_page_iter(page_size)?;
1374 Ok(AllIndexedFeaturePageIter {
1375 city_index: self,
1376 ref_pages,
1377 })
1378 }
1379
1380 pub fn feature_bounds_summary(&self) -> Result<Option<FeatureBoundsSummary>> {
1388 self.index.feature_bounds_summary()
1389 }
1390
1391 pub fn read_feature(&self, feature: &IndexedFeatureRef) -> Result<CityModel> {
1397 let metadata = self.index.get_cached_metadata(feature.source_id)?;
1398 self.backend
1399 .read_one(&feature.to_location(), Arc::clone(&metadata.bytes))
1400 }
1401
1402 pub fn read_features(&self, features: &[IndexedFeatureRef]) -> Result<Vec<CityModel>> {
1408 self.read_feature_models(features)
1409 }
1410
1411 pub fn read_filtered_features(
1418 &self,
1419 features: &[IndexedFeatureRef],
1420 filter: &FeatureFilter,
1421 ) -> Result<Vec<FilteredFeature>> {
1422 self.read_feature_models(features)?
1423 .iter()
1424 .map(|model| filter.apply(model))
1425 .collect()
1426 }
1427
1428 pub fn read_indexed_features(
1436 &self,
1437 features: &[IndexedFeatureRef],
1438 ) -> Result<Vec<IndexedFeature>> {
1439 let models = self.read_feature_models(features)?;
1440 Ok(features
1441 .iter()
1442 .cloned()
1443 .zip(models)
1444 .map(|(reference, model)| IndexedFeature { reference, model })
1445 .collect())
1446 }
1447
1448 pub fn read_feature_by_rowid(&self, row_id: i64) -> Result<Option<IndexedFeature>> {
1454 let Some(reference) = self.lookup_feature_ref_by_rowid(row_id)? else {
1455 return Ok(None);
1456 };
1457 let model = self.read_feature(&reference)?;
1458 Ok(Some(IndexedFeature { reference, model }))
1459 }
1460
1461 pub fn read_features_by_rowids(&self, row_ids: &[i64]) -> Result<Vec<Option<IndexedFeature>>> {
1469 let references = self.index.lookup_feature_refs_by_rowids(row_ids)?;
1470 let present_references = references.iter().flatten().cloned().collect::<Vec<_>>();
1471 let mut present_features = self.read_indexed_features(&present_references)?.into_iter();
1472 let mut features = Vec::with_capacity(row_ids.len());
1473 for reference in references {
1474 if reference.is_some() {
1475 let feature = present_features.next().ok_or_else(|| {
1476 import_error("feature reconstruction returned fewer models than references")
1477 })?;
1478 features.push(Some(feature));
1479 } else {
1480 features.push(None);
1481 }
1482 }
1483 Ok(features)
1484 }
1485
1486 pub fn read_feature_range_after_rowid(
1494 &self,
1495 after_row_id: Option<i64>,
1496 limit: usize,
1497 ) -> Result<Vec<IndexedFeature>> {
1498 if limit == 0 {
1499 return Err(import_error("limit must be greater than zero"));
1500 }
1501 let refs = self
1502 .index
1503 .lookup_all_ref_page(after_row_id, limit)?
1504 .into_iter()
1505 .map(|record| record.feature)
1506 .collect::<Vec<_>>();
1507 self.read_indexed_features(&refs)
1508 }
1509
1510 pub fn feature_ref_count(&self) -> Result<usize> {
1516 self.index.feature_count()
1517 }
1518
1519 pub fn source_count(&self) -> Result<usize> {
1525 self.index.source_count()
1526 }
1527
1528 pub fn cityobject_count(&self) -> Result<usize> {
1534 self.index.cityobject_count()
1535 }
1536
1537 pub fn feature_ref_page(&self, offset: usize, limit: usize) -> Result<Vec<IndexedFeatureRef>> {
1545 self.index.lookup_all_ref_page_window(offset, limit)
1546 }
1547
1548 pub fn get_bytes(&self, id: &str) -> Result<Option<Vec<u8>>> {
1554 let Some(loc) = self.index.lookup_id(id)? else {
1555 return Ok(None);
1556 };
1557 read_exact_range(&loc.source_path, loc.offset, loc.length).map(Some)
1558 }
1559
1560 pub fn read_feature_bytes(&self, feature: &IndexedFeatureRef) -> Result<Vec<u8>> {
1566 read_exact_range(&feature.source_path, feature.offset, feature.length)
1567 }
1568
1569 pub fn metadata(&self) -> Result<Vec<Arc<Meta>>> {
1575 self.index.metadata()
1576 }
1577
1578 fn read_feature_models(&self, features: &[IndexedFeatureRef]) -> Result<Vec<CityModel>> {
1579 let mut features_by_source: BTreeMap<i64, Vec<(usize, FeatureLocation)>> = BTreeMap::new();
1580 for (index, feature) in features.iter().enumerate() {
1581 features_by_source
1582 .entry(feature.source_id)
1583 .or_default()
1584 .push((index, feature.to_location()));
1585 }
1586
1587 let mut models = std::iter::repeat_with(|| None)
1588 .take(features.len())
1589 .collect::<Vec<Option<CityModel>>>();
1590 for (source_id, indexed_locations) in features_by_source {
1591 let metadata = self.index.get_cached_metadata(source_id)?;
1592 let locations = indexed_locations
1593 .iter()
1594 .map(|(_, location)| location)
1595 .collect::<Vec<_>>();
1596 let source_models = self
1597 .backend
1598 .read_many(&locations, Arc::clone(&metadata.bytes))?;
1599 for ((index, _), model) in indexed_locations.into_iter().zip(source_models) {
1600 models[index] = Some(model);
1601 }
1602 }
1603
1604 Ok(models
1605 .into_iter()
1606 .map(|model| model.expect("every input feature should have a decoded model"))
1607 .collect())
1608 }
1609}
1610
/// Parsed metadata of an indexed source, held as an untyped JSON value.
type Meta = serde_json::Value;
1612
/// Metadata of one source, kept both parsed and as the raw JSON text bytes it
/// was parsed from. Both parts are reference-counted, so clones are cheap.
#[derive(Clone)]
struct CachedMetadata {
    /// Parsed JSON value of the stored metadata.
    value: Arc<Meta>,
    /// UTF-8 bytes of the stored metadata JSON text; handed to the backend on reads.
    bytes: Arc<[u8]>,
}
1618
/// SQLite-backed index of sources and features.
struct Index {
    /// Connection to the index database.
    conn: rusqlite::Connection,
    /// Metadata already loaded from the `sources` table, keyed by source id.
    /// Cleared whenever the index is rebuilt.
    metadata_cache: Mutex<HashMap<i64, CachedMetadata>>,
}
1623
/// Everything a backend is given to read one feature from disk.
struct FeatureLocation {
    /// Feature id as stored in the index (not necessarily unique).
    feature_id: String,
    /// Row id of the owning entry in the `sources` table.
    source_id: i64,
    /// File holding the feature bytes (the feature row's `path` column).
    source_path: PathBuf,
    /// Byte offset of the feature within `source_path`.
    offset: u64,
    /// Byte length of the feature within `source_path`.
    length: u64,
    /// `vertices_offset` column of the owning source, when recorded.
    vertices_offset: Option<u64>,
    /// `vertices_length` column of the owning source, when recorded.
    vertices_length: Option<u64>,
    /// `member_ranges` column of the feature (JSON text), when recorded.
    member_ranges_json: Option<String>,
}
1634
/// A feature location together with the row id it was loaded from; the row id
/// serves as the cursor for paginated scans.
struct IndexedFeatureLocation {
    /// Row id of the feature in the `features` table.
    row_id: i64,
    /// Where the feature's bytes live.
    location: FeatureLocation,
}
1639
/// A public feature reference together with the row id it was loaded from; the
/// row id serves as the cursor for paginated scans.
struct IndexedFeatureRefLocation {
    /// Row id of the feature in the `features` table.
    row_id: i64,
    /// The feature reference handed out to callers.
    feature: IndexedFeatureRef,
}
1644
/// One scanned feature staged for insertion while the index is rebuilt.
struct FeatureIndexEntry {
    /// Feature id reported by the scan.
    id: String,
    /// Row id of the source row inserted for the scan this feature came from.
    source_id: i64,
    /// File holding the feature bytes.
    path: PathBuf,
    /// Size of `path` reported by the scan.
    file_size: u64,
    /// Modification time of `path` reported by the scan, in nanoseconds.
    file_mtime_ns: i64,
    /// Byte offset of the feature within `path`.
    offset: u64,
    /// Byte length of the feature within `path`.
    length: u64,
    /// 3D bounds of the feature.
    bounds: FeatureBounds,
    /// Number of `CityObject`s reported for the feature.
    cityobject_count: u64,
    /// Member ranges of the feature serialised as JSON text, when present.
    member_ranges_json: Option<String>,
}
1657
/// File status recorded for one row of the `sources` table.
struct IndexedSourceRecord {
    /// Path of the source.
    path: PathBuf,
    /// Recorded size of the source; `None` when the column is NULL.
    source_size: Option<u64>,
    /// Recorded modification time in nanoseconds; `None` when the column is NULL.
    source_mtime_ns: Option<i64>,
}
1663
/// File status recorded for one distinct feature file in the `features` table.
struct IndexedFeaturePathRecord {
    /// Path of the feature file.
    path: PathBuf,
    /// Recorded size of the file; `None` when the column is NULL.
    file_size: Option<u64>,
    /// Recorded modification time in nanoseconds; `None` when the column is NULL.
    file_mtime_ns: Option<i64>,
}
1669
/// Iterator over the locations of features whose 2D bounds intersect a box,
/// fetched from the index one page at a time in row-id order.
struct BBoxLocationIter<'a> {
    /// Index queried for each page.
    index: &'a Index,
    /// Query box.
    bbox: BBox,
    /// Row id of the last location handed out; the next page starts after it.
    last_row_id: Option<i64>,
    /// Remainder of the page currently being drained.
    page: std::vec::IntoIter<IndexedFeatureLocation>,
    /// Set once the index is exhausted or an error was yielded.
    finished: bool,
}
1677
/// Iterator over the locations of all indexed features, fetched from the index
/// one page at a time in row-id order.
struct AllLocationIter<'a> {
    /// Index queried for each page.
    index: &'a Index,
    /// Row id of the last location handed out; the next page starts after it.
    last_row_id: Option<i64>,
    /// Remainder of the page currently being drained.
    page: std::vec::IntoIter<IndexedFeatureLocation>,
    /// Set once the index is exhausted or an error was yielded.
    finished: bool,
}
1684
/// Iterator yielding all feature references as whole pages in row-id order.
struct AllFeatureRefPageIter<'a> {
    /// Index queried for each page.
    index: &'a Index,
    /// Maximum number of references per page; validated to be non-zero on construction.
    page_size: usize,
    /// Row id of the last reference of the previous page.
    last_row_id: Option<i64>,
    /// Set once the index is exhausted or an error was yielded.
    finished: bool,
}
1691
/// Iterator that turns each page of feature references into a page of fully
/// read features.
struct AllIndexedFeaturePageIter<'a> {
    /// Used to read the models for each page of references.
    city_index: &'a CityIndex,
    /// Source of reference pages.
    ref_pages: AllFeatureRefPageIter<'a>,
}
1696
/// Iterator that flattens pages of read features into single features.
struct AllIndexedFeatureIter<'a> {
    /// Source of feature pages.
    pages: AllIndexedFeaturePageIter<'a>,
    /// Remainder of the page currently being drained.
    page: std::vec::IntoIter<IndexedFeature>,
    /// Set once the pages are exhausted or an error was yielded.
    finished: bool,
}
1702
1703impl<'a> BBoxLocationIter<'a> {
1704 const PAGE_SIZE: usize = 512;
1705
1706 fn new(index: &'a Index, bbox: BBox) -> Self {
1707 Self {
1708 index,
1709 bbox,
1710 last_row_id: None,
1711 page: Vec::new().into_iter(),
1712 finished: false,
1713 }
1714 }
1715
1716 fn next_location(&mut self) -> Result<Option<FeatureLocation>> {
1717 if self.finished {
1718 return Ok(None);
1719 }
1720
1721 if let Some(feature) = self.page.next() {
1722 self.last_row_id = Some(feature.row_id);
1723 return Ok(Some(feature.location));
1724 }
1725
1726 let page = self
1727 .index
1728 .lookup_bbox_page(&self.bbox, self.last_row_id, Self::PAGE_SIZE)?;
1729 if page.is_empty() {
1730 self.finished = true;
1731 return Ok(None);
1732 }
1733
1734 self.page = page.into_iter();
1735 let feature = self
1736 .page
1737 .next()
1738 .expect("non-empty page should yield at least one feature");
1739 self.last_row_id = Some(feature.row_id);
1740 Ok(Some(feature.location))
1741 }
1742}
1743
1744impl<'a> AllLocationIter<'a> {
1745 const PAGE_SIZE: usize = 512;
1746
1747 fn new(index: &'a Index) -> Self {
1748 Self {
1749 index,
1750 last_row_id: None,
1751 page: Vec::new().into_iter(),
1752 finished: false,
1753 }
1754 }
1755
1756 fn next_location(&mut self) -> Result<Option<IndexedFeatureLocation>> {
1757 if self.finished {
1758 return Ok(None);
1759 }
1760
1761 if let Some(feature) = self.page.next() {
1762 self.last_row_id = Some(feature.row_id);
1763 return Ok(Some(feature));
1764 }
1765
1766 let page = self
1767 .index
1768 .lookup_all_page(self.last_row_id, Self::PAGE_SIZE)?;
1769 if page.is_empty() {
1770 self.finished = true;
1771 return Ok(None);
1772 }
1773
1774 self.page = page.into_iter();
1775 let feature = self
1776 .page
1777 .next()
1778 .expect("non-empty page should yield at least one feature");
1779 self.last_row_id = Some(feature.row_id);
1780 Ok(Some(feature))
1781 }
1782}
1783
1784impl<'a> AllFeatureRefPageIter<'a> {
1785 fn new(index: &'a Index, page_size: usize) -> Result<Self> {
1786 if page_size == 0 {
1787 return Err(import_error("page_size must be greater than zero"));
1788 }
1789 Ok(Self {
1790 index,
1791 page_size,
1792 last_row_id: None,
1793 finished: false,
1794 })
1795 }
1796
1797 fn next_page(&mut self) -> Result<Option<Vec<IndexedFeatureRef>>> {
1798 if self.finished {
1799 return Ok(None);
1800 }
1801
1802 let page = self
1803 .index
1804 .lookup_all_ref_page(self.last_row_id, self.page_size)?;
1805 if page.is_empty() {
1806 self.finished = true;
1807 return Ok(None);
1808 }
1809
1810 self.last_row_id = Some(
1811 page.last()
1812 .expect("non-empty page should yield at least one feature")
1813 .row_id,
1814 );
1815 Ok(Some(
1816 page.into_iter().map(|record| record.feature).collect(),
1817 ))
1818 }
1819}
1820
1821impl Iterator for BBoxLocationIter<'_> {
1822 type Item = Result<FeatureLocation>;
1823
1824 fn next(&mut self) -> Option<Self::Item> {
1825 match self.next_location() {
1826 Ok(Some(feature)) => Some(Ok(feature)),
1827 Ok(None) => None,
1828 Err(error) => {
1829 self.finished = true;
1830 Some(Err(error))
1831 }
1832 }
1833 }
1834}
1835
1836impl Iterator for AllLocationIter<'_> {
1837 type Item = Result<IndexedFeatureLocation>;
1838
1839 fn next(&mut self) -> Option<Self::Item> {
1840 match self.next_location() {
1841 Ok(Some(feature)) => Some(Ok(feature)),
1842 Ok(None) => None,
1843 Err(error) => {
1844 self.finished = true;
1845 Some(Err(error))
1846 }
1847 }
1848 }
1849}
1850
1851impl Iterator for AllFeatureRefPageIter<'_> {
1852 type Item = Result<Vec<IndexedFeatureRef>>;
1853
1854 fn next(&mut self) -> Option<Self::Item> {
1855 match self.next_page() {
1856 Ok(Some(page)) => Some(Ok(page)),
1857 Ok(None) => None,
1858 Err(error) => {
1859 self.finished = true;
1860 Some(Err(error))
1861 }
1862 }
1863 }
1864}
1865
1866impl Iterator for AllIndexedFeaturePageIter<'_> {
1867 type Item = Result<Vec<IndexedFeature>>;
1868
1869 fn next(&mut self) -> Option<Self::Item> {
1870 self.ref_pages
1871 .next()
1872 .map(|page| page.and_then(|refs| self.city_index.read_indexed_features(&refs)))
1873 }
1874}
1875
1876impl<'a> AllIndexedFeatureIter<'a> {
1877 fn new(pages: AllIndexedFeaturePageIter<'a>) -> Self {
1878 Self {
1879 pages,
1880 page: Vec::new().into_iter(),
1881 finished: false,
1882 }
1883 }
1884}
1885
1886impl Iterator for AllIndexedFeatureIter<'_> {
1887 type Item = Result<IndexedFeature>;
1888
1889 fn next(&mut self) -> Option<Self::Item> {
1890 if self.finished {
1891 return None;
1892 }
1893
1894 loop {
1895 if let Some(feature) = self.page.next() {
1896 return Some(Ok(feature));
1897 }
1898 match self.pages.next() {
1899 Some(Ok(page)) => {
1900 self.page = page.into_iter();
1901 }
1902 Some(Err(error)) => {
1903 self.finished = true;
1904 return Some(Err(error));
1905 }
1906 None => {
1907 self.finished = true;
1908 return None;
1909 }
1910 }
1911 }
1912 }
1913}
1914
1915impl Index {
1916 fn open(path: &Path) -> Result<Self> {
1917 if let Some(parent) = path
1918 .parent()
1919 .filter(|parent| !parent.as_os_str().is_empty())
1920 {
1921 fs::create_dir_all(parent)?;
1922 }
1923
1924 let conn = sqlite_result(rusqlite::Connection::open(path))?;
1925 sqlite_result(conn.execute_batch(
1926 r"
1927 PRAGMA foreign_keys = ON;
1928
1929 CREATE TABLE IF NOT EXISTS sources (
1930 id INTEGER PRIMARY KEY AUTOINCREMENT,
1931 path TEXT NOT NULL UNIQUE,
1932 metadata TEXT NOT NULL,
1933 vertices_offset INTEGER,
1934 vertices_length INTEGER,
1935 source_size INTEGER,
1936 source_mtime_ns INTEGER
1937 );
1938
1939 CREATE TABLE IF NOT EXISTS features (
1940 id INTEGER PRIMARY KEY AUTOINCREMENT,
1941 feature_id TEXT NOT NULL,
1942 source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
1943 path TEXT NOT NULL,
1944 file_size INTEGER,
1945 file_mtime_ns INTEGER,
1946 offset INTEGER NOT NULL,
1947 length INTEGER NOT NULL,
1948 min_z REAL,
1949 max_z REAL,
1950 cityobject_count INTEGER,
1951 member_ranges TEXT
1952 );
1953
1954 CREATE VIRTUAL TABLE IF NOT EXISTS feature_bbox
1955 USING rtree(
1956 feature_rowid,
1957 min_x,
1958 max_x,
1959 min_y,
1960 max_y
1961 );
1962
1963 CREATE TABLE IF NOT EXISTS bbox_map (
1964 feature_rowid INTEGER PRIMARY KEY,
1965 feature_id TEXT NOT NULL
1966 );
1967 ",
1968 ))?;
1969 Self::ensure_duplicate_feature_ids_allowed(&conn)?;
1970 Self::ensure_member_ranges_column(&conn)?;
1971 Self::ensure_source_status_columns(&conn)?;
1972 Self::ensure_feature_status_columns(&conn)?;
1973 Self::ensure_feature_bounds_columns(&conn)?;
1974
1975 Ok(Self {
1976 conn,
1977 metadata_cache: Mutex::new(HashMap::new()),
1978 })
1979 }
1980
1981 fn rebuild(&mut self, scans: &[SourceScan]) -> Result<()> {
1982 let tx = sqlite_result(self.conn.transaction())?;
1983 Self::clear_tables(&tx)?;
1984
1985 let mut feature_entries = Vec::new();
1986 for scan in scans {
1987 let source_id = Self::insert_source_in_tx(
1988 &tx,
1989 scan.path.as_path(),
1990 &scan.metadata,
1991 scan.vertices_offset,
1992 scan.vertices_length,
1993 scan.source_size,
1994 scan.source_mtime_ns,
1995 )?;
1996 for feature in &scan.features {
1997 feature_entries.push(FeatureIndexEntry {
1998 id: feature.id.clone(),
1999 source_id,
2000 path: feature.path.clone(),
2001 file_size: feature.file_size,
2002 file_mtime_ns: feature.file_mtime_ns,
2003 offset: feature.offset,
2004 length: feature.length,
2005 bounds: feature.bounds,
2006 cityobject_count: feature.cityobject_count,
2007 member_ranges_json: feature
2008 .member_ranges
2009 .as_ref()
2010 .map(json_string)
2011 .transpose()?,
2012 });
2013 }
2014 }
2015 Self::insert_features_in_tx(&tx, &feature_entries)?;
2016 sqlite_result(tx.commit())?;
2017
2018 self.metadata_cache
2019 .lock()
2020 .unwrap_or_else(std::sync::PoisonError::into_inner)
2021 .clear();
2022 Ok(())
2023 }
2024
2025 fn lookup_id(&self, id: &str) -> Result<Option<FeatureLocation>> {
2026 sqlite_result(
2027 self.conn
2028 .query_row(
2029 r"
2030 SELECT
2031 f.feature_id,
2032 s.id,
2033 f.path,
2034 f.offset,
2035 f.length,
2036 s.vertices_offset,
2037 s.vertices_length,
2038 f.member_ranges
2039 FROM features AS f
2040 JOIN sources AS s ON s.id = f.source_id
2041 WHERE f.feature_id = ?1
2042 ORDER BY f.id
2043 LIMIT 1
2044 ",
2045 params![id],
2046 Self::feature_location_from_row,
2047 )
2048 .optional(),
2049 )
2050 }
2051
2052 fn lookup_feature_ref(&self, id: &str) -> Result<Option<IndexedFeatureRef>> {
2053 sqlite_result(
2054 self.conn
2055 .query_row(
2056 r"
2057 SELECT
2058 f.id,
2059 f.feature_id,
2060 s.id,
2061 f.path,
2062 f.offset,
2063 f.length,
2064 s.vertices_offset,
2065 s.vertices_length,
2066 f.member_ranges,
2067 fb.min_x,
2068 fb.max_x,
2069 fb.min_y,
2070 fb.max_y,
2071 f.min_z,
2072 f.max_z
2073 FROM features AS f
2074 JOIN sources AS s ON s.id = f.source_id
2075 JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2076 WHERE f.feature_id = ?1
2077 ORDER BY f.id
2078 LIMIT 1
2079 ",
2080 params![id],
2081 Self::indexed_feature_ref_location_from_row,
2082 )
2083 .optional()
2084 .map(|maybe| maybe.map(|record| record.feature)),
2085 )
2086 }
2087
2088 fn lookup_feature_refs(&self, id: &str) -> Result<Vec<IndexedFeatureRef>> {
2089 let mut stmt = sqlite_result(self.conn.prepare(
2090 r"
2091 SELECT
2092 f.id,
2093 f.feature_id,
2094 s.id,
2095 f.path,
2096 f.offset,
2097 f.length,
2098 s.vertices_offset,
2099 s.vertices_length,
2100 f.member_ranges,
2101 fb.min_x,
2102 fb.max_x,
2103 fb.min_y,
2104 fb.max_y,
2105 f.min_z,
2106 f.max_z
2107 FROM features AS f
2108 JOIN sources AS s ON s.id = f.source_id
2109 JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2110 WHERE f.feature_id = ?1
2111 ORDER BY f.id
2112 ",
2113 ))?;
2114 let rows = sqlite_result(
2115 stmt.query_map(params![id], Self::indexed_feature_ref_location_from_row),
2116 )?;
2117 sqlite_result(rows.map(|row| row.map(|record| record.feature)).collect())
2118 }
2119
2120 fn lookup_feature_ref_by_rowid(&self, row_id: i64) -> Result<Option<IndexedFeatureRef>> {
2121 sqlite_result(
2122 self.conn
2123 .query_row(
2124 r"
2125 SELECT
2126 f.id,
2127 f.feature_id,
2128 s.id,
2129 f.path,
2130 f.offset,
2131 f.length,
2132 s.vertices_offset,
2133 s.vertices_length,
2134 f.member_ranges,
2135 fb.min_x,
2136 fb.max_x,
2137 fb.min_y,
2138 fb.max_y,
2139 f.min_z,
2140 f.max_z
2141 FROM features AS f
2142 JOIN sources AS s ON s.id = f.source_id
2143 JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2144 WHERE f.id = ?1
2145 ",
2146 params![row_id],
2147 Self::indexed_feature_ref_location_from_row,
2148 )
2149 .optional()
2150 .map(|maybe| maybe.map(|record| record.feature)),
2151 )
2152 }
2153
2154 fn lookup_feature_refs_by_rowids(
2155 &self,
2156 row_ids: &[i64],
2157 ) -> Result<Vec<Option<IndexedFeatureRef>>> {
2158 row_ids
2159 .iter()
2160 .map(|row_id| self.lookup_feature_ref_by_rowid(*row_id))
2161 .collect()
2162 }
2163
    /// Returns a lazy iterator over the locations of features whose 2D bounds
    /// intersect `bbox`. No query runs until the iterator is advanced.
    fn lookup_bbox_iter(&self, bbox: BBox) -> BBoxLocationIter<'_> {
        BBoxLocationIter::new(self, bbox)
    }
2167
    /// Returns a lazy iterator over the locations of all indexed features.
    /// No query runs until the iterator is advanced.
    fn lookup_all_iter(&self) -> AllLocationIter<'_> {
        AllLocationIter::new(self)
    }
2171
    /// Returns a lazy iterator over pages of at most `page_size` feature
    /// references. Fails when `page_size` is zero.
    fn lookup_all_ref_page_iter(&self, page_size: usize) -> Result<AllFeatureRefPageIter<'_>> {
        AllFeatureRefPageIter::new(self, page_size)
    }
2175
2176 fn lookup_all_ref_page_window(
2177 &self,
2178 offset: usize,
2179 limit: usize,
2180 ) -> Result<Vec<IndexedFeatureRef>> {
2181 let mut stmt = sqlite_result(self.conn.prepare(
2182 r"
2183 SELECT
2184 f.id,
2185 f.feature_id,
2186 s.id,
2187 f.path,
2188 f.offset,
2189 f.length,
2190 s.vertices_offset,
2191 s.vertices_length,
2192 f.member_ranges,
2193 fb.min_x,
2194 fb.max_x,
2195 fb.min_y,
2196 fb.max_y,
2197 f.min_z,
2198 f.max_z
2199 FROM features AS f
2200 JOIN sources AS s ON s.id = f.source_id
2201 JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2202 ORDER BY f.id
2203 LIMIT ?2 OFFSET ?1
2204 ",
2205 ))?;
2206 let rows = sqlite_result(stmt.query_map(
2207 params![offset, limit],
2208 Self::indexed_feature_ref_location_from_row,
2209 ))?;
2210 sqlite_result(rows.map(|row| row.map(|record| record.feature)).collect())
2211 }
2212
2213 fn lookup_bbox_page(
2214 &self,
2215 bbox: &BBox,
2216 after_row_id: Option<i64>,
2217 limit: usize,
2218 ) -> Result<Vec<IndexedFeatureLocation>> {
2219 let mut stmt = sqlite_result(self.conn.prepare(
2220 r"
2221 SELECT
2222 f.id,
2223 f.feature_id,
2224 s.id,
2225 f.path,
2226 f.offset,
2227 f.length,
2228 s.vertices_offset,
2229 s.vertices_length,
2230 f.member_ranges
2231 FROM feature_bbox AS fb
2232 JOIN bbox_map AS bm ON bm.feature_rowid = fb.feature_rowid
2233 JOIN features AS f ON f.id = bm.feature_rowid
2234 JOIN sources AS s ON s.id = f.source_id
2235 WHERE fb.min_x <= ?2
2236 AND fb.max_x >= ?1
2237 AND fb.min_y <= ?4
2238 AND fb.max_y >= ?3
2239 AND (?5 IS NULL OR f.id > ?5)
2240 ORDER BY f.id
2241 LIMIT ?6
2242 ",
2243 ))?;
2244 let rows = sqlite_result(stmt.query_map(
2245 params![
2246 bbox.min_x,
2247 bbox.max_x,
2248 bbox.min_y,
2249 bbox.max_y,
2250 after_row_id,
2251 limit
2252 ],
2253 Self::indexed_feature_location_from_row,
2254 ))?;
2255 sqlite_result(rows.collect())
2256 }
2257
2258 fn lookup_all_page(
2259 &self,
2260 after_row_id: Option<i64>,
2261 limit: usize,
2262 ) -> Result<Vec<IndexedFeatureLocation>> {
2263 let sql = match after_row_id {
2264 Some(_) => {
2265 r"
2266 SELECT
2267 f.id,
2268 f.feature_id,
2269 s.id,
2270 f.path,
2271 f.offset,
2272 f.length,
2273 s.vertices_offset,
2274 s.vertices_length,
2275 f.member_ranges
2276 FROM features AS f
2277 JOIN sources AS s ON s.id = f.source_id
2278 WHERE f.id > ?1
2279 ORDER BY f.id
2280 LIMIT ?2
2281 "
2282 }
2283 None => {
2284 r"
2285 SELECT
2286 f.id,
2287 f.feature_id,
2288 s.id,
2289 f.path,
2290 f.offset,
2291 f.length,
2292 s.vertices_offset,
2293 s.vertices_length,
2294 f.member_ranges
2295 FROM features AS f
2296 JOIN sources AS s ON s.id = f.source_id
2297 ORDER BY f.id
2298 LIMIT ?1
2299 "
2300 }
2301 };
2302 let mut stmt = sqlite_result(self.conn.prepare(sql))?;
2303 let rows = if let Some(after_row_id) = after_row_id {
2304 sqlite_result(stmt.query_map(
2305 params![after_row_id, limit],
2306 Self::indexed_feature_location_from_row,
2307 ))?
2308 } else {
2309 sqlite_result(stmt.query_map(params![limit], Self::indexed_feature_location_from_row))?
2310 };
2311 sqlite_result(rows.collect())
2312 }
2313
2314 fn lookup_all_ref_page(
2315 &self,
2316 after_row_id: Option<i64>,
2317 limit: usize,
2318 ) -> Result<Vec<IndexedFeatureRefLocation>> {
2319 let sql = match after_row_id {
2320 Some(_) => {
2321 r"
2322 SELECT
2323 f.id,
2324 f.feature_id,
2325 s.id,
2326 f.path,
2327 f.offset,
2328 f.length,
2329 s.vertices_offset,
2330 s.vertices_length,
2331 f.member_ranges,
2332 fb.min_x,
2333 fb.max_x,
2334 fb.min_y,
2335 fb.max_y,
2336 f.min_z,
2337 f.max_z
2338 FROM features AS f
2339 JOIN sources AS s ON s.id = f.source_id
2340 JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2341 WHERE f.id > ?1
2342 ORDER BY f.id
2343 LIMIT ?2
2344 "
2345 }
2346 None => {
2347 r"
2348 SELECT
2349 f.id,
2350 f.feature_id,
2351 s.id,
2352 f.path,
2353 f.offset,
2354 f.length,
2355 s.vertices_offset,
2356 s.vertices_length,
2357 f.member_ranges,
2358 fb.min_x,
2359 fb.max_x,
2360 fb.min_y,
2361 fb.max_y,
2362 f.min_z,
2363 f.max_z
2364 FROM features AS f
2365 JOIN sources AS s ON s.id = f.source_id
2366 JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2367 ORDER BY f.id
2368 LIMIT ?1
2369 "
2370 }
2371 };
2372 let mut stmt = sqlite_result(self.conn.prepare(sql))?;
2373 let rows = if let Some(after_row_id) = after_row_id {
2374 sqlite_result(stmt.query_map(
2375 params![after_row_id, limit],
2376 Self::indexed_feature_ref_location_from_row,
2377 ))?
2378 } else {
2379 sqlite_result(
2380 stmt.query_map(params![limit], Self::indexed_feature_ref_location_from_row),
2381 )?
2382 };
2383 sqlite_result(rows.collect())
2384 }
2385
2386 fn get_cached_metadata(&self, source_id: i64) -> Result<CachedMetadata> {
2387 if let Some(metadata) = self
2388 .metadata_cache
2389 .lock()
2390 .unwrap_or_else(std::sync::PoisonError::into_inner)
2391 .get(&source_id)
2392 .cloned()
2393 {
2394 return Ok(metadata);
2395 }
2396
2397 let metadata_json: String = sqlite_result(self.conn.query_row(
2398 "SELECT metadata FROM sources WHERE id = ?1",
2399 params![source_id],
2400 |row| row.get(0),
2401 ))?;
2402 let metadata: Meta = parse_json_str(&metadata_json)?;
2403 let metadata = CachedMetadata {
2404 value: Arc::new(metadata),
2405 bytes: Arc::from(metadata_json.into_bytes()),
2406 };
2407
2408 self.metadata_cache
2409 .lock()
2410 .unwrap_or_else(std::sync::PoisonError::into_inner)
2411 .insert(source_id, metadata.clone());
2412
2413 Ok(metadata)
2414 }
2415
2416 fn get_metadata(&self, source_id: i64) -> Result<Arc<Meta>> {
2417 self.get_cached_metadata(source_id)
2418 .map(|metadata| metadata.value)
2419 }
2420
2421 fn metadata(&self) -> Result<Vec<Arc<Meta>>> {
2422 let mut stmt = sqlite_result(self.conn.prepare("SELECT id FROM sources ORDER BY id"))?;
2423 let rows = sqlite_result(stmt.query_map([], |row| row.get::<_, i64>(0)))?;
2424 let source_ids = sqlite_result(rows.collect::<rusqlite::Result<Vec<_>>>())?;
2425 source_ids
2426 .into_iter()
2427 .map(|source_id| self.get_metadata(source_id))
2428 .collect()
2429 }
2430
    /// Returns the number of rows in the `sources` table.
    fn source_count(&self) -> Result<usize> {
        self.query_count("SELECT COUNT(*) FROM sources")
    }
2434
    /// Returns the number of rows in the `features` table.
    fn feature_count(&self) -> Result<usize> {
        self.query_count("SELECT COUNT(*) FROM features")
    }
2438
2439 fn cityobject_count(&self) -> Result<usize> {
2440 let total = sqlite_result(self.conn.query_row(
2441 "SELECT COALESCE(SUM(cityobject_count), 0) FROM features",
2442 [],
2443 |row| row.get::<_, i64>(0),
2444 ))?;
2445 usize::try_from(total)
2446 .map_err(|_| import_error("indexed CityObject count does not fit in usize"))
2447 }
2448
2449 fn query_count(&self, sql: &str) -> Result<usize> {
2450 let count = sqlite_result(self.conn.query_row(sql, [], |row| row.get::<_, i64>(0)))?;
2451 usize::try_from(count).map_err(|_| import_error("count does not fit in usize"))
2452 }
2453
2454 fn feature_bounds_summary(&self) -> Result<Option<FeatureBoundsSummary>> {
2455 let summary = sqlite_result(self.conn.query_row(
2456 r"
2457 SELECT
2458 COUNT(*),
2459 MIN(fb.min_x),
2460 MAX(fb.max_x),
2461 MIN(fb.min_y),
2462 MAX(fb.max_y),
2463 MIN(f.min_z),
2464 MAX(f.max_z)
2465 FROM features AS f
2466 JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2467 ",
2468 [],
2469 |row| {
2470 let count = row.get::<_, i64>(0)?;
2471 if count == 0 {
2472 return Ok(None);
2473 }
2474 let feature_count = usize::try_from(count).map_err(|error| {
2475 rusqlite::Error::FromSqlConversionFailure(
2476 0,
2477 rusqlite::types::Type::Integer,
2478 Box::new(error),
2479 )
2480 })?;
2481 Ok(Some(FeatureBoundsSummary {
2482 bounds: FeatureBounds {
2483 min_x: row.get::<_, f64>(1)?,
2484 max_x: row.get::<_, f64>(2)?,
2485 min_y: row.get::<_, f64>(3)?,
2486 max_y: row.get::<_, f64>(4)?,
2487 min_z: row.get::<_, f64>(5)?,
2488 max_z: row.get::<_, f64>(6)?,
2489 },
2490 feature_count,
2491 }))
2492 },
2493 ))?;
2494 Ok(summary)
2495 }
2496
2497 fn indexed_sources(&self) -> Result<Vec<IndexedSourceRecord>> {
2498 let mut stmt = sqlite_result(self.conn.prepare(
2499 r"
2500 SELECT path, source_size, source_mtime_ns
2501 FROM sources
2502 ORDER BY path
2503 ",
2504 ))?;
2505 let rows = sqlite_result(stmt.query_map([], |row| {
2506 Ok(IndexedSourceRecord {
2507 path: PathBuf::from(row.get::<_, String>(0)?),
2508 source_size: row.get::<_, Option<i64>>(1)?.map(i64_to_u64).transpose()?,
2509 source_mtime_ns: row.get::<_, Option<i64>>(2)?,
2510 })
2511 }))?;
2512 sqlite_result(rows.collect())
2513 }
2514
2515 fn indexed_feature_paths(&self) -> Result<Vec<IndexedFeaturePathRecord>> {
2516 let mut stmt = sqlite_result(self.conn.prepare(
2517 r"
2518 SELECT DISTINCT path, file_size, file_mtime_ns
2519 FROM features
2520 ORDER BY path
2521 ",
2522 ))?;
2523 let rows = sqlite_result(stmt.query_map([], |row| {
2524 Ok(IndexedFeaturePathRecord {
2525 path: PathBuf::from(row.get::<_, String>(0)?),
2526 file_size: row.get::<_, Option<i64>>(1)?.map(i64_to_u64).transpose()?,
2527 file_mtime_ns: row.get::<_, Option<i64>>(2)?,
2528 })
2529 }))?;
2530 sqlite_result(rows.collect())
2531 }
2532
2533 fn ensure_member_ranges_column(conn: &rusqlite::Connection) -> Result<()> {
2534 let mut stmt = sqlite_result(conn.prepare("PRAGMA table_info(features)"))?;
2535 let rows = sqlite_result(stmt.query_map([], |row| row.get::<_, String>(1)))?;
2536 let columns = sqlite_result(rows.collect::<rusqlite::Result<Vec<_>>>())?;
2537 if !columns.iter().any(|column| column == "member_ranges") {
2538 sqlite_result(conn.execute("ALTER TABLE features ADD COLUMN member_ranges TEXT", []))?;
2539 }
2540 Ok(())
2541 }
2542
2543 fn ensure_source_status_columns(conn: &rusqlite::Connection) -> Result<()> {
2544 let mut stmt = sqlite_result(conn.prepare("PRAGMA table_info(sources)"))?;
2545 let rows = sqlite_result(stmt.query_map([], |row| row.get::<_, String>(1)))?;
2546 let columns = sqlite_result(rows.collect::<rusqlite::Result<Vec<_>>>())?;
2547 if !columns.iter().any(|column| column == "source_size") {
2548 sqlite_result(conn.execute("ALTER TABLE sources ADD COLUMN source_size INTEGER", []))?;
2549 }
2550 if !columns.iter().any(|column| column == "source_mtime_ns") {
2551 sqlite_result(
2552 conn.execute("ALTER TABLE sources ADD COLUMN source_mtime_ns INTEGER", []),
2553 )?;
2554 }
2555 Ok(())
2556 }
2557
2558 fn ensure_feature_status_columns(conn: &rusqlite::Connection) -> Result<()> {
2559 let mut stmt = sqlite_result(conn.prepare("PRAGMA table_info(features)"))?;
2560 let rows = sqlite_result(stmt.query_map([], |row| row.get::<_, String>(1)))?;
2561 let columns = sqlite_result(rows.collect::<rusqlite::Result<Vec<_>>>())?;
2562 if !columns.iter().any(|column| column == "file_size") {
2563 sqlite_result(conn.execute("ALTER TABLE features ADD COLUMN file_size INTEGER", []))?;
2564 }
2565 if !columns.iter().any(|column| column == "file_mtime_ns") {
2566 sqlite_result(
2567 conn.execute("ALTER TABLE features ADD COLUMN file_mtime_ns INTEGER", []),
2568 )?;
2569 }
2570 if !columns.iter().any(|column| column == "cityobject_count") {
2571 sqlite_result(conn.execute(
2572 "ALTER TABLE features ADD COLUMN cityobject_count INTEGER",
2573 [],
2574 ))?;
2575 }
2576 Ok(())
2577 }
2578
2579 fn ensure_feature_bounds_columns(conn: &rusqlite::Connection) -> Result<()> {
2580 let mut stmt = sqlite_result(conn.prepare("PRAGMA table_info(features)"))?;
2581 let rows = sqlite_result(stmt.query_map([], |row| row.get::<_, String>(1)))?;
2582 let columns = sqlite_result(rows.collect::<rusqlite::Result<Vec<_>>>())?;
2583 if !columns.iter().any(|column| column == "min_z") {
2584 sqlite_result(conn.execute("ALTER TABLE features ADD COLUMN min_z REAL", []))?;
2585 }
2586 if !columns.iter().any(|column| column == "max_z") {
2587 sqlite_result(conn.execute("ALTER TABLE features ADD COLUMN max_z REAL", []))?;
2588 }
2589 Ok(())
2590 }
2591
2592 fn feature_bounds_complete(&self) -> Result<bool> {
2593 let missing = sqlite_result(self.conn.query_row(
2594 "SELECT COUNT(*) FROM features WHERE min_z IS NULL OR max_z IS NULL",
2595 [],
2596 |row| row.get::<_, i64>(0),
2597 ))?;
2598 Ok(missing == 0)
2599 }
2600
    /// Rebuilds `features` and `bbox_map` without a `UNIQUE` constraint on
    /// `feature_id` when either table's stored schema still declares one.
    ///
    /// The check is textual: it looks for the exact fragment
    /// `feature_id TEXT NOT NULL UNIQUE` in the table's `CREATE TABLE` SQL.
    /// When found, both tables are recreated under a `_new` name, their rows
    /// are copied over (row ids are preserved), the old tables are dropped and
    /// the new ones renamed into place.
    ///
    /// NOTE(review): the batch switches `foreign_keys` off and back on but does
    /// not open an explicit transaction itself. If a statement fails midway the
    /// schema may be left partially migrated with foreign keys still disabled —
    /// confirm whether callers wrap this call or whether that is acceptable.
    ///
    /// NOTE(review): the copy selects `file_size`, `file_mtime_ns`, `min_z`,
    /// `max_z`, `cityobject_count` and `member_ranges` from the old table, so
    /// this presumably must run after the `ensure_*_columns` helpers — verify
    /// against the caller.
    fn ensure_duplicate_feature_ids_allowed(conn: &rusqlite::Connection) -> Result<()> {
        if table_sql_contains(conn, "features", "feature_id TEXT NOT NULL UNIQUE")?
            || table_sql_contains(conn, "bbox_map", "feature_id TEXT NOT NULL UNIQUE")?
        {
            // Foreign keys are disabled while `features` is dropped and
            // recreated, then re-enabled unconditionally at the end.
            sqlite_result(conn.execute_batch(
                r"
                PRAGMA foreign_keys = OFF;

                DROP TABLE IF EXISTS bbox_map_new;
                CREATE TABLE bbox_map_new (
                    feature_rowid INTEGER PRIMARY KEY,
                    feature_id TEXT NOT NULL
                );
                INSERT INTO bbox_map_new (feature_rowid, feature_id)
                SELECT feature_rowid, feature_id FROM bbox_map;
                DROP TABLE bbox_map;
                ALTER TABLE bbox_map_new RENAME TO bbox_map;

                DROP TABLE IF EXISTS features_new;
                CREATE TABLE features_new (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    feature_id TEXT NOT NULL,
                    source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
                    path TEXT NOT NULL,
                    file_size INTEGER,
                    file_mtime_ns INTEGER,
                    offset INTEGER NOT NULL,
                    length INTEGER NOT NULL,
                    min_z REAL,
                    max_z REAL,
                    cityobject_count INTEGER,
                    member_ranges TEXT
                );
                INSERT INTO features_new (
                    id,
                    feature_id,
                    source_id,
                    path,
                    file_size,
                    file_mtime_ns,
                    offset,
                    length,
                    min_z,
                    max_z,
                    cityobject_count,
                    member_ranges
                )
                SELECT
                    id,
                    feature_id,
                    source_id,
                    path,
                    file_size,
                    file_mtime_ns,
                    offset,
                    length,
                    min_z,
                    max_z,
                    cityobject_count,
                    member_ranges
                FROM features;
                DROP TABLE features;
                ALTER TABLE features_new RENAME TO features;

                PRAGMA foreign_keys = ON;
                ",
            ))?;
        }
        Ok(())
    }
2671
2672 fn clear_tables(tx: &rusqlite::Transaction<'_>) -> Result<()> {
2673 sqlite_result(tx.execute_batch(
2674 r"
2675 DELETE FROM bbox_map;
2676 DELETE FROM feature_bbox;
2677 DELETE FROM features;
2678 DELETE FROM sources;
2679 ",
2680 ))?;
2681 Ok(())
2682 }
2683
2684 fn insert_source_in_tx(
2685 tx: &rusqlite::Transaction<'_>,
2686 path: &Path,
2687 meta: &Meta,
2688 vertices_offset: Option<u64>,
2689 vertices_length: Option<u64>,
2690 source_size: u64,
2691 source_mtime_ns: i64,
2692 ) -> Result<i64> {
2693 let metadata_json = json_string(meta)?;
2694 let vertices_offset = sqlite_result(vertices_offset.map(u64_to_i64).transpose())?;
2695 let vertices_length = sqlite_result(vertices_length.map(u64_to_i64).transpose())?;
2696 let source_size = sqlite_result(u64_to_i64(source_size))?;
2697 sqlite_result(tx.execute(
2698 r"
2699 INSERT INTO sources (
2700 path,
2701 metadata,
2702 vertices_offset,
2703 vertices_length,
2704 source_size,
2705 source_mtime_ns
2706 )
2707 VALUES (?1, ?2, ?3, ?4, ?5, ?6)
2708 ",
2709 params![
2710 path.to_string_lossy(),
2711 metadata_json,
2712 vertices_offset,
2713 vertices_length,
2714 source_size,
2715 source_mtime_ns,
2716 ],
2717 ))?;
2718 Ok(tx.last_insert_rowid())
2719 }
2720
2721 fn insert_features_in_tx(
2722 tx: &rusqlite::Transaction<'_>,
2723 entries: &[FeatureIndexEntry],
2724 ) -> Result<()> {
2725 let mut feature_stmt = sqlite_result(tx.prepare(
2726 r"
2727 INSERT INTO features (
2728 feature_id,
2729 source_id,
2730 path,
2731 file_size,
2732 file_mtime_ns,
2733 offset,
2734 length,
2735 min_z,
2736 max_z,
2737 cityobject_count,
2738 member_ranges
2739 )
2740 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
2741 ",
2742 ))?;
2743 let mut bbox_stmt = sqlite_result(tx.prepare(
2744 r"
2745 INSERT INTO feature_bbox (feature_rowid, min_x, max_x, min_y, max_y)
2746 VALUES (?1, ?2, ?3, ?4, ?5)
2747 ",
2748 ))?;
2749 let mut map_stmt = sqlite_result(tx.prepare(
2750 r"
2751 INSERT INTO bbox_map (feature_rowid, feature_id)
2752 VALUES (?1, ?2)
2753 ",
2754 ))?;
2755 for entry in entries {
2756 let file_size = sqlite_result(u64_to_i64(entry.file_size))?;
2757 let offset = sqlite_result(u64_to_i64(entry.offset))?;
2758 let length = sqlite_result(u64_to_i64(entry.length))?;
2759 let cityobject_count = sqlite_result(u64_to_i64(entry.cityobject_count))?;
2760 sqlite_result(feature_stmt.execute(params![
2761 &entry.id,
2762 entry.source_id,
2763 entry.path.to_string_lossy(),
2764 file_size,
2765 entry.file_mtime_ns,
2766 offset,
2767 length,
2768 entry.bounds.min_z,
2769 entry.bounds.max_z,
2770 cityobject_count,
2771 &entry.member_ranges_json,
2772 ]))?;
2773 let feature_rowid = tx.last_insert_rowid();
2774 sqlite_result(bbox_stmt.execute(params![
2775 feature_rowid,
2776 entry.bounds.min_x,
2777 entry.bounds.max_x,
2778 entry.bounds.min_y,
2779 entry.bounds.max_y,
2780 ]))?;
2781 sqlite_result(map_stmt.execute(params![feature_rowid, &entry.id]))?;
2782 }
2783
2784 Ok(())
2785 }
2786
    /// Decodes a `FeatureLocation` from a row whose location columns start at
    /// column index 0.
    fn feature_location_from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<FeatureLocation> {
        Self::feature_location_from_row_offset(row, 0)
    }
2790
2791 fn feature_location_from_row_offset(
2792 row: &rusqlite::Row<'_>,
2793 col: usize,
2794 ) -> rusqlite::Result<FeatureLocation> {
2795 let feature_id = row.get::<_, String>(col)?;
2796 let source_id = row.get::<_, i64>(col + 1)?;
2797 let source_path = PathBuf::from(row.get::<_, String>(col + 2)?);
2798 let offset = i64_to_u64(row.get::<_, i64>(col + 3)?)?;
2799 let length = i64_to_u64(row.get::<_, i64>(col + 4)?)?;
2800 let vertices_offset = match row.get::<_, Option<i64>>(col + 5)? {
2801 Some(value) => Some(i64_to_u64(value)?),
2802 None => None,
2803 };
2804 let vertices_length = match row.get::<_, Option<i64>>(col + 6)? {
2805 Some(value) => Some(i64_to_u64(value)?),
2806 None => None,
2807 };
2808 let member_ranges_json = row.get::<_, Option<String>>(col + 7)?;
2809
2810 Ok(FeatureLocation {
2811 feature_id,
2812 source_id,
2813 source_path,
2814 offset,
2815 length,
2816 vertices_offset,
2817 vertices_length,
2818 member_ranges_json,
2819 })
2820 }
2821
2822 fn indexed_feature_location_from_row(
2823 row: &rusqlite::Row<'_>,
2824 ) -> rusqlite::Result<IndexedFeatureLocation> {
2825 let row_id = row.get::<_, i64>(0)?;
2826 let location = Self::feature_location_from_row_offset(row, 1)?;
2827 Ok(IndexedFeatureLocation { row_id, location })
2828 }
2829
2830 fn indexed_feature_ref_location_from_row(
2831 row: &rusqlite::Row<'_>,
2832 ) -> rusqlite::Result<IndexedFeatureRefLocation> {
2833 let row_id = row.get::<_, i64>(0)?;
2834 let feature_id = row.get::<_, String>(1)?;
2835 let source_id = row.get::<_, i64>(2)?;
2836 let source_path = PathBuf::from(row.get::<_, String>(3)?);
2837 let offset = i64_to_u64(row.get::<_, i64>(4)?)?;
2838 let length = i64_to_u64(row.get::<_, i64>(5)?)?;
2839 let vertices_offset = match row.get::<_, Option<i64>>(6)? {
2840 Some(value) => Some(i64_to_u64(value)?),
2841 None => None,
2842 };
2843 let vertices_length = match row.get::<_, Option<i64>>(7)? {
2844 Some(value) => Some(i64_to_u64(value)?),
2845 None => None,
2846 };
2847 let member_ranges_json = row.get::<_, Option<String>>(8)?;
2848 let bounds = FeatureBounds {
2849 min_x: row.get::<_, f64>(9)?,
2850 max_x: row.get::<_, f64>(10)?,
2851 min_y: row.get::<_, f64>(11)?,
2852 max_y: row.get::<_, f64>(12)?,
2853 min_z: row.get::<_, f64>(13)?,
2854 max_z: row.get::<_, f64>(14)?,
2855 };
2856
2857 Ok(IndexedFeatureRefLocation {
2858 row_id,
2859 feature: IndexedFeatureRef {
2860 row_id,
2861 feature_id,
2862 source_id,
2863 source_path,
2864 offset,
2865 length,
2866 vertices_offset,
2867 vertices_length,
2868 member_ranges_json,
2869 bounds,
2870 },
2871 })
2872 }
2873}
2874
2875trait StorageBackend: Send + Sync {
2876 fn scan(&self, worker_count: usize) -> Result<Vec<SourceScan>>;
2877 fn read_one(&self, loc: &FeatureLocation, metadata_bytes: Arc<[u8]>) -> Result<CityModel>;
2878 fn read_many(
2879 &self,
2880 locations: &[&FeatureLocation],
2881 metadata_bytes: Arc<[u8]>,
2882 ) -> Result<Vec<CityModel>> {
2883 locations
2884 .iter()
2885 .map(|loc| self.read_one(loc, Arc::clone(&metadata_bytes)))
2886 .collect()
2887 }
2888}
2889
/// Outcome of scanning one source: its path, parsed metadata, file status and
/// the features discovered in it.
struct SourceScan {
    path: PathBuf,
    metadata: Meta,
    // Byte range of the shared vertices; `None` for feature-file sources
    // (see `scan_feature_files_root`).
    vertices_offset: Option<u64>,
    vertices_length: Option<u64>,
    source_size: u64,
    source_mtime_ns: i64,
    features: Vec<ScannedFeature>,
}
2899
/// One feature discovered during a scan, with the byte range and file status
/// of the file it lives in.
struct ScannedFeature {
    id: String,
    path: PathBuf,
    file_size: u64,
    file_mtime_ns: i64,
    // For standalone feature files `scan_feature_file` sets `offset` to 0 and
    // `length` to the file size.
    offset: u64,
    length: u64,
    bounds: FeatureBounds,
    cityobject_count: u64,
    // `None` for standalone feature files (see `scan_feature_file`).
    member_ranges: Option<Vec<IndexedObjectRange>>,
}
2911
/// Byte range of a single CityObject entry inside a source file, keyed by the
/// object id.
///
/// `CityJsonBackend::read_one_from_file` parses a list of these from
/// `member_ranges_json` and checks that the fragment read at each range
/// carries the same `id`.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct IndexedObjectRange {
    id: String,
    offset: u64,
    length: u64,
}
2918
/// A feature assembled by `build_feature_parts`: its CityObjects plus a
/// vertex list containing only the vertices those objects reference.
struct LocalizedFeatureParts {
    feature_id: String,
    cityobjects: Vec<LocalizedFeatureObject>,
    vertices: Vec<[i64; 3]>,
}
2924
/// One CityObject of a localized feature: its id and its serialized JSON.
struct LocalizedFeatureObject {
    id: String,
    // Serialized after relationships were filtered and boundaries remapped
    // (see `build_feature_parts`).
    object_json: Box<RawValue>,
}
2929
/// Storage backend for `.jsonl` sources found under the configured paths.
struct NdjsonBackend {
    // Passed to `collect_layout_files` with the `.jsonl` suffix on scan.
    paths: Vec<PathBuf>,
}
2933
2934impl StorageBackend for NdjsonBackend {
2935 fn scan(&self, worker_count: usize) -> Result<Vec<SourceScan>> {
2936 let paths = collect_layout_files(&self.paths, ".jsonl")?;
2937 parallel_scan_items(&paths, worker_count, |path| {
2938 scan_ndjson_source(path.as_path())
2939 })
2940 }
2941
2942 fn read_one(&self, loc: &FeatureLocation, metadata_bytes: Arc<[u8]>) -> Result<CityModel> {
2943 let bytes = read_exact_range(&loc.source_path, loc.offset, loc.length)?;
2944 feature_slice_with_indexed_id(&bytes, loc, metadata_bytes.as_ref())
2945 }
2946
2947 fn read_many(
2948 &self,
2949 locations: &[&FeatureLocation],
2950 metadata_bytes: Arc<[u8]>,
2951 ) -> Result<Vec<CityModel>> {
2952 read_feature_slices_with_base(locations, metadata_bytes.as_ref())
2953 }
2954}
2955
/// Storage backend for `.city.json` sources found under the configured paths.
struct CityJsonBackend {
    // Passed to `collect_layout_files` with the `.city.json` suffix on scan.
    paths: Vec<PathBuf>,
    // Parsed shared vertices per source path, filled by `load_shared_vertices`.
    vertices_cache: Mutex<LruCache<PathBuf, Arc<Vec<[i64; 3]>>>>,
}
2960
2961impl CityJsonBackend {
2962 fn new(paths: Vec<PathBuf>) -> Self {
2963 Self {
2964 paths,
2965 vertices_cache: Mutex::new(LruCache::unbounded()),
2966 }
2967 }
2968
2969 fn load_shared_vertices(
2970 &self,
2971 source_path: &Path,
2972 source_file: &mut fs::File,
2973 offset: u64,
2974 length: u64,
2975 ) -> Result<Arc<Vec<[i64; 3]>>> {
2976 let mut cache = self
2977 .vertices_cache
2978 .lock()
2979 .unwrap_or_else(std::sync::PoisonError::into_inner);
2980 if let Some(vertices) = cache.get(source_path) {
2981 return Ok(Arc::clone(vertices));
2982 }
2983
2984 let vertices_bytes = read_exact_range_from_file(source_file, source_path, offset, length)?;
2985 let vertices = Arc::new(parse_vertices_fragment(&vertices_bytes)?);
2986 cache.put(source_path.to_path_buf(), Arc::clone(&vertices));
2987 Ok(vertices)
2988 }
2989}
2990
2991impl StorageBackend for CityJsonBackend {
2992 fn scan(&self, worker_count: usize) -> Result<Vec<SourceScan>> {
2993 let _ = &self.vertices_cache;
2994 let paths = collect_layout_files(&self.paths, ".city.json")?;
2995 parallel_scan_items(&paths, worker_count, |path| {
2996 scan_cityjson_source(path.as_path())
2997 })
2998 }
2999
3000 fn read_one(&self, loc: &FeatureLocation, metadata_bytes: Arc<[u8]>) -> Result<CityModel> {
3001 let mut source_file = fs::File::open(&loc.source_path)?;
3002 self.read_one_from_file(loc, metadata_bytes.as_ref(), &mut source_file)
3003 }
3004
3005 fn read_many(
3006 &self,
3007 locations: &[&FeatureLocation],
3008 metadata_bytes: Arc<[u8]>,
3009 ) -> Result<Vec<CityModel>> {
3010 let mut locations_by_path: BTreeMap<PathBuf, Vec<usize>> = BTreeMap::new();
3011 for (index, location) in locations.iter().enumerate() {
3012 locations_by_path
3013 .entry(location.source_path.clone())
3014 .or_default()
3015 .push(index);
3016 }
3017
3018 let mut models = std::iter::repeat_with(|| None)
3019 .take(locations.len())
3020 .collect::<Vec<Option<CityModel>>>();
3021 for (path, mut indexes) in locations_by_path {
3022 indexes.sort_by_key(|index| locations[*index].offset);
3023 let mut source_file = fs::File::open(&path)?;
3024 for index in indexes {
3025 let model = self.read_one_from_file(
3026 locations[index],
3027 metadata_bytes.as_ref(),
3028 &mut source_file,
3029 )?;
3030 models[index] = Some(model);
3031 }
3032 }
3033
3034 Ok(models
3035 .into_iter()
3036 .map(|model| model.expect("every input feature should have a decoded model"))
3037 .collect())
3038 }
3039}
3040
3041impl CityJsonBackend {
3042 fn read_one_from_file(
3043 &self,
3044 loc: &FeatureLocation,
3045 metadata_bytes: &[u8],
3046 source_file: &mut fs::File,
3047 ) -> Result<CityModel> {
3048 let vertices_offset = loc.vertices_offset.ok_or_else(|| {
3049 Error::UnsupportedFeature(
3050 "regular CityJSON reads require an indexed shared vertices range".into(),
3051 )
3052 })?;
3053 let vertices_length = loc.vertices_length.ok_or_else(|| {
3054 Error::UnsupportedFeature(
3055 "regular CityJSON reads require an indexed shared vertices range".into(),
3056 )
3057 })?;
3058
3059 let member_ranges = loc
3060 .member_ranges_json
3061 .as_deref()
3062 .map(parse_json_str::<Vec<IndexedObjectRange>>)
3063 .transpose()?
3064 .unwrap_or_else(|| {
3065 vec![IndexedObjectRange {
3066 id: loc.feature_id.clone(),
3067 offset: loc.offset,
3068 length: loc.length,
3069 }]
3070 });
3071 let mut object_entries = Vec::with_capacity(member_ranges.len());
3072 for member_range in &member_ranges {
3073 let object_fragment = read_exact_range_from_file(
3074 source_file,
3075 &loc.source_path,
3076 member_range.offset,
3077 member_range.length,
3078 )?;
3079 let (object_id, object_value) = parse_cityobject_entry(&object_fragment)?;
3080 if object_id != member_range.id {
3081 return Err(import_error(format!(
3082 "indexed CityJSON member {} resolved to fragment for {}",
3083 member_range.id, object_id
3084 )));
3085 }
3086 object_entries.push((object_id, object_value));
3087 }
3088 let shared_vertices = self.load_shared_vertices(
3089 &loc.source_path,
3090 source_file,
3091 vertices_offset,
3092 vertices_length,
3093 )?;
3094 let feature_parts =
3095 build_feature_parts(&loc.feature_id, object_entries, shared_vertices.as_ref())?;
3096 let cityobjects = feature_parts
3097 .cityobjects
3098 .iter()
3099 .map(|cityobject| staged::FeatureObjectFragment {
3100 id: cityobject.id.as_str(),
3101 object: cityobject.object_json.as_ref(),
3102 })
3103 .collect::<Vec<_>>();
3104 let assembly = staged::FeatureAssembly {
3105 id: feature_parts.feature_id.as_str(),
3106 cityobjects: &cityobjects,
3107 vertices: &feature_parts.vertices,
3108 };
3109
3110 staged::from_feature_assembly_with_base(assembly, metadata_bytes)
3111 }
3112}
3113
/// Storage backend for a directory tree of metadata files and standalone
/// feature files, both selected by glob.
struct FeatureFilesBackend {
    root: PathBuf,
    // Both globs are matched against paths relative to `root`
    // (see `discover_feature_file_sources`).
    metadata_glob: GlobMatcher,
    feature_glob: GlobMatcher,
}
3119
/// One metadata file found under a feature-files root, together with the
/// feature files that resolved to it.
struct FeatureFileSourcePlan {
    // Path of the metadata file itself.
    path: PathBuf,
    metadata: Meta,
    // File status of the metadata file, as returned by `file_status`.
    source_size: u64,
    source_mtime_ns: i64,
    feature_paths: Vec<PathBuf>,
}
3127
/// Work item for scanning one feature file: the file path, the metadata of
/// its source, and the index of that source in the scan result.
struct FeatureFileScanItem<'a> {
    // Index into the `sources` vector built by `scan_feature_files_root`.
    source_index: usize,
    metadata: &'a Meta,
    path: &'a Path,
}
3133
3134impl FeatureFilesBackend {
3135 fn new(root: PathBuf, metadata_glob: &str, feature_glob: &str) -> Self {
3136 let metadata_glob = globset::Glob::new(metadata_glob)
3137 .expect("metadata glob must be valid")
3138 .compile_matcher();
3139 let feature_glob = globset::Glob::new(feature_glob)
3140 .expect("feature glob must be valid")
3141 .compile_matcher();
3142 Self {
3143 root,
3144 metadata_glob,
3145 feature_glob,
3146 }
3147 }
3148}
3149
3150impl StorageBackend for FeatureFilesBackend {
3151 fn scan(&self, worker_count: usize) -> Result<Vec<SourceScan>> {
3152 scan_feature_files_root(
3153 &self.root,
3154 &self.metadata_glob,
3155 &self.feature_glob,
3156 worker_count,
3157 )
3158 }
3159
3160 fn read_one(&self, loc: &FeatureLocation, metadata_bytes: Arc<[u8]>) -> Result<CityModel> {
3161 let feature_bytes = read_exact_range(&loc.source_path, loc.offset, loc.length)?;
3162 feature_slice_with_indexed_id(&feature_bytes, loc, metadata_bytes.as_ref())
3163 }
3164
3165 fn read_many(
3166 &self,
3167 locations: &[&FeatureLocation],
3168 metadata_bytes: Arc<[u8]>,
3169 ) -> Result<Vec<CityModel>> {
3170 read_feature_slices_with_base(locations, metadata_bytes.as_ref())
3171 }
3172}
3173
3174fn scan_feature_files_root(
3175 root: &Path,
3176 metadata_glob: &GlobMatcher,
3177 feature_glob: &GlobMatcher,
3178 worker_count: usize,
3179) -> Result<Vec<SourceScan>> {
3180 let plans = discover_feature_file_sources(root, metadata_glob, feature_glob)?;
3181 let mut sources = plans
3182 .iter()
3183 .map(|plan| SourceScan {
3184 path: plan.path.clone(),
3185 metadata: plan.metadata.clone(),
3186 vertices_offset: None,
3187 vertices_length: None,
3188 source_size: plan.source_size,
3189 source_mtime_ns: plan.source_mtime_ns,
3190 features: Vec::with_capacity(plan.feature_paths.len()),
3191 })
3192 .collect::<Vec<_>>();
3193 let scan_items = plans
3194 .iter()
3195 .enumerate()
3196 .flat_map(|(source_index, plan)| {
3197 plan.feature_paths
3198 .iter()
3199 .map(move |path| FeatureFileScanItem {
3200 source_index,
3201 metadata: &plan.metadata,
3202 path: path.as_path(),
3203 })
3204 })
3205 .collect::<Vec<_>>();
3206 let features = parallel_scan_items(&scan_items, worker_count, scan_feature_file)?;
3207 for (source_index, features) in features {
3208 sources[source_index].features.extend(features);
3209 }
3210 Ok(sources)
3211}
3212
3213fn discover_feature_file_sources(
3214 root: &Path,
3215 metadata_glob: &GlobMatcher,
3216 feature_glob: &GlobMatcher,
3217) -> Result<Vec<FeatureFileSourcePlan>> {
3218 let mut metadata_files = Vec::new();
3219 let mut feature_files = Vec::new();
3220
3221 for entry in WalkBuilder::new(root)
3222 .hidden(false)
3223 .follow_links(true)
3224 .build()
3225 {
3226 let entry = entry.map_err(|error| import_error(error.to_string()))?;
3227 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
3228 continue;
3229 }
3230 if entry.metadata().is_ok_and(|meta| meta.len() == 0) {
3231 continue;
3232 }
3233 let path = entry.into_path();
3234 let rel = path.strip_prefix(root).unwrap_or(path.as_path());
3235 if metadata_glob.is_match(rel) {
3236 metadata_files.push(path);
3237 } else if feature_glob.is_match(rel) {
3238 feature_files.push(path);
3239 }
3240 }
3241
3242 metadata_files.sort();
3243 feature_files.sort();
3244
3245 if metadata_files.is_empty() {
3246 return Err(import_error(format!(
3247 "feature-files root {} does not contain any metadata files",
3248 root.display()
3249 )));
3250 }
3251
3252 let mut metadata_by_dir = BTreeMap::new();
3253 let mut sources = BTreeMap::new();
3254
3255 for metadata_path in metadata_files {
3256 let metadata: Meta = read_json(&metadata_path)?;
3257 let (source_size, source_mtime_ns) = file_status(&metadata_path)?;
3258 let parent = metadata_path.parent().unwrap_or(root).to_path_buf();
3259 metadata_by_dir.insert(parent, metadata_path.clone());
3260 sources.insert(
3261 metadata_path.clone(),
3262 FeatureFileSourcePlan {
3263 path: metadata_path,
3264 metadata,
3265 source_size,
3266 source_mtime_ns,
3267 feature_paths: Vec::new(),
3268 },
3269 );
3270 }
3271
3272 for feature_path in feature_files {
3273 let metadata_path = resolve_feature_metadata_path(root, &feature_path, &metadata_by_dir)
3274 .ok_or_else(|| {
3275 import_error(format!(
3276 "no ancestor metadata file found for feature {}",
3277 feature_path.display()
3278 ))
3279 })?;
3280 let source = sources.get_mut(&metadata_path).ok_or_else(|| {
3281 import_error(format!(
3282 "feature {} resolved to missing metadata source {}",
3283 feature_path.display(),
3284 metadata_path.display()
3285 ))
3286 })?;
3287 source.feature_paths.push(feature_path);
3288 }
3289
3290 Ok(sources.into_values().collect())
3291}
3292
3293fn scan_feature_file(item: &FeatureFileScanItem<'_>) -> Result<(usize, Vec<ScannedFeature>)> {
3294 let feature: Value = read_json(item.path)?;
3295 let (ids, bounds, cityobject_count) = parse_feature_file_bounds(&feature, item.metadata)?;
3296 let (file_size, file_mtime_ns) = file_status(item.path)?;
3297 let features = ids
3298 .into_iter()
3299 .map(|id| ScannedFeature {
3300 id,
3301 path: item.path.to_path_buf(),
3302 file_size,
3303 file_mtime_ns,
3304 offset: 0,
3305 length: file_size,
3306 bounds,
3307 cityobject_count,
3308 member_ranges: None,
3309 })
3310 .collect();
3311 Ok((item.source_index, features))
3312}
3313
/// Finds the metadata file registered for the nearest ancestor directory of
/// `feature_path`.
///
/// The search starts at the feature's parent directory and walks upwards. It
/// stops after checking `root`; if `root` is never reached it continues to the
/// top of the path. Returns `None` when no directory on the way has an entry
/// in `metadata_by_dir`.
fn resolve_feature_metadata_path(
    root: &Path,
    feature_path: &Path,
    metadata_by_dir: &BTreeMap<PathBuf, PathBuf>,
) -> Option<PathBuf> {
    // `ancestors()` yields the path itself first; skip it to start at the parent.
    for directory in feature_path.ancestors().skip(1) {
        if let Some(metadata_path) = metadata_by_dir.get(directory) {
            return Some(metadata_path.clone());
        }
        if directory == root {
            return None;
        }
    }
    None
}
3331
3332fn parse_feature_file_bounds(
3333 feature: &Value,
3334 metadata: &Meta,
3335) -> Result<(Vec<String>, FeatureBounds, u64)> {
3336 let ids = feature_cityobject_keys(feature, "feature file")?;
3337 let vertices = feature
3338 .get("vertices")
3339 .cloned()
3340 .ok_or_else(|| import_error("feature file is missing vertices"))?;
3341 let vertices: Vec<[i64; 3]> = parse_json_value(vertices)?;
3342
3343 let referenced_vertices = collect_feature_vertex_indices(feature, vertices.len())?;
3344 let (scale, translate) = parse_ndjson_transform(metadata)?;
3345 let bounds = feature_bounds_from_vertices(&vertices, &referenced_vertices, scale, translate)?;
3346 let cityobject_count = feature_cityobject_count(feature, "feature file")?;
3347 Ok((ids, bounds, cityobject_count))
3348}
3349
/// Strips leading and trailing ASCII whitespace and commas from `bytes`.
///
/// Returns an empty slice when the input consists only of such delimiters.
fn trim_fragment_delimiters(bytes: &[u8]) -> &[u8] {
    let is_content = |byte: &u8| !(byte.is_ascii_whitespace() || *byte == b',');

    let Some(first) = bytes.iter().position(is_content) else {
        return &bytes[bytes.len()..];
    };
    // A content byte exists, so `rposition` finds one at or after `first`.
    let last = bytes.iter().rposition(is_content).unwrap_or(first);
    &bytes[first..=last]
}
3363
3364fn parse_cityobject_entry(fragment: &[u8]) -> Result<(String, Value)> {
3365 let fragment = trim_fragment_delimiters(fragment);
3366 if fragment.is_empty() {
3367 return Err(import_error("CityObject entry fragment is empty"));
3368 }
3369
3370 let mut wrapped = Vec::with_capacity(fragment.len() + 2);
3371 wrapped.push(b'{');
3372 wrapped.extend_from_slice(fragment);
3373 wrapped.push(b'}');
3374
3375 let entry: Map<String, Value> = parse_json_slice(&wrapped)?;
3376 if entry.len() != 1 {
3377 return Err(import_error(
3378 "CityObject entry fragment must contain exactly one object entry",
3379 ));
3380 }
3381
3382 let (object_id, object_value) = entry
3383 .into_iter()
3384 .next()
3385 .ok_or_else(|| import_error("CityObject entry fragment is empty"))?;
3386 if !object_value.is_object() {
3387 return Err(import_error("CityObject entry value must be a JSON object"));
3388 }
3389
3390 Ok((object_id, object_value))
3391}
3392
3393fn parse_vertices_fragment(fragment: &[u8]) -> Result<Vec<[i64; 3]>> {
3394 let fragment = trim_fragment_delimiters(fragment);
3395 if fragment.is_empty() {
3396 return Err(import_error("shared vertices fragment is empty"));
3397 }
3398 parse_json_slice(fragment)
3399}
3400
3401fn build_feature_parts(
3402 feature_id: &str,
3403 mut object_entries: Vec<(String, Value)>,
3404 shared_vertices: &[[i64; 3]],
3405) -> Result<LocalizedFeatureParts> {
3406 let retained_ids = object_entries
3407 .iter()
3408 .map(|(id, _)| id.clone())
3409 .collect::<BTreeSet<_>>();
3410
3411 for (_, object_value) in &mut object_entries {
3412 filter_local_relationships(object_value, &retained_ids)?;
3413 }
3414
3415 let mut referenced_vertices = BTreeSet::new();
3416 for (_, object_value) in &object_entries {
3417 collect_object_vertex_indices(object_value, &mut referenced_vertices)?;
3418 }
3419
3420 let local_vertices = build_local_vertices(shared_vertices, &referenced_vertices)?;
3421 let remap = referenced_vertices
3422 .iter()
3423 .enumerate()
3424 .map(|(new_index, old_index)| (*old_index, new_index))
3425 .collect::<HashMap<_, _>>();
3426
3427 for (_, object_value) in &mut object_entries {
3428 if let Some(geometries) = object_value
3429 .as_object_mut()
3430 .and_then(|object| object.get_mut("geometry"))
3431 .and_then(Value::as_array_mut)
3432 {
3433 for geometry in geometries {
3434 if let Some(boundaries) = geometry.get_mut("boundaries") {
3435 remap_vertex_indices(boundaries, &remap)?;
3436 }
3437 }
3438 }
3439 }
3440
3441 let cityobjects = object_entries
3442 .into_iter()
3443 .map(|(id, object_value)| {
3444 let object_json = RawValue::from_string(json_string(&object_value)?)
3445 .map_err(|error| import_error(error.to_string()))?;
3446 Ok(LocalizedFeatureObject { id, object_json })
3447 })
3448 .collect::<Result<Vec<_>>>()?;
3449
3450 Ok(LocalizedFeatureParts {
3451 feature_id: feature_id.to_owned(),
3452 cityobjects,
3453 vertices: local_vertices,
3454 })
3455}
3456
3457fn filter_local_relationships(
3458 object_value: &mut Value,
3459 retained_ids: &BTreeSet<String>,
3460) -> Result<()> {
3461 let object = object_value
3462 .as_object_mut()
3463 .ok_or_else(|| import_error("CityObject value must be a JSON object"))?;
3464
3465 for key in ["children", "parents"] {
3466 let remove_key = match object.get_mut(key) {
3467 Some(value) => {
3468 let refs = value
3469 .as_array_mut()
3470 .ok_or_else(|| import_error(format!("{key} must be an array")))?;
3471 refs.retain(|entry| {
3472 entry
3473 .as_str()
3474 .is_some_and(|object_id| retained_ids.contains(object_id))
3475 });
3476 refs.is_empty()
3477 }
3478 None => false,
3479 };
3480
3481 if remove_key {
3482 object.remove(key);
3483 }
3484 }
3485
3486 Ok(())
3487}
3488
3489fn collect_vertex_indices(value: &Value, indices: &mut BTreeSet<usize>) -> Result<()> {
3490 match value {
3491 Value::Array(items) => {
3492 for item in items {
3493 collect_vertex_indices(item, indices)?;
3494 }
3495 Ok(())
3496 }
3497 Value::Number(number) => {
3498 indices.insert(number_to_index(number)?);
3499 Ok(())
3500 }
3501 Value::Null => Ok(()),
3502 other => Err(import_error(format!(
3503 "boundary values must be arrays or non-negative integers, found {}",
3504 value_kind(other)
3505 ))),
3506 }
3507}
3508
3509fn remap_vertex_indices(value: &mut Value, remap: &HashMap<usize, usize>) -> Result<()> {
3510 match value {
3511 Value::Array(items) => {
3512 for item in items {
3513 remap_vertex_indices(item, remap)?;
3514 }
3515 Ok(())
3516 }
3517 Value::Number(number) => {
3518 let old_index = number_to_index(number)?;
3519 let new_index = remap.get(&old_index).copied().ok_or_else(|| {
3520 import_error(format!(
3521 "missing remap entry for referenced vertex index {old_index}"
3522 ))
3523 })?;
3524 *value =
3525 Value::Number(Number::from(u64::try_from(new_index).map_err(|_| {
3526 import_error("localized vertex index does not fit in u64")
3527 })?));
3528 Ok(())
3529 }
3530 Value::Null => Ok(()),
3531 other => Err(import_error(format!(
3532 "boundary values must be arrays or non-negative integers, found {}",
3533 value_kind(other)
3534 ))),
3535 }
3536}
3537
3538fn build_local_vertices(
3539 shared_vertices: &[[i64; 3]],
3540 referenced_vertices: &BTreeSet<usize>,
3541) -> Result<Vec<[i64; 3]>> {
3542 let mut vertices = Vec::with_capacity(referenced_vertices.len());
3543
3544 for &index in referenced_vertices {
3545 let vertex = shared_vertices.get(index).copied().ok_or_else(|| {
3546 import_error(format!(
3547 "vertex index {index} is outside the shared vertices array"
3548 ))
3549 })?;
3550 vertices.push(vertex);
3551 }
3552
3553 Ok(vertices)
3554}
3555
3556fn number_to_index(number: &Number) -> Result<usize> {
3557 let index = number
3558 .as_u64()
3559 .ok_or_else(|| import_error("boundary vertex indices must be non-negative integers"))?;
3560 usize::try_from(index)
3561 .map_err(|_| import_error(format!("vertex index {index} does not fit in usize")))
3562}
3563
/// Returns a short lowercase name for the kind of a JSON value, used in error
/// messages.
fn value_kind(value: &Value) -> &'static str {
    match value {
        Value::Null => "null",
        Value::Bool(_) => "bool",
        Value::Number(_) => "number",
        Value::String(_) => "string",
        Value::Array(_) => "array",
        Value::Object(_) => "object",
    }
}
3574
/// Builds an `Error::Import` carrying the given message.
fn import_error(message: impl Into<String>) -> Error {
    Error::Import(message.into())
}
3578
3579pub fn configured_worker_count() -> Result<usize> {
3585 match std::env::var(WORKER_COUNT_ENV) {
3586 Ok(value) => {
3587 let worker_count = value.parse::<usize>().map_err(|error| {
3588 import_error(format!(
3589 "{WORKER_COUNT_ENV} must be a positive integer: {error}"
3590 ))
3591 })?;
3592 if worker_count == 0 {
3593 return Err(import_error(format!(
3594 "{WORKER_COUNT_ENV} must be greater than zero"
3595 )));
3596 }
3597 Ok(worker_count)
3598 }
3599 Err(std::env::VarError::NotPresent) => {
3600 Ok(std::thread::available_parallelism().map_or(1, std::num::NonZeroUsize::get))
3601 }
3602 Err(std::env::VarError::NotUnicode(_)) => Err(import_error(format!(
3603 "{WORKER_COUNT_ENV} must contain valid UTF-8"
3604 ))),
3605 }
3606}
3607
3608fn parallel_scan_items<T, U, F>(items: &[T], worker_count: usize, scan: F) -> Result<Vec<U>>
3609where
3610 T: Sync,
3611 U: Send,
3612 F: Fn(&T) -> Result<U> + Sync,
3613{
3614 if items.is_empty() {
3615 return Ok(Vec::new());
3616 }
3617
3618 let shard_count = worker_count.max(1).min(items.len());
3619 if shard_count == 1 {
3620 return items.iter().map(scan).collect();
3621 }
3622
3623 let chunk_size = items.len().div_ceil(shard_count);
3624 std::thread::scope(|scope| -> Result<Vec<U>> {
3625 let mut handles = Vec::with_capacity(shard_count);
3626 let scan = &scan;
3627 for shard in items.chunks(chunk_size) {
3628 handles.push(scope.spawn(move || {
3629 let mut shard_results = Vec::with_capacity(shard.len());
3630 for item in shard {
3631 shard_results.push(scan(item)?);
3632 }
3633 Ok::<Vec<U>, Error>(shard_results)
3634 }));
3635 }
3636
3637 let mut results = Vec::with_capacity(items.len());
3638 for handle in handles {
3639 let shard_results = handle
3640 .join()
3641 .map_err(|_| import_error("parallel scan worker panicked"))??;
3642 results.extend(shard_results);
3643 }
3644 Ok(results)
3645 })
3646}
3647
3648fn serde_json_error(error: &serde_json::Error) -> Error {
3649 import_error(error.to_string())
3650}
3651
3652fn parse_json_slice<T: DeserializeOwned>(bytes: &[u8]) -> Result<T> {
3653 serde_json::from_slice(bytes).map_err(|error| serde_json_error(&error))
3654}
3655
3656fn parse_json_str<T: DeserializeOwned>(value: &str) -> Result<T> {
3657 serde_json::from_str(value).map_err(|error| serde_json_error(&error))
3658}
3659
3660fn parse_json_value<T: DeserializeOwned>(value: Value) -> Result<T> {
3661 serde_json::from_value(value).map_err(|error| serde_json_error(&error))
3662}
3663
3664fn json_string<T: Serialize + ?Sized>(value: &T) -> Result<String> {
3665 serde_json::to_string(value).map_err(|error| serde_json_error(&error))
3666}
3667
3668fn table_sql_contains(conn: &rusqlite::Connection, table: &str, needle: &str) -> Result<bool> {
3669 let sql = sqlite_result(
3670 conn.query_row(
3671 "SELECT sql FROM sqlite_master WHERE type = 'table' AND name = ?1",
3672 params![table],
3673 |row| row.get::<_, Option<String>>(0),
3674 )
3675 .optional(),
3676 )?
3677 .flatten()
3678 .unwrap_or_default();
3679 Ok(sql.contains(needle))
3680}
3681
3682fn read_exact_range(path: &Path, offset: u64, length: u64) -> Result<Vec<u8>> {
3683 let mut file = fs::File::open(path)
3684 .map_err(|error| import_error(format!("failed to open {}: {error}", path.display())))?;
3685 read_exact_range_from_file(&mut file, path, offset, length)
3686}
3687
3688fn read_feature_slices_with_base(
3689 locations: &[&FeatureLocation],
3690 metadata_bytes: &[u8],
3691) -> Result<Vec<CityModel>> {
3692 let mut locations_by_path: BTreeMap<PathBuf, Vec<usize>> = BTreeMap::new();
3693 for (index, location) in locations.iter().enumerate() {
3694 locations_by_path
3695 .entry(location.source_path.clone())
3696 .or_default()
3697 .push(index);
3698 }
3699
3700 let mut models = std::iter::repeat_with(|| None)
3701 .take(locations.len())
3702 .collect::<Vec<Option<CityModel>>>();
3703 for (path, mut indexes) in locations_by_path {
3704 indexes.sort_by_key(|index| locations[*index].offset);
3705 let mut file = fs::File::open(&path)
3706 .map_err(|error| import_error(format!("failed to open {}: {error}", path.display())))?;
3707 for index in indexes {
3708 let location = locations[index];
3709 let feature_bytes =
3710 read_exact_range_from_file(&mut file, &path, location.offset, location.length)?;
3711 let model = feature_slice_with_indexed_id(&feature_bytes, location, metadata_bytes)?;
3712 models[index] = Some(model);
3713 }
3714 }
3715
3716 Ok(models
3717 .into_iter()
3718 .map(|model| model.expect("every input feature should have a decoded model"))
3719 .collect())
3720}
3721
3722fn feature_slice_with_indexed_id(
3723 feature_bytes: &[u8],
3724 loc: &FeatureLocation,
3725 metadata_bytes: &[u8],
3726) -> Result<CityModel> {
3727 staged::from_feature_slice_with_indexed_id_and_base(
3728 feature_bytes,
3729 loc.feature_id.as_str(),
3730 metadata_bytes,
3731 )
3732}
3733
3734fn read_exact_range_from_file(
3735 file: &mut fs::File,
3736 path: &Path,
3737 offset: u64,
3738 length: u64,
3739) -> Result<Vec<u8>> {
3740 let length = usize::try_from(length).map_err(|_| {
3741 import_error(format!(
3742 "requested read of {length} bytes from {} exceeds the supported buffer size",
3743 path.display()
3744 ))
3745 })?;
3746 if length > isize::MAX as usize {
3747 return Err(import_error(format!(
3748 "requested read of {length} bytes from {} exceeds the supported buffer size",
3749 path.display()
3750 )));
3751 }
3752
3753 let mut bytes = Vec::new();
3754 bytes.try_reserve_exact(length).map_err(|error| {
3755 import_error(format!(
3756 "failed to allocate buffer for {} bytes from {}: {error}",
3757 length,
3758 path.display()
3759 ))
3760 })?;
3761 bytes.resize(length, 0);
3762
3763 file.seek(SeekFrom::Start(offset)).map_err(|error| {
3764 import_error(format!(
3765 "failed to seek to byte offset {offset} in {}: {error}",
3766 path.display()
3767 ))
3768 })?;
3769 file.read_exact(&mut bytes).map_err(|error| {
3770 if error.kind() == ErrorKind::UnexpectedEof {
3771 import_error(format!(
3772 "short read while reading {length} bytes at offset {offset} from {}",
3773 path.display()
3774 ))
3775 } else {
3776 import_error(format!(
3777 "failed to read {length} bytes at offset {offset} from {}: {error}",
3778 path.display()
3779 ))
3780 }
3781 })?;
3782
3783 Ok(bytes)
3784}
3785
3786fn read_json(path: impl AsRef<Path>) -> Result<Value> {
3787 let bytes = fs::read(path.as_ref())?;
3788 parse_json_slice(&bytes)
3789}
3790
3791fn file_status(path: &Path) -> Result<(u64, i64)> {
3792 let metadata = fs::metadata(path)?;
3793 let modified = metadata.modified().map_err(|error| {
3794 import_error(format!(
3795 "failed to read modified time for {}: {error}",
3796 path.display()
3797 ))
3798 })?;
3799 let since_epoch = modified.duration_since(UNIX_EPOCH).map_err(|error| {
3800 import_error(format!(
3801 "modified time for {} is before the unix epoch: {error}",
3802 path.display()
3803 ))
3804 })?;
3805 let nanos = i64::try_from(since_epoch.as_nanos())
3806 .map_err(|_| import_error("modified time does not fit in i64 nanoseconds"))?;
3807 Ok((metadata.len(), nanos))
3808}
3809
3810fn feature_cityobject_count(feature: &Value, context: &str) -> Result<u64> {
3811 let cityobjects = feature
3812 .get("CityObjects")
3813 .and_then(Value::as_object)
3814 .ok_or_else(|| import_error(format!("{context} is missing CityObjects")))?;
3815 u64::try_from(cityobjects.len())
3816 .map_err(|_| import_error("CityObject count does not fit in u64"))
3817}
3818
3819fn scan_ndjson_source(path: &Path) -> Result<SourceScan> {
3820 let bytes = fs::read(path)?;
3821 let (source_size, source_mtime_ns) = file_status(path)?;
3822 let line_spans = line_spans(&bytes);
3823 let Some((_, metadata_bytes)) = line_spans.first() else {
3824 return Err(import_error(format!(
3825 "NDJSON source {} is empty",
3826 path.display()
3827 )));
3828 };
3829
3830 let metadata: Meta = parse_json_slice(metadata_bytes)?;
3831 let (scale, translate) = parse_ndjson_transform(&metadata)?;
3832 let mut features = Vec::new();
3833
3834 for (offset, line_bytes) in line_spans.into_iter().skip(1) {
3835 if line_bytes.iter().all(u8::is_ascii_whitespace) {
3836 continue;
3837 }
3838
3839 let feature: Value = parse_json_slice(line_bytes)?;
3840 let (ids, bounds) = parse_ndjson_feature_bounds(&feature, scale, translate)?;
3841 let cityobject_count = feature_cityobject_count(&feature, "ndjson feature")?;
3842 let length = u64::try_from(line_bytes.len())
3843 .map_err(|_| import_error("NDJSON feature line length does not fit in u64"))?;
3844 features.extend(ids.into_iter().map(|id| ScannedFeature {
3845 id,
3846 path: path.to_path_buf(),
3847 file_size: source_size,
3848 file_mtime_ns: source_mtime_ns,
3849 offset,
3850 length,
3851 bounds,
3852 cityobject_count,
3853 member_ranges: None,
3854 }));
3855 }
3856
3857 Ok(SourceScan {
3858 path: path.to_path_buf(),
3859 metadata,
3860 vertices_offset: None,
3861 vertices_length: None,
3862 source_size,
3863 source_mtime_ns,
3864 features,
3865 })
3866}
3867
3868fn collect_layout_files(paths: &[PathBuf], suffix: &str) -> Result<Vec<PathBuf>> {
3869 let mut files = Vec::new();
3870
3871 for root in paths {
3872 if root.is_file() {
3873 if root.to_string_lossy().ends_with(suffix) {
3874 files.push(root.clone());
3875 }
3876 continue;
3877 }
3878
3879 for entry in WalkBuilder::new(root)
3880 .hidden(false)
3881 .follow_links(true)
3882 .build()
3883 {
3884 let entry = entry.map_err(|error| import_error(error.to_string()))?;
3885 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
3886 continue;
3887 }
3888 let path = entry.into_path();
3889 if path.to_string_lossy().ends_with(suffix) {
3890 files.push(path);
3891 }
3892 }
3893 }
3894
3895 files.sort();
3896 files.dedup();
3897 Ok(files)
3898}
3899
3900fn scan_cityjson_source(path: &Path) -> Result<SourceScan> {
3901 let bytes = fs::read(path)?;
3902 let (source_size, source_mtime_ns) = file_status(path)?;
3903 let document: Value = parse_json_slice(&bytes)?;
3904 let metadata = cityjson_base_metadata(&document)?;
3905 let (scale, translate) = parse_ndjson_transform(&metadata)?;
3906
3907 let cityobjects = document
3908 .get("CityObjects")
3909 .and_then(Value::as_object)
3910 .ok_or_else(|| {
3911 import_error(format!(
3912 "CityJSON source {} is missing CityObjects",
3913 path.display()
3914 ))
3915 })?;
3916 let vertices_value = document.get("vertices").ok_or_else(|| {
3917 import_error(format!(
3918 "CityJSON source {} is missing vertices",
3919 path.display()
3920 ))
3921 })?;
3922 let vertices: Vec<[i64; 3]> = parse_json_value(vertices_value.clone())?;
3923 let (vertices_offset, vertices_length) = top_level_value_range(&bytes, "vertices")?;
3924 let cityobject_ranges = cityobject_entry_ranges(&bytes)?
3925 .into_iter()
3926 .map(|(id, offset, length)| (id, (offset, length)))
3927 .collect::<HashMap<_, _>>();
3928
3929 let root_ids = root_cityobject_ids(cityobjects);
3930 let mut features = Vec::with_capacity(root_ids.len());
3931 for id in root_ids {
3932 let (offset, length) = cityobject_ranges.get(id).copied().ok_or_else(|| {
3933 import_error(format!(
3934 "CityObject fragment for {id} could not be located in {}",
3935 path.display()
3936 ))
3937 })?;
3938 let member_ids = collect_cityjson_feature_members(id, cityobjects)?;
3939 let member_ranges = member_ids
3940 .iter()
3941 .map(|member_id| {
3942 let (member_offset, member_length) =
3943 cityobject_ranges.get(member_id).copied().ok_or_else(|| {
3944 import_error(format!(
3945 "CityObject fragment for {member_id} could not be located in {}",
3946 path.display()
3947 ))
3948 })?;
3949 Ok(IndexedObjectRange {
3950 id: member_id.clone(),
3951 offset: member_offset,
3952 length: member_length,
3953 })
3954 })
3955 .collect::<Result<Vec<_>>>()?;
3956 let mut referenced_vertices = BTreeSet::new();
3957 let mut visited = BTreeSet::new();
3958 collect_cityjson_object_vertex_indices(
3959 id,
3960 cityobjects,
3961 &mut referenced_vertices,
3962 &mut visited,
3963 )?;
3964 if referenced_vertices.is_empty() {
3965 return Err(import_error(format!(
3966 "CityObject {id} in {} does not reference any vertices",
3967 path.display()
3968 )));
3969 }
3970 let bounds =
3971 feature_bounds_from_vertices(&vertices, &referenced_vertices, scale, translate)?;
3972 features.push(ScannedFeature {
3973 id: id.clone(),
3974 path: path.to_path_buf(),
3975 file_size: source_size,
3976 file_mtime_ns: source_mtime_ns,
3977 offset,
3978 length,
3979 bounds,
3980 cityobject_count: u64::try_from(member_ranges.len())
3981 .map_err(|_| import_error("CityObject count does not fit in u64"))?,
3982 member_ranges: Some(member_ranges),
3983 });
3984 }
3985
3986 Ok(SourceScan {
3987 path: path.to_path_buf(),
3988 metadata,
3989 vertices_offset: Some(vertices_offset),
3990 vertices_length: Some(vertices_length),
3991 source_size,
3992 source_mtime_ns,
3993 features,
3994 })
3995}
3996
3997fn cityjson_base_metadata(document: &Value) -> Result<Meta> {
3998 let mut metadata = document.clone();
3999 let root = metadata
4000 .as_object_mut()
4001 .ok_or_else(|| import_error("CityJSON document root must be a JSON object"))?;
4002 root.insert("CityObjects".to_owned(), Value::Object(Map::new()));
4003 root.insert("vertices".to_owned(), Value::Array(Vec::new()));
4004 Ok(metadata)
4005}
4006
4007fn root_cityobject_ids(cityobjects: &Map<String, Value>) -> Vec<&String> {
4008 let mut child_ids = BTreeSet::new();
4009 let mut ids = cityobjects.keys().collect::<Vec<_>>();
4010
4011 for object in cityobjects.values() {
4012 if let Some(children) = object.get("children").and_then(Value::as_array) {
4013 for child in children {
4014 if let Some(child_id) = child.as_str() {
4015 child_ids.insert(child_id.to_owned());
4016 }
4017 }
4018 }
4019 }
4020
4021 ids.sort();
4022 ids.into_iter()
4023 .filter(|id| {
4024 cityobjects
4025 .get(*id)
4026 .and_then(|object| object.get("parents"))
4027 .and_then(Value::as_array)
4028 .is_none_or(Vec::is_empty)
4029 && !child_ids.contains(id.as_str())
4030 })
4031 .collect()
4032}
4033
4034fn collect_cityjson_feature_members(
4035 root_id: &str,
4036 cityobjects: &Map<String, Value>,
4037) -> Result<Vec<String>> {
4038 let mut members = Vec::new();
4039 let mut visited = BTreeSet::new();
4040 collect_cityjson_feature_members_recursive(root_id, cityobjects, &mut members, &mut visited)?;
4041 Ok(members)
4042}
4043
4044fn collect_cityjson_feature_members_recursive(
4045 object_id: &str,
4046 cityobjects: &Map<String, Value>,
4047 members: &mut Vec<String>,
4048 visited: &mut BTreeSet<String>,
4049) -> Result<()> {
4050 if !visited.insert(object_id.to_owned()) {
4051 return Ok(());
4052 }
4053
4054 let object = cityobjects.get(object_id).ok_or_else(|| {
4055 import_error(format!(
4056 "CityJSON source is missing referenced CityObject {object_id}"
4057 ))
4058 })?;
4059 members.push(object_id.to_owned());
4060
4061 if let Some(children) = object.get("children").and_then(Value::as_array) {
4062 for child in children {
4063 let Some(child_id) = child.as_str() else {
4064 return Err(import_error(
4065 "CityObject children must be string identifiers",
4066 ));
4067 };
4068 if cityobjects.contains_key(child_id) {
4069 collect_cityjson_feature_members_recursive(
4070 child_id,
4071 cityobjects,
4072 members,
4073 visited,
4074 )?;
4075 }
4076 }
4077 }
4078
4079 Ok(())
4080}
4081
4082fn collect_cityjson_object_vertex_indices(
4083 object_id: &str,
4084 cityobjects: &Map<String, Value>,
4085 indices: &mut BTreeSet<usize>,
4086 visited: &mut BTreeSet<String>,
4087) -> Result<()> {
4088 if !visited.insert(object_id.to_owned()) {
4089 return Ok(());
4090 }
4091
4092 let object = cityobjects.get(object_id).ok_or_else(|| {
4093 import_error(format!(
4094 "CityJSON source is missing referenced CityObject {object_id}"
4095 ))
4096 })?;
4097 collect_object_vertex_indices(object, indices)?;
4098
4099 if let Some(children) = object.get("children").and_then(Value::as_array) {
4100 for child in children {
4101 let Some(child_id) = child.as_str() else {
4102 return Err(import_error(
4103 "CityObject children must be string identifiers",
4104 ));
4105 };
4106 if cityobjects.contains_key(child_id) {
4107 collect_cityjson_object_vertex_indices(child_id, cityobjects, indices, visited)?;
4108 }
4109 }
4110 }
4111
4112 Ok(())
4113}
4114
4115fn collect_object_vertex_indices(object: &Value, indices: &mut BTreeSet<usize>) -> Result<()> {
4116 if let Some(geometries) = object.get("geometry").and_then(Value::as_array) {
4117 for geometry in geometries {
4118 if let Some(boundaries) = geometry.get("boundaries") {
4119 collect_vertex_indices(boundaries, indices)?;
4120 }
4121 }
4122 }
4123 Ok(())
4124}
4125
4126fn top_level_value_range(bytes: &[u8], key: &str) -> Result<(u64, u64)> {
4127 let key_start = find_json_key(bytes, key)
4128 .ok_or_else(|| import_error(format!("top-level key {key} could not be located")))?;
4129 let mut cursor = skip_json_whitespace(bytes, key_start + key.len() + 2);
4130 if bytes.get(cursor) != Some(&b':') {
4131 return Err(import_error(format!(
4132 "top-level key {key} is missing a value separator"
4133 )));
4134 }
4135 cursor = skip_json_whitespace(bytes, cursor + 1);
4136 let value_end = json_value_end(bytes, cursor)?;
4137 Ok((
4138 u64::try_from(cursor).map_err(|_| import_error("value offset does not fit in u64"))?,
4139 u64::try_from(value_end - cursor)
4140 .map_err(|_| import_error("value length does not fit in u64"))?,
4141 ))
4142}
4143
4144fn cityobject_entry_ranges(bytes: &[u8]) -> Result<Vec<(String, u64, u64)>> {
4145 let key_start = find_json_key(bytes, "CityObjects")
4146 .ok_or_else(|| import_error("top-level key CityObjects could not be located"))?;
4147 let mut cursor = skip_json_whitespace(bytes, key_start + "\"CityObjects\"".len());
4148 if bytes.get(cursor) != Some(&b':') {
4149 return Err(import_error("CityObjects key is missing a value separator"));
4150 }
4151 cursor = skip_json_whitespace(bytes, cursor + 1);
4152 if bytes.get(cursor) != Some(&b'{') {
4153 return Err(import_error("CityObjects must be a JSON object"));
4154 }
4155 cursor += 1;
4156
4157 let mut entries = Vec::new();
4158 loop {
4159 cursor = skip_json_whitespace(bytes, cursor);
4160 match bytes.get(cursor) {
4161 Some(b'}') => break,
4162 Some(b'"') => {
4163 let entry_start = cursor;
4164 let (id, after_key) = parse_json_string(bytes, cursor)?;
4165 cursor = skip_json_whitespace(bytes, after_key);
4166 if bytes.get(cursor) != Some(&b':') {
4167 return Err(import_error(
4168 "CityObject entry is missing a value separator",
4169 ));
4170 }
4171 cursor = skip_json_whitespace(bytes, cursor + 1);
4172 let value_end = json_value_end(bytes, cursor)?;
4173 let offset = u64::try_from(entry_start)
4174 .map_err(|_| import_error("CityObject entry offset does not fit in u64"))?;
4175 let length = u64::try_from(value_end - entry_start)
4176 .map_err(|_| import_error("CityObject entry length does not fit in u64"))?;
4177 entries.push((id, offset, length));
4178 cursor = skip_json_whitespace(bytes, value_end);
4179 match bytes.get(cursor) {
4180 Some(b',') => cursor += 1,
4181 Some(b'}') => break,
4182 _ => {
4183 return Err(import_error(
4184 "CityObjects entries must be separated by commas",
4185 ));
4186 }
4187 }
4188 }
4189 _ => return Err(import_error("unexpected token inside CityObjects object")),
4190 }
4191 }
4192
4193 Ok(entries)
4194}
4195
/// Locates `"key"` in raw JSON bytes and returns the offset of its opening quote.
///
/// A member key of the outermost JSON value is preferred: a string at nesting
/// depth 1 that equals `"key"` and is followed by `:`. This stops an
/// identical nested key (for example a CityObject whose id is `vertices`) or
/// an identical string value from shadowing the real top-level key.
///
/// When no such member exists, for instance because `bytes` is a fragment
/// with no enclosing object, the function falls back to the first raw
/// occurrence of `"key"`, which is what the plain byte scan used to return.
///
/// Keys are compared in their raw, unescaped spelling.
fn find_json_key(bytes: &[u8], key: &str) -> Option<usize> {
    let needle = format!("\"{key}\"");
    let needle = needle.as_bytes();
    find_top_level_json_key(bytes, needle).or_else(|| {
        bytes
            .windows(needle.len())
            .position(|window| window == needle)
    })
}

/// Walks the JSON text once, tracking string state and bracket depth, and
/// returns the start of the first depth-1 string equal to `quoted_key` that
/// is followed by `:`. Returns `None` on an unterminated string.
fn find_top_level_json_key(bytes: &[u8], quoted_key: &[u8]) -> Option<usize> {
    let mut depth = 0usize;
    let mut index = 0usize;

    while let Some(&byte) = bytes.get(index) {
        match byte {
            b'"' => {
                let string_start = index;
                let mut escaped = false;
                index += 1;
                // Advance past the closing quote, honouring backslash escapes.
                loop {
                    let current = *bytes.get(index)?;
                    index += 1;
                    if escaped {
                        escaped = false;
                    } else if current == b'\\' {
                        escaped = true;
                    } else if current == b'"' {
                        break;
                    }
                }

                if depth == 1 && &bytes[string_start..index] == quoted_key {
                    let mut after = index;
                    while bytes.get(after).is_some_and(u8::is_ascii_whitespace) {
                        after += 1;
                    }
                    // Only keys are followed by `:`; string values are not.
                    if bytes.get(after) == Some(&b':') {
                        return Some(string_start);
                    }
                }
                continue;
            }
            b'{' | b'[' => depth += 1,
            b'}' | b']' => depth = depth.saturating_sub(1),
            _ => {}
        }
        index += 1;
    }

    None
}
4202
/// Returns the index of the first byte at or after `index` that is not ASCII
/// whitespace, or the end of the run if the input ends first.
///
/// An `index` beyond the end of `bytes` is returned unchanged.
fn skip_json_whitespace(bytes: &[u8], index: usize) -> usize {
    let Some(tail) = bytes.get(index..) else {
        return index;
    };
    let run = tail
        .iter()
        .take_while(|byte| byte.is_ascii_whitespace())
        .count();
    index + run
}
4209
4210fn parse_json_string(bytes: &[u8], start: usize) -> Result<(String, usize)> {
4211 let mut index = start + 1;
4212 let mut escaped = false;
4213
4214 while let Some(byte) = bytes.get(index) {
4215 if escaped {
4216 escaped = false;
4217 } else if *byte == b'\\' {
4218 escaped = true;
4219 } else if *byte == b'"' {
4220 let end = index + 1;
4221 return Ok((parse_json_slice(&bytes[start..end])?, end));
4222 }
4223 index += 1;
4224 }
4225
4226 Err(import_error("unterminated JSON string"))
4227}
4228
4229fn json_value_end(bytes: &[u8], start: usize) -> Result<usize> {
4230 match bytes.get(start) {
4231 Some(b'{') => nested_json_end(bytes, start, b'{', b'}'),
4232 Some(b'[') => nested_json_end(bytes, start, b'[', b']'),
4233 Some(b'"') => parse_json_string(bytes, start).map(|(_, end)| end),
4234 Some(_) => {
4235 let mut end = start;
4236 while let Some(byte) = bytes.get(end) {
4237 if byte.is_ascii_whitespace() || matches!(*byte, b',' | b'}' | b']') {
4238 break;
4239 }
4240 end += 1;
4241 }
4242 Ok(end)
4243 }
4244 None => Err(import_error("unexpected end of JSON input")),
4245 }
4246}
4247
4248fn nested_json_end(bytes: &[u8], start: usize, open: u8, close: u8) -> Result<usize> {
4249 let mut depth = 0usize;
4250 let mut index = start;
4251 let mut in_string = false;
4252 let mut escaped = false;
4253
4254 while let Some(byte) = bytes.get(index) {
4255 if in_string {
4256 if escaped {
4257 escaped = false;
4258 } else if *byte == b'\\' {
4259 escaped = true;
4260 } else if *byte == b'"' {
4261 in_string = false;
4262 }
4263 } else if *byte == b'"' {
4264 in_string = true;
4265 } else if *byte == open {
4266 depth += 1;
4267 } else if *byte == close {
4268 depth -= 1;
4269 if depth == 0 {
4270 return Ok(index + 1);
4271 }
4272 }
4273 index += 1;
4274 }
4275
4276 Err(import_error("unterminated JSON value"))
4277}
4278
4279fn parse_ndjson_transform(metadata: &Value) -> Result<([f64; 3], [f64; 3])> {
4280 let transform = metadata
4281 .get("transform")
4282 .and_then(Value::as_object)
4283 .ok_or_else(|| import_error("NDJSON metadata is missing transform"))?;
4284
4285 let scale = parse_vector3_f64(transform, "scale")?;
4286 let translate = parse_vector3_f64(transform, "translate")?;
4287 Ok((scale, translate))
4288}
4289
4290fn feature_cityobject_keys(feature: &Value, label: &str) -> Result<Vec<String>> {
4291 let cityobjects = feature
4292 .get("CityObjects")
4293 .ok_or_else(|| import_error(format!("{label} is missing CityObjects")))?
4294 .as_object()
4295 .ok_or_else(|| import_error(format!("{label} CityObjects must be an object")))?;
4296 if cityobjects.is_empty() {
4297 return Err(import_error(format!(
4298 "{label} CityObjects must contain at least one CityObject"
4299 )));
4300 }
4301 Ok(cityobjects.keys().cloned().collect())
4302}
4303
4304fn collect_feature_vertex_indices(feature: &Value, vertex_count: usize) -> Result<BTreeSet<usize>> {
4305 let mut indices = BTreeSet::new();
4306 let cityobjects = feature
4307 .get("CityObjects")
4308 .and_then(Value::as_object)
4309 .ok_or_else(|| import_error("feature package is missing CityObjects"))?;
4310
4311 for object in cityobjects.values() {
4312 collect_object_vertex_indices(object, &mut indices)?;
4313 }
4314
4315 if indices.is_empty() {
4316 indices.extend(0..vertex_count);
4317 }
4318
4319 Ok(indices)
4320}
4321
4322fn parse_vector3_f64(object: &Map<String, Value>, key: &str) -> Result<[f64; 3]> {
4323 let array = object
4324 .get(key)
4325 .and_then(Value::as_array)
4326 .ok_or_else(|| import_error(format!("transform is missing {key}")))?;
4327 if array.len() != 3 {
4328 return Err(import_error(format!(
4329 "transform {key} must contain three values"
4330 )));
4331 }
4332
4333 Ok([
4334 array[0]
4335 .as_f64()
4336 .ok_or_else(|| import_error(format!("transform {key}[0] must be numeric")))?,
4337 array[1]
4338 .as_f64()
4339 .ok_or_else(|| import_error(format!("transform {key}[1] must be numeric")))?,
4340 array[2]
4341 .as_f64()
4342 .ok_or_else(|| import_error(format!("transform {key}[2] must be numeric")))?,
4343 ])
4344}
4345
4346fn parse_ndjson_feature_bounds(
4347 feature: &Value,
4348 scale: [f64; 3],
4349 translate: [f64; 3],
4350) -> Result<(Vec<String>, FeatureBounds)> {
4351 let ids = feature_cityobject_keys(feature, "NDJSON feature")?;
4352 let vertices = feature
4353 .get("vertices")
4354 .ok_or_else(|| import_error("NDJSON feature is missing vertices"))?;
4355 let vertices: Vec<[i64; 3]> = parse_json_value(vertices.clone())?;
4356 let referenced_vertices = collect_feature_vertex_indices(feature, vertices.len())?;
4357 let bounds = feature_bounds_from_vertices(&vertices, &referenced_vertices, scale, translate)?;
4358 Ok((ids, bounds))
4359}
4360
4361#[allow(clippy::cast_precision_loss)]
4362fn feature_bounds_from_vertices(
4363 vertices: &[[i64; 3]],
4364 referenced_vertices: &BTreeSet<usize>,
4365 scale: [f64; 3],
4366 translate: [f64; 3],
4367) -> Result<FeatureBounds> {
4368 let mut min_x = f64::INFINITY;
4369 let mut max_x = f64::NEG_INFINITY;
4370 let mut min_y = f64::INFINITY;
4371 let mut max_y = f64::NEG_INFINITY;
4372 let mut min_z = f64::INFINITY;
4373 let mut max_z = f64::NEG_INFINITY;
4374
4375 for &index in referenced_vertices {
4376 let vertex = vertices.get(index).copied().ok_or_else(|| {
4377 import_error(format!(
4378 "vertex index {index} is outside the NDJSON feature vertex array"
4379 ))
4380 })?;
4381 let x = translate[0] + scale[0] * vertex[0] as f64;
4382 let y = translate[1] + scale[1] * vertex[1] as f64;
4383 let z = translate[2] + scale[2] * vertex[2] as f64;
4384 min_x = min_x.min(x);
4385 max_x = max_x.max(x);
4386 min_y = min_y.min(y);
4387 max_y = max_y.max(y);
4388 min_z = min_z.min(z);
4389 max_z = max_z.max(z);
4390 }
4391
4392 if !min_x.is_finite()
4393 || !min_y.is_finite()
4394 || !min_z.is_finite()
4395 || !max_x.is_finite()
4396 || !max_y.is_finite()
4397 || !max_z.is_finite()
4398 {
4399 return Err(import_error("NDJSON feature bbox could not be computed"));
4400 }
4401
4402 Ok(FeatureBounds {
4403 min_x,
4404 max_x,
4405 min_y,
4406 max_y,
4407 min_z,
4408 max_z,
4409 })
4410}
4411
4412fn line_spans(bytes: &[u8]) -> Vec<(u64, &[u8])> {
4413 let mut spans = Vec::new();
4414 let mut offset = 0u64;
4415
4416 for chunk in bytes.split_inclusive(|byte| *byte == b'\n') {
4417 spans.push((offset, trim_line_ending(chunk)));
4418 offset += u64::try_from(chunk.len()).expect("line chunk length fits in u64");
4419 }
4420
4421 if bytes.is_empty() {
4422 spans.clear();
4423 }
4424
4425 spans
4426}
4427
/// Strips every trailing `\n` and `\r` byte from `bytes`.
fn trim_line_ending(bytes: &[u8]) -> &[u8] {
    // Keep everything up to and including the last byte that is not a line terminator.
    let kept = bytes
        .iter()
        .rposition(|&byte| byte != b'\n' && byte != b'\r')
        .map_or(0, |last| last + 1);
    &bytes[..kept]
}
4435
4436fn sqlite_result<T>(result: rusqlite::Result<T>) -> Result<T> {
4437 result.map_err(|value| Error::Import(value.to_string()))
4438}
4439
4440fn u64_to_i64(value: u64) -> rusqlite::Result<i64> {
4441 i64::try_from(value).map_err(|_| {
4442 rusqlite::Error::ToSqlConversionFailure(Box::new(import_error(format!(
4443 "value {value} does not fit in SQLite integer storage"
4444 ))))
4445 })
4446}
4447
4448fn i64_to_u64(value: i64) -> rusqlite::Result<u64> {
4449 u64::try_from(value).map_err(|_| {
4450 rusqlite::Error::ToSqlConversionFailure(Box::new(import_error(format!(
4451 "value {value} is not representable as u64"
4452 ))))
4453 })
4454}
4455
4456#[cfg(test)]
4457mod tests {
4458 use super::*;
4459 use std::time::{SystemTime, UNIX_EPOCH};
4460
4461 fn parent_child_lod_fixture() -> CityModel {
4462 cityjson_lib::json::from_feature_slice(
4463 br#"{
4464 "type":"CityJSONFeature",
4465 "id":"building",
4466 "CityObjects":{
4467 "building":{
4468 "type":"Building",
4469 "children":["building-part"]
4470 },
4471 "building-part":{
4472 "type":"BuildingPart",
4473 "parents":["building"],
4474 "geometry":[
4475 {"type":"MultiSurface","lod":"1","boundaries":[[[0,1,2]]]},
4476 {"type":"MultiSurface","lod":"2","boundaries":[[[0,2,3]]]}
4477 ]
4478 },
4479 "road":{
4480 "type":"Road",
4481 "geometry":[{"type":"MultiSurface","lod":"1","boundaries":[[[4,5,6]]]}]
4482 }
4483 },
4484 "vertices":[[0,0,0],[1,0,0],[1,1,0],[0,1,0],[10,0,0],[11,0,0],[10,1,0]]
4485 }"#,
4486 )
4487 .expect("parent-child fixture should parse")
4488 }
4489
4490 #[test]
4491 fn feature_filter_selecting_parent_type_retains_child_geometry() {
4492 let filter = FeatureFilter {
4493 cityobject_types: Some(BTreeSet::from(["Building".to_owned()])),
4494 default_lod: LodSelection::Highest,
4495 lods_by_type: BTreeMap::new(),
4496 };
4497
4498 let filtered = filter
4499 .apply(&parent_child_lod_fixture())
4500 .expect("filter should succeed");
4501
4502 assert_eq!(
4503 filtered.diagnostics.retained_types,
4504 BTreeSet::from(["Building".to_owned(), "BuildingPart".to_owned()])
4505 );
4506 assert_eq!(
4507 filtered.diagnostics.retained_lods.get("BuildingPart"),
4508 Some(&BTreeSet::from(["2".to_owned()]))
4509 );
4510 assert!(filtered.model.cityobjects().iter().any(|(_, cityobject)| {
4511 cityobject.id() == "building-part"
4512 && cityobject
4513 .geometry()
4514 .is_some_and(|geometries| !geometries.is_empty())
4515 }));
4516 assert!(
4517 !filtered
4518 .model
4519 .cityobjects()
4520 .iter()
4521 .any(|(_, cityobject)| cityobject.id() == "road")
4522 );
4523 }
4524
4525 #[test]
4526 fn feature_filter_summary_reports_missing_explicit_lod() {
4527 let filter = FeatureFilter {
4528 cityobject_types: None,
4529 default_lod: LodSelection::Highest,
4530 lods_by_type: BTreeMap::from([(
4531 "BuildingPart".to_owned(),
4532 LodSelection::Exact("3".to_owned()),
4533 )]),
4534 };
4535 let filtered = filter
4536 .apply(&parent_child_lod_fixture())
4537 .expect("filter should succeed");
4538 let mut summary = FeatureFilterSummary::default();
4539 summary.add(&filtered.diagnostics);
4540
4541 let failures = summary.requested_lod_failures(&filter);
4542
4543 assert_eq!(
4544 failures,
4545 vec![MissingLodSelection {
4546 cityobject_type: "BuildingPart".to_owned(),
4547 requested_lod: "3".to_owned(),
4548 available_lods: BTreeSet::from(["1".to_owned(), "2".to_owned()]),
4549 }]
4550 );
4551 }
4552
4553 #[test]
4554 fn cityjson_read_one_localizes_vertices_and_preserves_base_root_members() {
4555 let selected_id = "building-1";
4556 let selected_object = serde_json::json!({
4557 "type": "Building",
4558 "children": ["building-1-part"],
4559 "geometry": [{
4560 "type": "MultiSurface",
4561 "lod": "0",
4562 "boundaries": [[[2, 7, 5]]]
4563 }]
4564 });
4565 let other_object = serde_json::json!({
4566 "type": "Building",
4567 "geometry": [{
4568 "type": "MultiSurface",
4569 "lod": "0",
4570 "boundaries": [[[0, 1, 3]]]
4571 }]
4572 });
4573 let vertices = serde_json::json!([
4574 [100, 0, 0],
4575 [101, 0, 0],
4576 [0, 0, 0],
4577 [102, 0, 0],
4578 [103, 0, 0],
4579 [2, 0, 0],
4580 [104, 0, 0],
4581 [1, 0, 0]
4582 ]);
4583 let document = serde_json::json!({
4584 "type": "CityJSON",
4585 "version": "2.0",
4586 "transform": {
4587 "scale": [0.5, 0.5, 0.5],
4588 "translate": [10.0, 20.0, 30.0]
4589 },
4590 "metadata": {
4591 "title": "unit-test-fixture"
4592 },
4593 "CityObjects": {
4594 selected_id: selected_object.clone(),
4595 "other-object": other_object
4596 },
4597 "vertices": vertices.clone()
4598 });
4599 let document_bytes = serde_json::to_vec(&document).expect("fixture JSON");
4600 let base_document = cityjson_base_metadata(&document).expect("base CityJSON metadata");
4601 let base_document_bytes: Arc<[u8]> =
4602 Arc::from(serde_json::to_vec(&base_document).expect("base CityJSON metadata bytes"));
4603 let object_fragment = object_entry_fragment(selected_id, &selected_object);
4604 let vertices_fragment = serde_json::to_vec(&vertices).expect("vertices fragment");
4605 let loc = FeatureLocation {
4606 feature_id: selected_id.to_owned(),
4607 source_id: 0,
4608 source_path: write_temp_cityjson(&document_bytes),
4609 offset: find_subslice(&document_bytes, &object_fragment)
4610 .expect("selected object offset") as u64,
4611 length: object_fragment.len() as u64,
4612 vertices_offset: Some(
4613 find_subslice(&document_bytes, &vertices_fragment).expect("vertices offset") as u64,
4614 ),
4615 vertices_length: Some(vertices_fragment.len() as u64),
4616 member_ranges_json: None,
4617 };
4618
4619 let backend = CityJsonBackend::new(vec![loc.source_path.clone()]);
4620 let model = backend
4621 .read_one(&loc, base_document_bytes)
4622 .expect("CityJSON read should succeed");
4623 let output: Value =
4624 serde_json::from_str(&cityjson_lib::json::to_string(&model).expect("serialize result"))
4625 .expect("valid output JSON");
4626
4627 let cityobjects = output["CityObjects"]
4628 .as_object()
4629 .expect("result CityObjects must be an object");
4630 assert_eq!(cityobjects.len(), 1);
4631 assert!(cityobjects.contains_key(selected_id));
4632 assert_eq!(output["transform"], document["transform"]);
4633 assert_eq!(output["metadata"], document["metadata"]);
4634 assert!(cityobjects[selected_id].get("children").is_none());
4635 assert_eq!(
4636 output["vertices"],
4637 serde_json::json!([[0, 0, 0], [2, 0, 0], [1, 0, 0]])
4638 );
4639 assert_eq!(
4640 cityobjects[selected_id]["geometry"][0]["boundaries"],
4641 serde_json::json!([[[0, 2, 1]]])
4642 );
4643 }
4644
4645 #[test]
4646 fn cityjson_scan_and_read_one_group_root_objects_with_children() {
4647 let document = serde_json::json!({
4648 "type": "CityJSON",
4649 "version": "2.0",
4650 "transform": {
4651 "scale": [1.0, 1.0, 1.0],
4652 "translate": [0.0, 0.0, 0.0]
4653 },
4654 "CityObjects": {
4655 "building-1": {
4656 "type": "Building",
4657 "children": ["building-1-part"],
4658 "geometry": [{
4659 "type": "MultiSurface",
4660 "lod": "1.0",
4661 "boundaries": [[[0, 1, 2]]]
4662 }]
4663 },
4664 "building-1-part": {
4665 "type": "BuildingPart",
4666 "parents": ["building-1"],
4667 "geometry": [{
4668 "type": "MultiSurface",
4669 "lod": "1.0",
4670 "boundaries": [[[3, 4, 5]]]
4671 }]
4672 }
4673 },
4674 "vertices": [
4675 [0, 0, 0],
4676 [1, 0, 0],
4677 [0, 1, 0],
4678 [2, 0, 0],
4679 [3, 0, 0],
4680 [2, 1, 0]
4681 ]
4682 });
4683 let bytes = serde_json::to_vec(&document).expect("fixture JSON");
4684 let path = write_temp_cityjson(&bytes);
4685 let scan = scan_cityjson_source(&path).expect("scan should succeed");
4686
4687 assert_eq!(scan.features.len(), 1);
4688 assert_eq!(scan.features[0].id, "building-1");
4689 let member_ranges = scan.features[0]
4690 .member_ranges
4691 .as_ref()
4692 .expect("root feature should carry member ranges");
4693 assert_eq!(member_ranges.len(), 2);
4694 assert_eq!(member_ranges[0].id, "building-1");
4695 assert_eq!(member_ranges[1].id, "building-1-part");
4696
4697 let loc = FeatureLocation {
4698 feature_id: scan.features[0].id.clone(),
4699 source_id: 0,
4700 source_path: path,
4701 offset: scan.features[0].offset,
4702 length: scan.features[0].length,
4703 vertices_offset: scan.vertices_offset,
4704 vertices_length: scan.vertices_length,
4705 member_ranges_json: Some(
4706 serde_json::to_string(member_ranges).expect("member ranges JSON"),
4707 ),
4708 };
4709 let backend = CityJsonBackend::new(vec![loc.source_path.clone()]);
4710 let metadata_bytes: Arc<[u8]> =
4711 Arc::from(serde_json::to_vec(&scan.metadata).expect("metadata JSON"));
4712 let model = backend
4713 .read_one(&loc, metadata_bytes)
4714 .expect("CityJSON read should succeed");
4715 let output: Value =
4716 serde_json::from_str(&cityjson_lib::json::to_string(&model).expect("serialize result"))
4717 .expect("valid output JSON");
4718 let cityobjects = output["CityObjects"]
4719 .as_object()
4720 .expect("result CityObjects must be an object");
4721
4722 assert_eq!(cityobjects.len(), 2);
4723 assert!(cityobjects.contains_key("building-1"));
4724 assert!(cityobjects.contains_key("building-1-part"));
4725 assert_eq!(
4726 cityobjects["building-1"]["children"],
4727 serde_json::json!(["building-1-part"])
4728 );
4729 assert_eq!(
4730 cityobjects["building-1-part"]["parents"],
4731 serde_json::json!(["building-1"])
4732 );
4733 }
4734
4735 #[test]
4736 fn feature_parts_builder_drops_dangling_parent_links() {
4737 let parts = build_feature_parts(
4738 "building-1-part",
4739 vec![(
4740 "building-1-part".to_owned(),
4741 serde_json::json!({
4742 "type": "BuildingPart",
4743 "parents": ["building-1"],
4744 "geometry": [{
4745 "type": "MultiSurface",
4746 "lod": "0",
4747 "boundaries": [[[5, 9, 7]]]
4748 }]
4749 }),
4750 )],
4751 &[
4752 [100, 0, 0],
4753 [101, 0, 0],
4754 [102, 0, 0],
4755 [103, 0, 0],
4756 [104, 0, 0],
4757 [0, 0, 0],
4758 [105, 0, 0],
4759 [2, 0, 0],
4760 [106, 0, 0],
4761 [1, 0, 0],
4762 ],
4763 )
4764 .expect("feature parts should build");
4765 let object: Value = serde_json::from_str(parts.cityobjects[0].object_json.get())
4766 .expect("valid object JSON");
4767
4768 assert_eq!(parts.feature_id, "building-1-part");
4769 assert!(object.get("parents").is_none());
4770 assert_eq!(parts.vertices, vec![[0, 0, 0], [2, 0, 0], [1, 0, 0]]);
4771 assert_eq!(
4772 object["geometry"][0]["boundaries"],
4773 serde_json::json!([[[0, 2, 1]]])
4774 );
4775 }
4776
4777 #[test]
4778 fn ndjson_backend_scan_and_index_lookup_roundtrip() {
4779 let metadata = serde_json::json!({
4780 "type": "CityJSON",
4781 "version": "2.0",
4782 "transform": {
4783 "scale": [1.0, 1.0, 1.0],
4784 "translate": [0.0, 0.0, 0.0]
4785 }
4786 });
4787 let feature = serde_json::json!({
4788 "type": "CityJSONFeature",
4789 "id": "ndjson-test-feature",
4790 "CityObjects": {
4791 "ndjson-test-feature": {
4792 "type": "Building",
4793 "geometry": [{
4794 "type": "MultiSurface",
4795 "lod": "1.0",
4796 "boundaries": [[[0, 1, 2]]]
4797 }]
4798 }
4799 },
4800 "vertices": [
4801 [0, 0, 0],
4802 [1, 0, 0],
4803 [0, 1, 0]
4804 ]
4805 });
4806 let ndjson_path = write_temp_ndjson(&metadata, &feature);
4807 let backend = NdjsonBackend {
4808 paths: vec![ndjson_path.clone()],
4809 };
4810 let scans = backend.scan(1).expect("NDJSON scan should succeed");
4811 assert_eq!(scans.len(), 1);
4812 assert_eq!(scans[0].features.len(), 1);
4813 assert_eq!(scans[0].features[0].id, "ndjson-test-feature");
4814
4815 let index_path = write_temp_index_path();
4816 let mut index = Index::open(&index_path).expect("SQLite index should open");
4817 index.rebuild(&scans).expect("NDJSON scan should index");
4818
4819 let by_id = index
4820 .lookup_id("ndjson-test-feature")
4821 .expect("id lookup should succeed");
4822 assert!(
4823 by_id.is_some(),
4824 "indexed feature should be addressable by id"
4825 );
4826
4827 let hits = index
4828 .lookup_bbox_iter(BBox {
4829 min_x: -1.0,
4830 max_x: 1.0,
4831 min_y: -1.0,
4832 max_y: 1.0,
4833 })
4834 .collect::<Result<Vec<_>>>()
4835 .expect("bbox lookup should collect");
4836 assert_eq!(hits.len(), 1);
4837 assert_eq!(hits[0].source_path, ndjson_path);
4838 }
4839
    /// Builds a database by hand using a schema in which `features.feature_id`
    /// and `bbox_map.feature_id` are declared `UNIQUE`, opens it through
    /// `Index::open`, and checks that afterwards neither table definition
    /// contains the `UNIQUE` column clause and that a second row with the same
    /// `feature_id` can be inserted into both tables.
    #[test]
    fn opening_old_unique_schema_removes_feature_id_uniqueness() {
        let index_path = write_temp_index_path_with_prefix("old-unique-schema");
        {
            // Scoped so the raw connection is dropped before `Index::open`
            // touches the same file.
            let conn = rusqlite::Connection::open(&index_path).expect("old index should open");
            conn.execute_batch(
                r"
                CREATE TABLE sources (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    path TEXT NOT NULL UNIQUE,
                    metadata TEXT NOT NULL,
                    vertices_offset INTEGER,
                    vertices_length INTEGER,
                    source_size INTEGER,
                    source_mtime_ns INTEGER
                );
                CREATE TABLE features (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    feature_id TEXT NOT NULL UNIQUE,
                    source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
                    path TEXT NOT NULL,
                    file_size INTEGER,
                    file_mtime_ns INTEGER,
                    offset INTEGER NOT NULL,
                    length INTEGER NOT NULL,
                    min_z REAL,
                    max_z REAL,
                    cityobject_count INTEGER,
                    member_ranges TEXT
                );
                CREATE VIRTUAL TABLE feature_bbox
                USING rtree(feature_rowid, min_x, max_x, min_y, max_y);
                CREATE TABLE bbox_map (
                    feature_rowid INTEGER PRIMARY KEY,
                    feature_id TEXT NOT NULL UNIQUE REFERENCES features(feature_id) ON DELETE CASCADE
                );
                INSERT INTO sources (path, metadata, source_size, source_mtime_ns)
                VALUES ('metadata.json', '{}', 0, 0);
                INSERT INTO features (
                    feature_id,
                    source_id,
                    path,
                    file_size,
                    file_mtime_ns,
                    offset,
                    length,
                    min_z,
                    max_z,
                    cityobject_count,
                    member_ranges
                )
                VALUES ('duplicate', 1, 'feature-a.city.jsonl', 0, 0, 0, 1, 0, 0, 1, NULL);
                INSERT INTO feature_bbox (feature_rowid, min_x, max_x, min_y, max_y)
                VALUES (1, 0, 1, 0, 1);
                INSERT INTO bbox_map (feature_rowid, feature_id) VALUES (1, 'duplicate');
                ",
            )
            .expect("old schema should initialize");
        }

        // Opening the hand-built database is the step under test.
        let index = Index::open(&index_path).expect("index migration should succeed");

        // The stored table definitions must no longer carry the UNIQUE clause.
        assert!(
            !table_sql_contains(&index.conn, "features", "feature_id TEXT NOT NULL UNIQUE",)
                .expect("features schema should load")
        );
        assert!(
            !table_sql_contains(&index.conn, "bbox_map", "feature_id TEXT NOT NULL UNIQUE",)
                .expect("bbox_map schema should load")
        );

        // A second feature reusing the id 'duplicate' (from a different file)
        // must now be accepted.
        index
            .conn
            .execute(
                r"
                INSERT INTO features (
                    feature_id,
                    source_id,
                    path,
                    file_size,
                    file_mtime_ns,
                    offset,
                    length,
                    min_z,
                    max_z,
                    cityobject_count,
                    member_ranges
                )
                VALUES ('duplicate', 1, 'feature-b.city.jsonl', 0, 0, 0, 1, 0, 0, 1, NULL)
                ",
                [],
            )
            .expect("duplicate feature_id should insert after migration");
        // Key the matching bbox_map row on the rowid of the row just inserted.
        let row_id = index.conn.last_insert_rowid();
        index
            .conn
            .execute(
                "INSERT INTO bbox_map (feature_rowid, feature_id) VALUES (?1, 'duplicate')",
                params![row_id],
            )
            .expect("duplicate bbox_map feature_id should insert after migration");
    }
4942
4943 #[test]
4944 fn iter_all_scans_each_supported_layout_in_deterministic_order() {
4945 let expected_ids = vec!["alpha", "beta", "gamma"];
4946 let feature_files_root = write_temp_feature_files_root(&expected_ids);
4947 let feature_files_index_path = write_temp_index_path_with_prefix("feature-files");
4948 let mut feature_files_index = CityIndex::open(
4949 StorageLayout::FeatureFiles {
4950 root: feature_files_root,
4951 metadata_glob: "**/metadata.json".to_owned(),
4952 feature_glob: "**/*.city.jsonl".to_owned(),
4953 },
4954 &feature_files_index_path,
4955 )
4956 .expect("feature-files index should open");
4957 feature_files_index
4958 .reindex()
4959 .expect("feature-files dataset should index");
4960 assert_full_scan_order(&feature_files_index, &expected_ids);
4961
4962 let cityjson_root = write_temp_cityjson_root(&expected_ids);
4963 let cityjson_index_path = write_temp_index_path_with_prefix("cityjson");
4964 let mut cityjson_index = CityIndex::open(
4965 StorageLayout::CityJson {
4966 paths: vec![cityjson_root],
4967 },
4968 &cityjson_index_path,
4969 )
4970 .expect("cityjson index should open");
4971 cityjson_index
4972 .reindex()
4973 .expect("cityjson dataset should index");
4974 assert_full_scan_order(&cityjson_index, &expected_ids);
4975
4976 let ndjson_root = write_temp_ndjson_root(&expected_ids);
4977 let ndjson_index_path = write_temp_index_path_with_prefix("ndjson");
4978 let mut ndjson_index = CityIndex::open(
4979 StorageLayout::Ndjson {
4980 paths: vec![ndjson_root],
4981 },
4982 &ndjson_index_path,
4983 )
4984 .expect("ndjson index should open");
4985 ndjson_index.reindex().expect("ndjson dataset should index");
4986 assert_full_scan_order(&ndjson_index, &expected_ids);
4987 assert_full_scan_pages(&ndjson_index, &expected_ids);
4988 }
4989
4990 #[test]
4991 fn iter_all_paginates_across_multiple_pages() {
4992 let ids = (0..600)
4993 .map(|idx| format!("feature-{idx:03}"))
4994 .collect::<Vec<_>>();
4995 let id_refs = ids.iter().map(String::as_str).collect::<Vec<_>>();
4996 let root = write_temp_ndjson_root(&id_refs);
4997 let index_path = write_temp_index_path_with_prefix("iter-all-pages");
4998 let layout = StorageLayout::Ndjson {
4999 paths: vec![root.clone()],
5000 };
5001 let mut index = CityIndex::open(layout, &index_path).expect("index should open");
5002 index.reindex().expect("dataset should index");
5003
5004 let scanned_ids = index
5005 .iter_all_with_ids()
5006 .expect("iter_all_with_ids should build")
5007 .map(|result| result.map(|(id, _)| id))
5008 .collect::<Result<Vec<_>>>()
5009 .expect("iter_all_with_ids should collect");
5010
5011 assert_eq!(scanned_ids.len(), 600);
5012 assert_eq!(scanned_ids.first().expect("first id"), "feature-000");
5013 assert_eq!(scanned_ids.last().expect("last id"), "feature-599");
5014
5015 let ref_pages = index
5016 .iter_all_feature_ref_pages(128)
5017 .expect("iter_all_feature_ref_pages should build")
5018 .collect::<Result<Vec<_>>>()
5019 .expect("iter_all_feature_ref_pages should collect");
5020 assert_eq!(
5021 ref_pages.iter().map(Vec::len).collect::<Vec<_>>(),
5022 vec![128, 128, 128, 128, 88]
5023 );
5024 assert_eq!(
5025 ref_pages
5026 .iter()
5027 .flat_map(|page| page.iter().map(|feature| feature.feature_id.as_str()))
5028 .collect::<Vec<_>>(),
5029 ids.iter().map(String::as_str).collect::<Vec<_>>()
5030 );
5031 assert_eq!(
5032 ref_pages
5033 .iter()
5034 .flat_map(|page| page.iter().map(|feature| feature.row_id))
5035 .collect::<Vec<_>>(),
5036 (1..=600).collect::<Vec<_>>()
5037 );
5038
5039 let first_batch = index
5040 .read_features(ref_pages.first().expect("first page should exist"))
5041 .expect("feature batch should reconstruct");
5042 assert_eq!(first_batch.len(), 128);
5043
5044 assert_indexed_batch_preserves_order(&index, ref_pages.first().expect("first page"), &ids);
5045 assert_decoded_scan_pages(&index, &ids);
5046 assert_rowid_feature_reads(&index);
5047
5048 for page in &ref_pages {
5049 for feature in page {
5050 let model = index
5051 .read_feature(feature)
5052 .expect("feature should reconstruct");
5053 assert!(model_contains_id(&model, &feature.feature_id));
5054 assert_eq!(
5055 feature_bounds_for_model(&model).expect("bounds should be computable"),
5056 feature.bounds
5057 );
5058 }
5059 }
5060
5061 let bbox_pages = index
5062 .iter_all_bbox_pages(128)
5063 .expect("iter_all_bbox_pages should build")
5064 .collect::<Result<Vec<_>>>()
5065 .expect("iter_all_bbox_pages should collect");
5066 assert_eq!(
5067 bbox_pages
5068 .iter()
5069 .flat_map(|page| page.iter().map(|feature| feature.feature_id.as_str()))
5070 .collect::<Vec<_>>(),
5071 ids.iter().map(String::as_str).collect::<Vec<_>>()
5072 );
5073 }
5074
5075 #[test]
5076 fn iter_all_feature_ref_pages_handles_page_boundaries_without_gaps() {
5077 let ids = (0..256)
5078 .map(|idx| format!("boundary-{idx:03}"))
5079 .collect::<Vec<_>>();
5080 let id_refs = ids.iter().map(String::as_str).collect::<Vec<_>>();
5081 let root = write_temp_ndjson_root(&id_refs);
5082 let index_path = write_temp_index_path_with_prefix("iter-boundary-pages");
5083 let mut index = CityIndex::open(StorageLayout::Ndjson { paths: vec![root] }, &index_path)
5084 .expect("index should open");
5085 index.reindex().expect("dataset should index");
5086
5087 let pages = index
5088 .iter_all_feature_ref_pages(128)
5089 .expect("iter_all_feature_ref_pages should build")
5090 .collect::<Result<Vec<_>>>()
5091 .expect("iter_all_feature_ref_pages should collect");
5092
5093 assert_eq!(
5094 pages.iter().map(Vec::len).collect::<Vec<_>>(),
5095 vec![128, 128]
5096 );
5097 assert_eq!(
5098 pages
5099 .iter()
5100 .flat_map(|page| page.iter().map(|feature| feature.feature_id.as_str()))
5101 .collect::<Vec<_>>(),
5102 ids.iter().map(String::as_str).collect::<Vec<_>>()
5103 );
5104 }
5105
5106 #[test]
5107 fn feature_bounds_summary_matches_iterative_bounds() {
5108 let ids = ["alpha", "beta", "gamma"];
5109 let root = write_temp_ndjson_root(&ids);
5110 let index_path = write_temp_index_path_with_prefix("bounds-summary");
5111 let mut index = CityIndex::open(StorageLayout::Ndjson { paths: vec![root] }, &index_path)
5112 .expect("index should open");
5113 index.reindex().expect("dataset should index");
5114
5115 let summary = index
5116 .feature_bounds_summary()
5117 .expect("bounds summary should load")
5118 .expect("non-empty index should have a summary");
5119 let mut pages = index
5120 .iter_all_bbox_pages(2)
5121 .expect("bbox pages should build")
5122 .collect::<Result<Vec<_>>>()
5123 .expect("bbox pages should collect")
5124 .into_iter()
5125 .flatten();
5126 let first = pages.next().expect("first indexed feature");
5127 let mut expected = first.bounds;
5128 let mut count = 1usize;
5129 for feature in pages {
5130 expected.min_x = expected.min_x.min(feature.bounds.min_x);
5131 expected.max_x = expected.max_x.max(feature.bounds.max_x);
5132 expected.min_y = expected.min_y.min(feature.bounds.min_y);
5133 expected.max_y = expected.max_y.max(feature.bounds.max_y);
5134 expected.min_z = expected.min_z.min(feature.bounds.min_z);
5135 expected.max_z = expected.max_z.max(feature.bounds.max_z);
5136 count += 1;
5137 }
5138
5139 assert_eq!(summary.feature_count, count);
5140 assert_eq!(summary.bounds, expected);
5141 }
5142
5143 #[test]
5144 fn feature_bounds_summary_returns_none_for_empty_index() {
5145 let index_path = write_temp_index_path_with_prefix("empty-bounds-summary");
5146 let index = CityIndex::open(StorageLayout::Ndjson { paths: Vec::new() }, &index_path)
5147 .expect("index should open");
5148
5149 assert_eq!(
5150 index.feature_bounds_summary().expect("summary should load"),
5151 None
5152 );
5153 }
5154
5155 #[test]
5156 fn iter_all_feature_ref_pages_rejects_zero_page_size() {
5157 let root = write_temp_ndjson_root(&["alpha"]);
5158 let index_path = write_temp_index_path_with_prefix("page-size-zero");
5159 let mut index = CityIndex::open(StorageLayout::Ndjson { paths: vec![root] }, &index_path)
5160 .expect("index should open");
5161 index.reindex().expect("dataset should index");
5162
5163 match index.iter_all_feature_ref_pages(0) {
5164 Ok(_) => panic!("zero page size should be rejected"),
5165 Err(error) => assert!(error.to_string().contains("page_size")),
5166 }
5167 }
5168
5169 #[test]
5170 fn read_exact_range_reads_only_the_requested_span() {
5171 let path = write_temp_bytes(b"abcdefghij");
5172
5173 let bytes = read_exact_range(&path, 3, 4).expect("range read should succeed");
5174
5175 assert_eq!(bytes, b"defg");
5176 }
5177
5178 #[test]
5179 fn read_exact_range_rejects_short_reads() {
5180 let path = write_temp_bytes(b"abc");
5181
5182 let error = read_exact_range(&path, 2, 4).expect_err("range read should fail");
5183
5184 assert!(error.to_string().contains("short read"));
5185 }
5186
5187 #[test]
5188 fn read_exact_range_rejects_oversized_lengths() {
5189 let path = write_temp_bytes(b"abc");
5190
5191 let error = read_exact_range(&path, 0, u64::MAX).expect_err("range read should fail");
5192
5193 assert!(
5194 error
5195 .to_string()
5196 .contains("exceeds the supported buffer size")
5197 );
5198 }
5199
5200 #[test]
5201 fn feature_files_metadata_resolution_prefers_nearest_ancestor() {
5202 let root = PathBuf::from("/data/root");
5203 let mut metadata_by_dir = BTreeMap::new();
5204 metadata_by_dir.insert(root.clone(), root.join("metadata.json"));
5205 metadata_by_dir.insert(
5206 root.join("features/8"),
5207 root.join("features/8/metadata.json"),
5208 );
5209
5210 let feature_path = root.join("features/8/296/592/sample.city.jsonl");
5211 let resolved = resolve_feature_metadata_path(&root, &feature_path, &metadata_by_dir)
5212 .expect("metadata must resolve");
5213
5214 assert_eq!(resolved, root.join("features/8/metadata.json"));
5215 }
5216
5217 fn object_entry_fragment(object_id: &str, object: &Value) -> Vec<u8> {
5218 let mut map = Map::new();
5219 map.insert(object_id.to_owned(), object.clone());
5220 let serialized = serde_json::to_vec(&Value::Object(map)).expect("object entry");
5221 serialized[1..serialized.len() - 1].to_vec()
5222 }
5223
5224 fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
5225 haystack
5226 .windows(needle.len())
5227 .position(|window| window == needle)
5228 }
5229
5230 fn write_temp_cityjson(bytes: &[u8]) -> PathBuf {
5231 let unique = SystemTime::now()
5232 .duration_since(UNIX_EPOCH)
5233 .expect("system time")
5234 .as_nanos();
5235 let path =
5236 std::env::temp_dir().join(format!("cityjson-index-cityjson-read-one-{unique}.json"));
5237 fs::write(&path, bytes).expect("write temp cityjson");
5238 path
5239 }
5240
5241 fn write_temp_ndjson(metadata: &Value, feature: &Value) -> PathBuf {
5242 let unique = SystemTime::now()
5243 .duration_since(UNIX_EPOCH)
5244 .expect("system time")
5245 .as_nanos();
5246 let path = std::env::temp_dir().join(format!("cityjson-index-ndjson-{unique}.jsonl"));
5247 let contents = format!(
5248 "{}\n{}\n",
5249 serde_json::to_string(metadata).expect("metadata JSON"),
5250 serde_json::to_string(feature).expect("feature JSON")
5251 );
5252 fs::write(&path, contents).expect("write temp ndjson");
5253 path
5254 }
5255
5256 fn write_temp_index_path() -> PathBuf {
5257 let unique = SystemTime::now()
5258 .duration_since(UNIX_EPOCH)
5259 .expect("system time")
5260 .as_nanos();
5261 let path = std::env::temp_dir().join(format!("cityjson-index-ndjson-{unique}.sqlite"));
5262 if path.exists() {
5263 fs::remove_file(&path).expect("remove temp sqlite");
5264 }
5265 path
5266 }
5267
5268 fn write_temp_index_path_with_prefix(prefix: &str) -> PathBuf {
5269 let unique = SystemTime::now()
5270 .duration_since(UNIX_EPOCH)
5271 .expect("system time")
5272 .as_nanos();
5273 let path = std::env::temp_dir().join(format!("cityjson-index-{prefix}-{unique}.sqlite"));
5274 if path.exists() {
5275 fs::remove_file(&path).expect("remove temp sqlite");
5276 }
5277 path
5278 }
5279
5280 fn write_temp_feature_files_root(ids: &[&str]) -> PathBuf {
5281 let root = write_temp_dir("cityjson-index-feature-files");
5282 fs::write(
5283 root.join("metadata.json"),
5284 serde_json::to_vec(&base_document()).expect("metadata JSON"),
5285 )
5286 .expect("write metadata");
5287 for (idx, id) in ids.iter().enumerate() {
5288 let feature_path = root.join(format!("features/{idx:03}.city.jsonl"));
5289 let idx = i64::try_from(idx).expect("test index fits in i64");
5290 if let Some(parent) = feature_path.parent() {
5291 fs::create_dir_all(parent).expect("create feature directory");
5292 }
5293 fs::write(
5294 &feature_path,
5295 serde_json::to_vec(&feature_feature_document(id, idx)).expect("feature JSON"),
5296 )
5297 .expect("write feature file");
5298 }
5299 root
5300 }
5301
5302 fn write_temp_cityjson_root(ids: &[&str]) -> PathBuf {
5303 let root = write_temp_dir("cityjson-index-cityjson");
5304 let mut cityobjects = Map::new();
5305 for id in ids {
5306 cityobjects.insert((*id).to_owned(), feature_object(0));
5307 }
5308 let document = serde_json::json!({
5309 "type": "CityJSON",
5310 "version": "2.0",
5311 "transform": {
5312 "scale": [1.0, 1.0, 1.0],
5313 "translate": [0.0, 0.0, 0.0]
5314 },
5315 "metadata": {
5316 "referenceSystem": "https://www.opengis.net/def/crs/EPSG/0/7415"
5317 },
5318 "CityObjects": cityobjects,
5319 "vertices": [
5320 [0, 0, 0],
5321 [1, 0, 0],
5322 [0, 1, 0]
5323 ]
5324 });
5325 fs::write(
5326 root.join("dataset.city.json"),
5327 serde_json::to_vec(&document).expect("cityjson JSON"),
5328 )
5329 .expect("write cityjson");
5330 root
5331 }
5332
5333 fn write_temp_ndjson_root(ids: &[&str]) -> PathBuf {
5334 let root = write_temp_dir("cityjson-index-ndjson-root");
5335 let mut contents = serde_json::to_string(&base_document()).expect("metadata JSON");
5336 contents.push('\n');
5337 for (idx, id) in ids.iter().enumerate() {
5338 let idx = i64::try_from(idx).expect("test index fits in i64");
5339 contents.push_str(
5340 &serde_json::to_string(&feature_feature_document(id, idx)).expect("feature JSON"),
5341 );
5342 contents.push('\n');
5343 }
5344 fs::write(root.join("dataset.city.jsonl"), contents).expect("write ndjson");
5345 root
5346 }
5347
5348 fn write_temp_dir(prefix: &str) -> PathBuf {
5349 let unique = SystemTime::now()
5350 .duration_since(UNIX_EPOCH)
5351 .expect("system time")
5352 .as_nanos();
5353 let path = std::env::temp_dir().join(format!("{prefix}-{unique}"));
5354 fs::create_dir_all(&path).expect("create temp dir");
5355 path
5356 }
5357
5358 fn base_document() -> Value {
5359 serde_json::json!({
5360 "type": "CityJSON",
5361 "version": "2.0",
5362 "transform": {
5363 "scale": [1.0, 1.0, 1.0],
5364 "translate": [0.0, 0.0, 0.0]
5365 },
5366 "metadata": {
5367 "referenceSystem": "https://www.opengis.net/def/crs/EPSG/0/7415"
5368 },
5369 "CityObjects": {},
5370 "vertices": []
5371 })
5372 }
5373
5374 fn feature_feature_document(id: &str, offset: i64) -> Value {
5375 let object = feature_object(offset);
5376 serde_json::json!({
5377 "type": "CityJSONFeature",
5378 "id": id,
5379 "CityObjects": {
5380 id: object
5381 },
5382 "vertices": [
5383 [offset, 0, 0],
5384 [offset + 1, 0, 0],
5385 [offset, 1, 0]
5386 ]
5387 })
5388 }
5389
5390 fn feature_object(_offset: i64) -> Value {
5391 serde_json::json!({
5392 "type": "Building",
5393 "geometry": [{
5394 "type": "MultiSurface",
5395 "lod": "1.0",
5396 "boundaries": [[[0, 1, 2]]]
5397 }]
5398 })
5399 }
5400
5401 fn assert_full_scan_order(index: &CityIndex, expected_ids: &[&str]) {
5402 let ids = index
5403 .iter_all_with_ids()
5404 .expect("iter_all_with_ids should build")
5405 .collect::<Result<Vec<_>>>()
5406 .expect("iter_all_with_ids should collect");
5407 assert_eq!(
5408 ids.iter().map(|(id, _)| id.as_str()).collect::<Vec<_>>(),
5409 expected_ids
5410 );
5411
5412 let models = index
5413 .iter_all()
5414 .expect("iter_all should build")
5415 .collect::<Result<Vec<_>>>()
5416 .expect("iter_all should collect");
5417 assert_eq!(models.len(), expected_ids.len());
5418
5419 let models_with_metadata = index
5420 .iter_all_with_metadata()
5421 .expect("iter_all_with_metadata should build")
5422 .collect::<Result<Vec<_>>>()
5423 .expect("iter_all_with_metadata should collect");
5424 assert_eq!(models_with_metadata.len(), expected_ids.len());
5425 }
5426
5427 fn assert_full_scan_pages(index: &CityIndex, expected_ids: &[&str]) {
5428 let pages = index
5429 .iter_all_feature_ref_pages(2)
5430 .expect("iter_all_feature_ref_pages should build")
5431 .collect::<Result<Vec<_>>>()
5432 .expect("iter_all_feature_ref_pages should collect");
5433 assert_eq!(
5434 pages
5435 .iter()
5436 .flat_map(|page| page.iter().map(|feature| feature.feature_id.as_str()))
5437 .collect::<Vec<_>>(),
5438 expected_ids
5439 );
5440
5441 let bbox_pages = index
5442 .iter_all_bbox_pages(2)
5443 .expect("iter_all_bbox_pages should build")
5444 .collect::<Result<Vec<_>>>()
5445 .expect("iter_all_bbox_pages should collect");
5446 assert_eq!(
5447 bbox_pages
5448 .iter()
5449 .flat_map(|page| page.iter().map(|feature| feature.feature_id.as_str()))
5450 .collect::<Vec<_>>(),
5451 expected_ids
5452 );
5453
5454 for page in pages {
5455 for feature in page {
5456 let model = index
5457 .read_feature(&feature)
5458 .expect("feature should reconstruct");
5459 assert!(model_contains_id(&model, &feature.feature_id));
5460 assert_eq!(
5461 feature_bounds_for_model(&model).expect("bounds should be computable"),
5462 feature.bounds
5463 );
5464 }
5465 }
5466 }
5467
5468 fn assert_indexed_batch_preserves_order(
5469 index: &CityIndex,
5470 features: &[IndexedFeatureRef],
5471 ids: &[String],
5472 ) {
5473 let indexed_features = index
5474 .read_indexed_features(features)
5475 .expect("indexed feature batch should reconstruct");
5476 assert_eq!(
5477 indexed_features
5478 .iter()
5479 .map(|feature| feature.reference.feature_id.as_str())
5480 .collect::<Vec<_>>(),
5481 ids.iter()
5482 .take(features.len())
5483 .map(String::as_str)
5484 .collect::<Vec<_>>()
5485 );
5486 }
5487
5488 fn assert_decoded_scan_pages(index: &CityIndex, ids: &[String]) {
5489 let scan_pages = index
5490 .scan_feature_pages(128)
5491 .expect("scan_feature_pages should build")
5492 .collect::<Result<Vec<_>>>()
5493 .expect("scan_feature_pages should collect");
5494 assert_eq!(
5495 scan_pages.iter().map(Vec::len).collect::<Vec<_>>(),
5496 vec![128, 128, 128, 128, 88]
5497 );
5498 assert_eq!(
5499 scan_pages
5500 .iter()
5501 .flat_map(|page| {
5502 page.iter()
5503 .map(|feature| feature.reference.feature_id.as_str())
5504 })
5505 .collect::<Vec<_>>(),
5506 ids.iter().map(String::as_str).collect::<Vec<_>>()
5507 );
5508
5509 let scanned_ids = index
5510 .scan_features()
5511 .expect("scan_features should build")
5512 .map(|result| result.map(|feature| feature.reference.feature_id))
5513 .collect::<Result<Vec<_>>>()
5514 .expect("scan_features should collect");
5515 assert_eq!(scanned_ids, ids);
5516 }
5517
5518 fn assert_rowid_feature_reads(index: &CityIndex) {
5519 let first_ref = index
5520 .lookup_feature_ref_by_rowid(1)
5521 .expect("rowid lookup should load")
5522 .expect("first rowid should exist");
5523 assert_eq!(first_ref.feature_id, "feature-000");
5524 assert_eq!(
5525 index
5526 .lookup_feature_ref_by_rowid(9999)
5527 .expect("missing rowid lookup should load"),
5528 None
5529 );
5530
5531 let rowid_features = index
5532 .read_features_by_rowids(&[2, 9999, 1, 2])
5533 .expect("rowid batch should reconstruct");
5534 assert_eq!(
5535 rowid_features
5536 .iter()
5537 .map(|feature| feature
5538 .as_ref()
5539 .map(|feature| feature.reference.feature_id.as_str()))
5540 .collect::<Vec<_>>(),
5541 vec![
5542 Some("feature-001"),
5543 None,
5544 Some("feature-000"),
5545 Some("feature-001")
5546 ]
5547 );
5548
5549 let range_features = index
5550 .read_feature_range_after_rowid(Some(127), 3)
5551 .expect("rowid range should reconstruct");
5552 assert_eq!(
5553 range_features
5554 .iter()
5555 .map(|feature| feature.reference.feature_id.as_str())
5556 .collect::<Vec<_>>(),
5557 vec!["feature-127", "feature-128", "feature-129"]
5558 );
5559 }
5560
5561 fn model_contains_id(model: &CityModel, id: &str) -> bool {
5562 let value: Value =
5563 serde_json::from_str(&cityjson_lib::json::to_string(model).expect("serialize model"))
5564 .expect("model JSON");
5565 value["CityObjects"]
5566 .as_object()
5567 .is_some_and(|cityobjects| cityobjects.contains_key(id))
5568 }
5569
5570 fn feature_bounds_for_model(model: &CityModel) -> Result<FeatureBounds> {
5571 let value: Value =
5572 serde_json::from_str(&cityjson_lib::json::to_string(model).expect("serialize model"))
5573 .expect("model JSON");
5574 let vertices = value
5575 .get("vertices")
5576 .and_then(Value::as_array)
5577 .ok_or_else(|| import_error("model JSON is missing vertices"))?;
5578 let transform = value
5579 .get("transform")
5580 .and_then(Value::as_object)
5581 .ok_or_else(|| import_error("model JSON is missing transform"))?;
5582 let scale = parse_transform_component(transform, "scale")?;
5583 let translate = parse_transform_component(transform, "translate")?;
5584
5585 let mut min_x = f64::INFINITY;
5586 let mut max_x = f64::NEG_INFINITY;
5587 let mut min_y = f64::INFINITY;
5588 let mut max_y = f64::NEG_INFINITY;
5589 let mut min_z = f64::INFINITY;
5590 let mut max_z = f64::NEG_INFINITY;
5591
5592 for vertex in vertices {
5593 let coords = vertex
5594 .as_array()
5595 .ok_or_else(|| import_error("vertex must be an array"))?;
5596 if coords.len() != 3 {
5597 return Err(import_error("vertex must have three coordinates"));
5598 }
5599 let x = translate[0] + scale[0] * value_as_f64(&coords[0])?;
5600 let y = translate[1] + scale[1] * value_as_f64(&coords[1])?;
5601 let z = translate[2] + scale[2] * value_as_f64(&coords[2])?;
5602 min_x = min_x.min(x);
5603 max_x = max_x.max(x);
5604 min_y = min_y.min(y);
5605 max_y = max_y.max(y);
5606 min_z = min_z.min(z);
5607 max_z = max_z.max(z);
5608 }
5609
5610 if !min_x.is_finite()
5611 || !max_x.is_finite()
5612 || !min_y.is_finite()
5613 || !max_y.is_finite()
5614 || !min_z.is_finite()
5615 || !max_z.is_finite()
5616 {
5617 return Err(import_error(
5618 "could not compute a finite bbox from the model",
5619 ));
5620 }
5621
5622 Ok(FeatureBounds {
5623 min_x,
5624 max_x,
5625 min_y,
5626 max_y,
5627 min_z,
5628 max_z,
5629 })
5630 }
5631
5632 fn parse_transform_component(
5633 transform: &serde_json::Map<String, Value>,
5634 key: &str,
5635 ) -> Result<[f64; 3]> {
5636 let values = transform
5637 .get(key)
5638 .and_then(Value::as_array)
5639 .ok_or_else(|| import_error(format!("transform is missing {key}")))?;
5640 if values.len() != 3 {
5641 return Err(import_error(format!(
5642 "transform {key} must contain three values"
5643 )));
5644 }
5645 Ok([
5646 value_as_f64(&values[0])?,
5647 value_as_f64(&values[1])?,
5648 value_as_f64(&values[2])?,
5649 ])
5650 }
5651
5652 fn value_as_f64(value: &Value) -> Result<f64> {
5653 value
5654 .as_f64()
5655 .ok_or_else(|| import_error("expected a numeric value"))
5656 }
5657
5658 fn write_temp_bytes(bytes: &[u8]) -> PathBuf {
5659 let unique = SystemTime::now()
5660 .duration_since(UNIX_EPOCH)
5661 .expect("system time")
5662 .as_nanos();
5663 let path = std::env::temp_dir().join(format!("cityjson-index-range-read-{unique}.bin"));
5664 fs::write(&path, bytes).expect("write temp bytes");
5665 path
5666 }
5667}