Skip to main content

cityjson_index/
lib.rs

1pub mod benchmark;
2pub mod profile;
3
4use std::collections::{BTreeMap, BTreeSet, HashMap};
5use std::fs;
6use std::io::{ErrorKind, Read, Seek, SeekFrom};
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Mutex};
9use std::time::UNIX_EPOCH;
10
11use cityjson_lib::json::staged;
12use cityjson_lib::{CityModel, Error, Result};
13use globset::GlobMatcher;
14use ignore::WalkBuilder;
15use lru::LruCache;
16use rusqlite::{OptionalExtension, params};
17use serde::de::DeserializeOwned;
18use serde::{Deserialize, Serialize};
19use serde_json::value::RawValue;
20use serde_json::{Map, Number, Value};
21
22#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
23pub struct BBox {
24    pub min_x: f64,
25    pub max_x: f64,
26    pub min_y: f64,
27    pub max_y: f64,
28}
29
30#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
31pub struct FeatureBounds {
32    pub min_x: f64,
33    pub max_x: f64,
34    pub min_y: f64,
35    pub max_y: f64,
36    pub min_z: f64,
37    pub max_z: f64,
38}
39
40#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
41pub struct FeatureBoundsSummary {
42    pub bounds: FeatureBounds,
43    pub feature_count: usize,
44}
45
46impl FeatureBounds {
47    #[must_use]
48    pub fn bbox_2d(self) -> BBox {
49        BBox {
50            min_x: self.min_x,
51            max_x: self.max_x,
52            min_y: self.min_y,
53            max_y: self.max_y,
54        }
55    }
56}
57
58pub struct CityIndex {
59    index: Index,
60    backend: Box<dyn StorageBackend>,
61}
62
/// Name of the `CITYJSON_INDEX_WORKERS` environment variable.
///
/// NOTE(review): presumably read to configure the number of workers; the
/// reader is not visible in this part of the file.
pub const WORKER_COUNT_ENV: &str = "CITYJSON_INDEX_WORKERS";

/// Default scan page size.
///
/// NOTE(review): presumably the number of rows fetched per page when scanning
/// the index; confirm at the use site.
const DEFAULT_SCAN_PAGE_SIZE: usize = 512;
65
66#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
67pub struct IndexedFeatureRef {
68    pub row_id: i64,
69    pub feature_id: String,
70    pub source_id: i64,
71    pub source_path: PathBuf,
72    pub offset: u64,
73    pub length: u64,
74    pub vertices_offset: Option<u64>,
75    pub vertices_length: Option<u64>,
76    pub member_ranges_json: Option<String>,
77    pub bounds: FeatureBounds,
78}
79
80pub struct IndexedFeature {
81    pub reference: IndexedFeatureRef,
82    pub model: CityModel,
83}
84
85#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
86pub enum LodSelection {
87    #[default]
88    All,
89    Highest,
90    Exact(String),
91}
92
93#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
94pub struct FeatureFilter {
95    pub cityobject_types: Option<BTreeSet<String>>,
96    pub default_lod: LodSelection,
97    pub lods_by_type: BTreeMap<String, LodSelection>,
98}
99
100#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
101pub struct MissingLodSelection {
102    pub cityobject_type: String,
103    pub requested_lod: String,
104    pub available_lods: BTreeSet<String>,
105}
106
107#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
108pub struct FeatureFilterDiagnostics {
109    pub available_types: BTreeSet<String>,
110    pub retained_types: BTreeSet<String>,
111    pub ignored_types: BTreeSet<String>,
112    pub available_lods: BTreeMap<String, BTreeSet<String>>,
113    pub retained_lods: BTreeMap<String, BTreeSet<String>>,
114    pub missing_lods: Vec<MissingLodSelection>,
115    pub retained_geometry_count: usize,
116}
117
118#[derive(Debug, Clone)]
119pub struct FilteredFeature {
120    pub model: CityModel,
121    pub diagnostics: FeatureFilterDiagnostics,
122}
123
124#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
125pub struct FeatureFilterSummary {
126    pub available_types: BTreeSet<String>,
127    pub retained_types: BTreeSet<String>,
128    pub ignored_types: BTreeSet<String>,
129    pub available_lods: BTreeMap<String, BTreeSet<String>>,
130    pub retained_lods: BTreeMap<String, BTreeSet<String>>,
131    pub missing_lods: BTreeMap<String, MissingLodSelection>,
132    pub retained_feature_count: usize,
133    pub ignored_feature_count: usize,
134}
135
136impl IndexedFeatureRef {
137    fn to_location(&self) -> FeatureLocation {
138        FeatureLocation {
139            feature_id: self.feature_id.clone(),
140            source_id: self.source_id,
141            source_path: self.source_path.clone(),
142            offset: self.offset,
143            length: self.length,
144            vertices_offset: self.vertices_offset,
145            vertices_length: self.vertices_length,
146            member_ranges_json: self.member_ranges_json.clone(),
147        }
148    }
149}
150
151impl FeatureFilter {
152    #[must_use]
153    pub fn is_active(&self) -> bool {
154        self.cityobject_types.is_some()
155            || self.default_lod != LodSelection::All
156            || self
157                .lods_by_type
158                .values()
159                .any(|selection| *selection != LodSelection::All)
160    }
161
162    /// Applies this filter to a single decoded `CityJSON` feature.
163    ///
164    /// Object-type selection keeps descendants of selected `CityObjects`. This
165    /// preserves common `CityJSON` features where a selected parent object, such
166    /// as `Building`, carries its renderable geometry on child objects.
167    ///
168    /// # Errors
169    ///
170    /// Returns an error if the source model contains invalid references or
171    /// `cityjson-lib` extraction fails.
172    pub fn apply(&self, model: &CityModel) -> Result<FilteredFeature> {
173        let retained_handles = retained_cityobject_handles(model, self)?;
174        let diagnostics = filter_diagnostics(model, &retained_handles, self);
175
176        if !self.is_active() {
177            return Ok(FilteredFeature {
178                model: model.clone(),
179                diagnostics,
180            });
181        }
182
183        let type_selection = self
184            .cityobject_types
185            .as_ref()
186            .map(|_| {
187                cityjson_lib::ops::select_cityobjects(model, |ctx| {
188                    retained_handles.contains(&ctx.handle())
189                })
190            })
191            .transpose()?;
192
193        let lod_selection = lod_selection(model, &retained_handles, self)?;
194        let selection = match (type_selection, lod_selection) {
195            (Some(types), Some(lods)) => types.intersection(&lods),
196            (Some(types), None) => types,
197            (None, Some(lods)) => lods,
198            (None, None) => {
199                return Ok(FilteredFeature {
200                    model: model.clone(),
201                    diagnostics,
202                });
203            }
204        };
205
206        let filtered = extract_or_empty_feature(model, &selection)?;
207        Ok(FilteredFeature {
208            model: filtered,
209            diagnostics,
210        })
211    }
212}
213
214impl FeatureFilterSummary {
215    pub fn add(&mut self, diagnostics: &FeatureFilterDiagnostics) {
216        self.available_types
217            .extend(diagnostics.available_types.iter().cloned());
218        self.retained_types
219            .extend(diagnostics.retained_types.iter().cloned());
220        self.ignored_types
221            .extend(diagnostics.ignored_types.iter().cloned());
222        merge_lod_sets(&mut self.available_lods, &diagnostics.available_lods);
223        merge_lod_sets(&mut self.retained_lods, &diagnostics.retained_lods);
224        for missing in &diagnostics.missing_lods {
225            self.missing_lods
226                .entry(missing.cityobject_type.clone())
227                .or_insert_with(|| missing.clone());
228        }
229        if diagnostics.retained_geometry_count == 0 {
230            self.ignored_feature_count += 1;
231        } else {
232            self.retained_feature_count += 1;
233        }
234    }
235
236    #[must_use]
237    pub fn requested_lod_failures(&self, filter: &FeatureFilter) -> Vec<MissingLodSelection> {
238        filter
239            .lods_by_type
240            .iter()
241            .filter_map(|(cityobject_type, selection)| {
242                let LodSelection::Exact(requested_lod) = selection else {
243                    return None;
244                };
245                let eligible = self.available_lods.contains_key(cityobject_type)
246                    || self.retained_types.contains(cityobject_type)
247                    || filter
248                        .cityobject_types
249                        .as_ref()
250                        .is_none_or(|types| types.contains(cityobject_type));
251                if !eligible {
252                    return None;
253                }
254                let available_lods = self
255                    .available_lods
256                    .get(cityobject_type)
257                    .cloned()
258                    .unwrap_or_default();
259                if available_lods.contains(requested_lod) {
260                    return None;
261                }
262                Some(MissingLodSelection {
263                    cityobject_type: cityobject_type.clone(),
264                    requested_lod: requested_lod.clone(),
265                    available_lods,
266                })
267            })
268            .collect()
269    }
270
271    /// # Errors
272    ///
273    /// Returns an error when any explicit `LoD` selector did not match the
274    /// scanned filtered dataset.
275    pub fn ensure_requested_lods_available(&self, filter: &FeatureFilter) -> Result<()> {
276        let failures = self.requested_lod_failures(filter);
277        if failures.is_empty() {
278            return Ok(());
279        }
280
281        let details = failures
282            .iter()
283            .map(|missing| {
284                let available = if missing.available_lods.is_empty() {
285                    "none".to_owned()
286                } else {
287                    missing
288                        .available_lods
289                        .iter()
290                        .cloned()
291                        .collect::<Vec<_>>()
292                        .join(", ")
293                };
294                format!(
295                    "{} requested LoD '{}' but available LoDs are: {}",
296                    missing.cityobject_type, missing.requested_lod, available
297                )
298            })
299            .collect::<Vec<_>>()
300            .join("; ");
301        Err(import_error(format!(
302            "requested LoD selector matched no geometry: {details}"
303        )))
304    }
305}
306
/// Unions every per-type `LoD` set of `source` into `target`.
///
/// A type that appears in `source` always gets an entry in `target`, even if
/// its set in `source` is empty.
fn merge_lod_sets(
    target: &mut BTreeMap<String, BTreeSet<String>>,
    source: &BTreeMap<String, BTreeSet<String>>,
) {
    source.iter().for_each(|(cityobject_type, lods)| {
        let merged = target.entry(cityobject_type.clone()).or_default();
        for lod in lods {
            merged.insert(lod.clone());
        }
    });
}
318
319type CityObjectHandle = cityjson_types::prelude::CityObjectHandle;
320type GeometryHandle = cityjson_types::prelude::GeometryHandle;
321
322fn retained_cityobject_handles(
323    model: &CityModel,
324    filter: &FeatureFilter,
325) -> Result<BTreeSet<CityObjectHandle>> {
326    let Some(selected_types) = filter.cityobject_types.as_ref() else {
327        return Ok(model
328            .cityobjects()
329            .iter()
330            .map(|(handle, _)| handle)
331            .collect());
332    };
333
334    let mut retained = BTreeSet::new();
335    for (handle, cityobject) in model.cityobjects().iter() {
336        if selected_types.contains(&cityobject.type_cityobject().to_string()) {
337            collect_cityobject_descendants(model, handle, &mut retained)?;
338        }
339    }
340    Ok(retained)
341}
342
343fn collect_cityobject_descendants(
344    model: &CityModel,
345    handle: CityObjectHandle,
346    retained: &mut BTreeSet<CityObjectHandle>,
347) -> Result<()> {
348    if !retained.insert(handle) {
349        return Ok(());
350    }
351    let cityobject = model.cityobjects().get(handle).ok_or_else(|| {
352        import_error(format!(
353            "missing CityObject handle in filter traversal: {handle:?}"
354        ))
355    })?;
356    if let Some(children) = cityobject.children() {
357        for child in children {
358            collect_cityobject_descendants(model, *child, retained)?;
359        }
360    }
361    Ok(())
362}
363
364fn lod_selection(
365    model: &CityModel,
366    retained_handles: &BTreeSet<CityObjectHandle>,
367    filter: &FeatureFilter,
368) -> Result<Option<cityjson_lib::ops::ModelSelection>> {
369    if filter.default_lod == LodSelection::All
370        && filter
371            .lods_by_type
372            .values()
373            .all(|selection| *selection == LodSelection::All)
374    {
375        return Ok(None);
376    }
377
378    let highest_lods = highest_lods_by_cityobject(model, retained_handles);
379    cityjson_lib::ops::select_geometries(model, |ctx| {
380        if !retained_handles.contains(&ctx.cityobject_handle()) {
381            return false;
382        }
383        let cityobject_type = ctx.cityobject().type_cityobject().to_string();
384        let selection = filter
385            .lods_by_type
386            .get(&cityobject_type)
387            .unwrap_or(&filter.default_lod);
388        geometry_matches_lod_selection(
389            ctx.geometry().lod(),
390            highest_lods.get(&ctx.cityobject_handle()),
391            selection,
392        )
393    })
394    .map(Some)
395}
396
397fn geometry_matches_lod_selection(
398    geometry_lod: Option<&cityjson_types::v2_0::LoD>,
399    highest_lod: Option<&String>,
400    selection: &LodSelection,
401) -> bool {
402    match selection {
403        LodSelection::All => true,
404        LodSelection::Highest => geometry_lod
405            .is_some_and(|lod| highest_lod.is_some_and(|highest| lod.to_string() == *highest)),
406        LodSelection::Exact(selected_lod) => {
407            geometry_lod.is_some_and(|lod| lod.to_string() == *selected_lod)
408        }
409    }
410}
411
412fn highest_lods_by_cityobject(
413    model: &CityModel,
414    retained_handles: &BTreeSet<CityObjectHandle>,
415) -> BTreeMap<CityObjectHandle, String> {
416    let mut highest = BTreeMap::new();
417    for handle in retained_handles {
418        let Some(cityobject) = model.cityobjects().get(*handle) else {
419            continue;
420        };
421        let Some(geometry_handles) = cityobject.geometry() else {
422            continue;
423        };
424        if let Some(lod) = highest_lod(model, geometry_handles) {
425            highest.insert(*handle, lod);
426        }
427    }
428    highest
429}
430
431fn highest_lod(model: &CityModel, geometries: &[GeometryHandle]) -> Option<String> {
432    geometries
433        .iter()
434        .filter_map(|geometry_handle| {
435            model
436                .get_geometry(*geometry_handle)
437                .and_then(|geometry| geometry.lod())
438                .map(std::string::ToString::to_string)
439        })
440        .max_by(|lhs, rhs| compare_lod_strings(lhs, rhs))
441}
442
/// Orders two `LoD` strings.
///
/// When both parse as `f64` they are compared numerically, with incomparable
/// values (such as `NaN`) treated as equal. If either fails to parse, the
/// strings are compared lexicographically instead.
fn compare_lod_strings(lhs: &str, rhs: &str) -> std::cmp::Ordering {
    let (Ok(left), Ok(right)) = (lhs.parse::<f64>(), rhs.parse::<f64>()) else {
        return lhs.cmp(rhs);
    };
    match left.partial_cmp(&right) {
        Some(ordering) => ordering,
        None => std::cmp::Ordering::Equal,
    }
}
449
450fn filter_diagnostics(
451    model: &CityModel,
452    retained_handles: &BTreeSet<CityObjectHandle>,
453    filter: &FeatureFilter,
454) -> FeatureFilterDiagnostics {
455    let highest_lods = highest_lods_by_cityobject(model, retained_handles);
456    let mut diagnostics = FeatureFilterDiagnostics::default();
457
458    for (handle, cityobject) in model.cityobjects().iter() {
459        let cityobject_type = cityobject.type_cityobject().to_string();
460        diagnostics.available_types.insert(cityobject_type.clone());
461        if retained_handles.contains(&handle) {
462            diagnostics.retained_types.insert(cityobject_type.clone());
463        } else {
464            diagnostics.ignored_types.insert(cityobject_type.clone());
465            continue;
466        }
467
468        let Some(geometry_handles) = cityobject.geometry() else {
469            continue;
470        };
471        let selection = filter
472            .lods_by_type
473            .get(&cityobject_type)
474            .unwrap_or(&filter.default_lod);
475        for geometry_handle in geometry_handles {
476            let Some(geometry_lod) = model
477                .get_geometry(*geometry_handle)
478                .and_then(|geometry| geometry.lod())
479            else {
480                continue;
481            };
482            let lod = geometry_lod.to_string();
483            diagnostics
484                .available_lods
485                .entry(cityobject_type.clone())
486                .or_default()
487                .insert(lod.clone());
488            if geometry_matches_lod_selection(
489                Some(geometry_lod),
490                highest_lods.get(&handle),
491                selection,
492            ) {
493                diagnostics
494                    .retained_lods
495                    .entry(cityobject_type.clone())
496                    .or_default()
497                    .insert(lod);
498                diagnostics.retained_geometry_count += 1;
499            }
500        }
501    }
502
503    for (cityobject_type, selection) in &filter.lods_by_type {
504        let LodSelection::Exact(requested_lod) = selection else {
505            continue;
506        };
507        if !diagnostics.retained_types.contains(cityobject_type) {
508            continue;
509        }
510        let available_lods = diagnostics
511            .available_lods
512            .get(cityobject_type)
513            .cloned()
514            .unwrap_or_default();
515        if !available_lods.contains(requested_lod) {
516            diagnostics.missing_lods.push(MissingLodSelection {
517                cityobject_type: cityobject_type.clone(),
518                requested_lod: requested_lod.clone(),
519                available_lods,
520            });
521        }
522    }
523
524    diagnostics
525}
526
527fn extract_or_empty_feature(
528    model: &CityModel,
529    selection: &cityjson_lib::ops::ModelSelection,
530) -> Result<CityModel> {
531    if !selection.is_empty() {
532        return cityjson_lib::ops::extract(model, selection);
533    }
534
535    let mut empty = model.clone();
536    empty.clear_cityobjects();
537    empty.set_id(None);
538    Ok(empty)
539}
540
541#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
542pub enum DatasetLayoutKind {
543    #[serde(rename = "ndjson")]
544    Ndjson,
545    #[serde(rename = "cityjson")]
546    CityJson,
547    #[serde(rename = "feature-files")]
548    FeatureFiles,
549}
550
551impl DatasetLayoutKind {
552    #[must_use]
553    pub fn as_str(self) -> &'static str {
554        match self {
555            Self::Ndjson => "ndjson",
556            Self::CityJson => "cityjson",
557            Self::FeatureFiles => "feature-files",
558        }
559    }
560}
561
562#[derive(Debug, Clone, Serialize, Deserialize)]
563pub struct ManifestSummary {
564    pub path: PathBuf,
565    pub selected_tile_count: Option<usize>,
566    pub total_features: Option<usize>,
567    pub total_cityobjects: Option<usize>,
568}
569
570#[derive(Debug, Clone)]
571pub struct ResolvedDataset {
572    pub dataset_root: PathBuf,
573    pub index_path: PathBuf,
574    pub layout: DatasetLayoutKind,
575    pub manifest: Option<ManifestSummary>,
576    storage_layout: StorageLayout,
577    source_paths: Vec<PathBuf>,
578    feature_file_paths: Vec<PathBuf>,
579}
580
581#[derive(Debug, Clone, Serialize, Deserialize)]
582pub struct IndexStatus {
583    pub path: PathBuf,
584    pub exists: bool,
585    pub index_mtime_ns: Option<i64>,
586    pub indexed_source_count: Option<usize>,
587    pub indexed_feature_count: Option<usize>,
588    pub indexed_cityobject_count: Option<usize>,
589    pub fresh: Option<bool>,
590    pub covered: Option<bool>,
591    pub needs_reindex: bool,
592    pub missing_source_paths: Vec<PathBuf>,
593    pub unindexed_source_paths: Vec<PathBuf>,
594    pub changed_source_paths: Vec<PathBuf>,
595    pub missing_feature_paths: Vec<PathBuf>,
596    pub unindexed_feature_paths: Vec<PathBuf>,
597    pub changed_feature_paths: Vec<PathBuf>,
598    pub issues: Vec<String>,
599}
600
601#[derive(Debug, Clone, Serialize, Deserialize)]
602pub struct DatasetInspection {
603    pub dataset_root: PathBuf,
604    pub layout: DatasetLayoutKind,
605    pub manifest: Option<ManifestSummary>,
606    pub detected_source_count: usize,
607    pub detected_feature_file_count: usize,
608    pub index: IndexStatus,
609}
610
611#[derive(Debug, Clone, Serialize, Deserialize)]
612pub struct ValidationReport {
613    pub ok: bool,
614    pub inspection: DatasetInspection,
615}
616
/// Describes how a dataset is stored on disk.
#[derive(Debug, Clone)]
pub enum StorageLayout {
    /// Dataset made of the listed files, of layout kind `Ndjson`.
    Ndjson {
        /// Paths of the files.
        paths: Vec<PathBuf>,
    },
    /// Dataset made of the listed files, of layout kind `CityJson`.
    CityJson {
        /// Paths of the files.
        paths: Vec<PathBuf>,
    },
    /// Dataset located under a root directory and described by glob patterns.
    FeatureFiles {
        /// Root directory of the dataset.
        root: PathBuf,
        /// Glob pattern for metadata files.
        metadata_glob: String,
        /// Glob pattern for feature files.
        feature_glob: String,
    },
}
631
632impl StorageLayout {
633    #[must_use]
634    pub fn layout_kind(&self) -> DatasetLayoutKind {
635        match self {
636            Self::Ndjson { .. } => DatasetLayoutKind::Ndjson,
637            Self::CityJson { .. } => DatasetLayoutKind::CityJson,
638            Self::FeatureFiles { .. } => DatasetLayoutKind::FeatureFiles,
639        }
640    }
641}
642
643impl ResolvedDataset {
644    #[must_use]
645    pub fn storage_layout(&self) -> StorageLayout {
646        self.storage_layout.clone()
647    }
648
649    #[must_use]
650    pub fn source_paths(&self) -> &[PathBuf] {
651        &self.source_paths
652    }
653
654    #[must_use]
655    pub fn feature_file_paths(&self) -> &[PathBuf] {
656        &self.feature_file_paths
657    }
658
659    /// Inspects the resolved dataset and its current index sidecar.
660    ///
661    /// # Errors
662    ///
663    /// Returns an error if the dataset or index cannot be inspected.
664    pub fn inspect(&self) -> Result<DatasetInspection> {
665        inspect_resolved_dataset(self)
666    }
667
668    /// Validates the resolved dataset and returns a structured report.
669    ///
670    /// # Errors
671    ///
672    /// Returns an error if the dataset or index cannot be inspected.
673    pub fn validate(&self) -> Result<ValidationReport> {
674        let inspection = self.inspect()?;
675        let ok = inspection.index.issues.is_empty();
676        Ok(ValidationReport { ok, inspection })
677    }
678}
679
680#[allow(clippy::too_many_lines)]
681fn inspect_resolved_dataset(resolved: &ResolvedDataset) -> Result<DatasetInspection> {
682    let mut status = IndexStatus {
683        path: resolved.index_path.clone(),
684        exists: resolved.index_path.exists(),
685        index_mtime_ns: None,
686        indexed_source_count: None,
687        indexed_feature_count: None,
688        indexed_cityobject_count: None,
689        fresh: None,
690        covered: None,
691        needs_reindex: false,
692        missing_source_paths: Vec::new(),
693        unindexed_source_paths: Vec::new(),
694        changed_source_paths: Vec::new(),
695        missing_feature_paths: Vec::new(),
696        unindexed_feature_paths: Vec::new(),
697        changed_feature_paths: Vec::new(),
698        issues: Vec::new(),
699    };
700
701    if status.exists {
702        let (_, mtime_ns) = file_status(&resolved.index_path)?;
703        status.index_mtime_ns = Some(mtime_ns);
704
705        let index = Index::open(&resolved.index_path)?;
706        status.indexed_source_count = Some(index.source_count()?);
707        status.indexed_feature_count = Some(index.feature_count()?);
708        status.indexed_cityobject_count = Some(index.cityobject_count()?);
709        if !index.feature_bounds_complete()? {
710            status.needs_reindex = true;
711            status
712                .issues
713                .push("index is missing persisted z bounds; run cjindex reindex".to_owned());
714        }
715
716        let indexed_sources = index.indexed_sources()?;
717        let current_sources = collect_current_file_statuses(&resolved.source_paths)?;
718        compare_path_statuses(
719            &current_sources,
720            &indexed_sources,
721            &mut status.missing_source_paths,
722            &mut status.unindexed_source_paths,
723            &mut status.changed_source_paths,
724            &mut status.needs_reindex,
725        );
726
727        if resolved.layout == DatasetLayoutKind::FeatureFiles {
728            let indexed_features = index.indexed_feature_paths()?;
729            let current_features = collect_current_file_statuses(&resolved.feature_file_paths)?;
730            compare_feature_statuses(
731                &current_features,
732                &indexed_features,
733                &mut status.missing_feature_paths,
734                &mut status.unindexed_feature_paths,
735                &mut status.changed_feature_paths,
736                &mut status.needs_reindex,
737            );
738        }
739
740        if let Some(manifest) = &resolved.manifest {
741            if let Some(expected_features) = manifest.total_features
742                && status.indexed_feature_count != Some(expected_features)
743            {
744                status.issues.push(format!(
745                    "indexed feature count {} does not match manifest count {}",
746                    status.indexed_feature_count.unwrap_or(0),
747                    expected_features
748                ));
749            }
750            if let Some(expected_cityobjects) = manifest.total_cityobjects
751                && status.indexed_cityobject_count != Some(expected_cityobjects)
752            {
753                status.issues.push(format!(
754                    "indexed CityObject count {} does not match manifest count {}",
755                    status.indexed_cityobject_count.unwrap_or(0),
756                    expected_cityobjects
757                ));
758            }
759            if let Some(expected_sources) = manifest.selected_tile_count
760                && resolved.layout != DatasetLayoutKind::FeatureFiles
761                && status.indexed_source_count != Some(expected_sources)
762            {
763                status.issues.push(format!(
764                    "indexed source count {} does not match manifest tile count {}",
765                    status.indexed_source_count.unwrap_or(0),
766                    expected_sources
767                ));
768            }
769        }
770
771        if let Some(source_count) = status.indexed_source_count
772            && source_count != resolved.source_paths.len()
773        {
774            status.issues.push(format!(
775                "indexed source count {} does not match detected source count {}",
776                source_count,
777                resolved.source_paths.len()
778            ));
779        }
780
781        if !status.missing_source_paths.is_empty() {
782            status.issues.push(format!(
783                "{} indexed source files are missing on disk",
784                status.missing_source_paths.len()
785            ));
786        }
787        if !status.unindexed_source_paths.is_empty() {
788            status.issues.push(format!(
789                "{} detected source files are missing from the index",
790                status.unindexed_source_paths.len()
791            ));
792        }
793        if !status.changed_source_paths.is_empty() {
794            status.issues.push(format!(
795                "{} indexed source files changed size or mtime",
796                status.changed_source_paths.len()
797            ));
798        }
799        if !status.missing_feature_paths.is_empty() {
800            status.issues.push(format!(
801                "{} indexed feature files are missing on disk",
802                status.missing_feature_paths.len()
803            ));
804        }
805        if !status.unindexed_feature_paths.is_empty() {
806            status.issues.push(format!(
807                "{} detected feature files are missing from the index",
808                status.unindexed_feature_paths.len()
809            ));
810        }
811        if !status.changed_feature_paths.is_empty() {
812            status.issues.push(format!(
813                "{} indexed feature files changed size or mtime",
814                status.changed_feature_paths.len()
815            ));
816        }
817        if status.needs_reindex {
818            status.issues.push(
819                "index is missing persisted freshness metadata; run cjindex reindex".to_owned(),
820            );
821        }
822
823        status.covered = Some(
824            status.missing_source_paths.is_empty()
825                && status.unindexed_source_paths.is_empty()
826                && status.missing_feature_paths.is_empty()
827                && status.unindexed_feature_paths.is_empty(),
828        );
829        status.fresh = Some(
830            status.covered == Some(true)
831                && status.changed_source_paths.is_empty()
832                && status.changed_feature_paths.is_empty()
833                && !status.needs_reindex,
834        );
835    } else {
836        status.issues.push(format!(
837            "index {} does not exist",
838            resolved.index_path.display()
839        ));
840    }
841
842    Ok(DatasetInspection {
843        dataset_root: resolved.dataset_root.clone(),
844        layout: resolved.layout,
845        manifest: resolved.manifest.clone(),
846        detected_source_count: resolved.source_paths.len(),
847        detected_feature_file_count: resolved.feature_file_paths.len(),
848        index: status,
849    })
850}
851
852fn resolve_manifest_summary(dataset_root: &Path) -> Result<Option<ManifestSummary>> {
853    let candidates = [
854        dataset_root.join("manifest.json"),
855        dataset_root.parent().map_or_else(
856            || dataset_root.join("manifest.json"),
857            |parent| parent.join("manifest.json"),
858        ),
859    ];
860    for candidate in candidates {
861        if !candidate.exists() {
862            continue;
863        }
864        let manifest: Value = read_json(&candidate)?;
865        let selected_tile_count = manifest
866            .get("selected_tiles")
867            .and_then(Value::as_array)
868            .map(Vec::len);
869        let total_features = manifest
870            .get("total_features")
871            .and_then(Value::as_u64)
872            .map(usize::try_from)
873            .transpose()
874            .map_err(|_| import_error("manifest total_features does not fit in usize"))?;
875        let total_cityobjects = manifest
876            .get("total_cityobjects")
877            .and_then(Value::as_u64)
878            .map(usize::try_from)
879            .transpose()
880            .map_err(|_| import_error("manifest total_cityobjects does not fit in usize"))?;
881        return Ok(Some(ManifestSummary {
882            path: candidate,
883            selected_tile_count,
884            total_features,
885            total_cityobjects,
886        }));
887    }
888    Ok(None)
889}
890
891fn collect_current_file_statuses(paths: &[PathBuf]) -> Result<BTreeMap<PathBuf, (u64, i64)>> {
892    paths
893        .iter()
894        .map(|path| file_status(path).map(|status| (path.clone(), status)))
895        .collect()
896}
897
898fn compare_path_statuses(
899    current: &BTreeMap<PathBuf, (u64, i64)>,
900    indexed: &[IndexedSourceRecord],
901    missing_on_disk: &mut Vec<PathBuf>,
902    missing_from_index: &mut Vec<PathBuf>,
903    changed: &mut Vec<PathBuf>,
904    needs_reindex: &mut bool,
905) {
906    let indexed_by_path = indexed
907        .iter()
908        .map(|record| {
909            (
910                record.path.clone(),
911                (record.source_size, record.source_mtime_ns),
912            )
913        })
914        .collect::<BTreeMap<_, _>>();
915
916    for path in current.keys() {
917        if !indexed_by_path.contains_key(path) {
918            missing_from_index.push(path.clone());
919        }
920    }
921
922    for (path, (expected_size, expected_mtime_ns)) in indexed_by_path {
923        let Some((current_size, current_mtime_ns)) = current.get(&path) else {
924            missing_on_disk.push(path);
925            continue;
926        };
927        let Some(expected_size) = expected_size else {
928            *needs_reindex = true;
929            continue;
930        };
931        let Some(expected_mtime_ns) = expected_mtime_ns else {
932            *needs_reindex = true;
933            continue;
934        };
935        if expected_size != *current_size || expected_mtime_ns != *current_mtime_ns {
936            changed.push(path);
937        }
938    }
939}
940
941fn compare_feature_statuses(
942    current: &BTreeMap<PathBuf, (u64, i64)>,
943    indexed: &[IndexedFeaturePathRecord],
944    missing_on_disk: &mut Vec<PathBuf>,
945    missing_from_index: &mut Vec<PathBuf>,
946    changed: &mut Vec<PathBuf>,
947    needs_reindex: &mut bool,
948) {
949    let indexed_by_path = indexed
950        .iter()
951        .map(|record| {
952            (
953                record.path.clone(),
954                (record.file_size, record.file_mtime_ns),
955            )
956        })
957        .collect::<BTreeMap<_, _>>();
958
959    for path in current.keys() {
960        if !indexed_by_path.contains_key(path) {
961            missing_from_index.push(path.clone());
962        }
963    }
964
965    for (path, (expected_size, expected_mtime_ns)) in indexed_by_path {
966        let Some((current_size, current_mtime_ns)) = current.get(&path) else {
967            missing_on_disk.push(path);
968            continue;
969        };
970        let Some(expected_size) = expected_size else {
971            *needs_reindex = true;
972            continue;
973        };
974        let Some(expected_mtime_ns) = expected_mtime_ns else {
975            *needs_reindex = true;
976            continue;
977        };
978        if expected_size != *current_size || expected_mtime_ns != *current_mtime_ns {
979            changed.push(path);
980        }
981    }
982}
983
984/// Resolves a dataset directory into one concrete storage layout plus the
985/// effective sidecar index location.
986///
987/// # Errors
988///
989/// Returns an error if the directory does not exist, no known layout matches,
990/// or multiple layouts match.
991pub fn resolve_dataset(
992    dataset_dir: &Path,
993    index_override: Option<PathBuf>,
994) -> Result<ResolvedDataset> {
995    let dataset_root = fs::canonicalize(dataset_dir).map_err(|error| {
996        import_error(format!(
997            "failed to resolve dataset directory {}: {error}",
998            dataset_dir.display()
999        ))
1000    })?;
1001    if !dataset_root.is_dir() {
1002        return Err(import_error(format!(
1003            "dataset path {} is not a directory",
1004            dataset_root.display()
1005        )));
1006    }
1007
1008    let roots = vec![dataset_root.clone()];
1009    let ndjson_paths = collect_layout_files(&roots, ".city.jsonl")?;
1010    let cityjson_paths = collect_layout_files(&roots, ".city.json")?;
1011    let metadata_paths = collect_layout_files(&roots, "metadata.json")?;
1012    let feature_file_paths = if metadata_paths.is_empty() {
1013        Vec::new()
1014    } else {
1015        ndjson_paths.clone()
1016    };
1017
1018    let feature_files_match = !metadata_paths.is_empty() && !feature_file_paths.is_empty();
1019    let ndjson_match = !ndjson_paths.is_empty() && !feature_files_match;
1020    let cityjson_match = !cityjson_paths.is_empty();
1021
1022    let mut matches = Vec::new();
1023    if ndjson_match {
1024        matches.push(DatasetLayoutKind::Ndjson);
1025    }
1026    if cityjson_match {
1027        matches.push(DatasetLayoutKind::CityJson);
1028    }
1029    if feature_files_match {
1030        matches.push(DatasetLayoutKind::FeatureFiles);
1031    }
1032
1033    if matches.is_empty() {
1034        return Err(import_error(format!(
1035            "dataset directory {} does not match ndjson, cityjson, or feature-files layouts",
1036            dataset_root.display()
1037        )));
1038    }
1039    if matches.len() > 1 {
1040        let matched_layouts = matches
1041            .into_iter()
1042            .map(DatasetLayoutKind::as_str)
1043            .collect::<Vec<_>>()
1044            .join(", ");
1045        return Err(import_error(format!(
1046            "dataset directory {} matches multiple layouts ({matched_layouts}); use explicit CLI flags instead",
1047            dataset_root.display(),
1048        )));
1049    }
1050
1051    let layout = matches[0];
1052    let storage_layout = match layout {
1053        DatasetLayoutKind::Ndjson => StorageLayout::Ndjson {
1054            paths: vec![dataset_root.clone()],
1055        },
1056        DatasetLayoutKind::CityJson => StorageLayout::CityJson {
1057            paths: vec![dataset_root.clone()],
1058        },
1059        DatasetLayoutKind::FeatureFiles => StorageLayout::FeatureFiles {
1060            root: dataset_root.clone(),
1061            metadata_glob: "**/metadata.json".to_owned(),
1062            feature_glob: "**/*.city.jsonl".to_owned(),
1063        },
1064    };
1065    let source_paths = match layout {
1066        DatasetLayoutKind::Ndjson => ndjson_paths,
1067        DatasetLayoutKind::CityJson => cityjson_paths,
1068        DatasetLayoutKind::FeatureFiles => metadata_paths,
1069    };
1070    let feature_file_paths = match layout {
1071        DatasetLayoutKind::FeatureFiles => feature_file_paths,
1072        _ => Vec::new(),
1073    };
1074
1075    Ok(ResolvedDataset {
1076        dataset_root: dataset_root.clone(),
1077        index_path: index_override.unwrap_or_else(|| dataset_root.join(".cityjson-index.sqlite")),
1078        layout,
1079        manifest: resolve_manifest_summary(&dataset_root)?,
1080        storage_layout,
1081        source_paths,
1082        feature_file_paths,
1083    })
1084}
1085
1086impl CityIndex {
1087    /// Opens an index for the given storage layout.
1088    ///
1089    /// # Errors
1090    ///
1091    /// Returns an error if the index backend cannot be created or the index
1092    /// store cannot be opened.
1093    pub fn open(layout: StorageLayout, index_path: &Path) -> Result<Self> {
1094        let backend: Box<dyn StorageBackend> = match layout {
1095            StorageLayout::Ndjson { paths } => Box::new(NdjsonBackend { paths }),
1096            StorageLayout::CityJson { paths } => Box::new(CityJsonBackend::new(paths)),
1097            StorageLayout::FeatureFiles {
1098                root,
1099                metadata_glob,
1100                feature_glob,
1101            } => Box::new(FeatureFilesBackend::new(
1102                root,
1103                metadata_glob.as_str(),
1104                feature_glob.as_str(),
1105            )),
1106        };
1107
1108        Ok(Self {
1109            index: Index::open(index_path)?,
1110            backend,
1111        })
1112    }
1113
1114    /// Rebuilds the index from the configured backend.
1115    ///
1116    /// # Errors
1117    ///
1118    /// Returns an error if backend scanning or index population fails.
1119    pub fn reindex(&mut self) -> Result<()> {
1120        let worker_count = configured_worker_count()?;
1121        let scans = self.backend.scan(worker_count)?;
1122        self.index.rebuild(&scans)
1123    }
1124
1125    /// Returns a `CityJSON` feature by id.
1126    ///
1127    /// # Errors
1128    ///
1129    /// Returns an error if lookup fails.
1130    pub fn get(&self, id: &str) -> Result<Option<CityModel>> {
1131        self.get_with_metadata(id)
1132            .map(|maybe| maybe.map(|(_, model)| model))
1133    }
1134
1135    /// Returns a `CityJSON` feature by id together with the source metadata
1136    /// used to reconstruct it.
1137    ///
1138    /// # Errors
1139    ///
1140    /// Returns an error if lookup fails.
1141    pub fn get_with_metadata(&self, id: &str) -> Result<Option<(Arc<Meta>, CityModel)>> {
1142        let Some(loc) = self.index.lookup_id(id)? else {
1143            return Ok(None);
1144        };
1145        let metadata = self.index.get_cached_metadata(loc.source_id)?;
1146        let model = self.backend.read_one(&loc, Arc::clone(&metadata.bytes))?;
1147        Ok(Some((metadata.value, model)))
1148    }
1149
1150    /// Returns a lightweight feature reference for a feature id.
1151    ///
1152    /// # Errors
1153    ///
1154    /// Returns an error if the lookup fails.
1155    pub fn lookup_feature_ref(&self, id: &str) -> Result<Option<IndexedFeatureRef>> {
1156        self.index.lookup_feature_ref(id)
1157    }
1158
1159    /// Returns all lightweight feature references for a feature id.
1160    ///
1161    /// Results are ordered by the internal feature row id.
1162    ///
1163    /// # Errors
1164    ///
1165    /// Returns an error if the lookup fails.
1166    pub fn lookup_feature_refs(&self, id: &str) -> Result<Vec<IndexedFeatureRef>> {
1167        self.index.lookup_feature_refs(id)
1168    }
1169
1170    /// Returns a lightweight feature reference for a feature row id.
1171    ///
1172    /// # Errors
1173    ///
1174    /// Returns an error if the lookup fails.
1175    pub fn lookup_feature_ref_by_rowid(&self, row_id: i64) -> Result<Option<IndexedFeatureRef>> {
1176        self.index.lookup_feature_ref_by_rowid(row_id)
1177    }
1178
1179    /// Returns cached metadata for a source id.
1180    ///
1181    /// # Errors
1182    ///
1183    /// Returns an error if the metadata lookup fails.
1184    pub fn metadata_for_source(&self, source_id: i64) -> Result<Arc<Meta>> {
1185        self.index
1186            .get_cached_metadata(source_id)
1187            .map(|metadata| metadata.value)
1188    }
1189
1190    /// Returns every feature intersecting the given bounding box.
1191    ///
1192    /// # Errors
1193    ///
1194    /// Returns an error if the query fails.
1195    pub fn query(&self, bbox: &BBox) -> Result<Vec<CityModel>> {
1196        self.query_iter(bbox)?
1197            .collect::<std::result::Result<Vec<_>, _>>()
1198    }
1199
1200    /// Returns every feature intersecting the given bounding box together with
1201    /// the source metadata used to reconstruct it.
1202    ///
1203    /// # Errors
1204    ///
1205    /// Returns an error if the query fails.
1206    pub fn query_with_metadata(&self, bbox: &BBox) -> Result<Vec<(Arc<Meta>, CityModel)>> {
1207        self.query_iter_with_metadata(bbox)?
1208            .collect::<std::result::Result<Vec<_>, _>>()
1209    }
1210
1211    /// Returns an iterator over features intersecting the given bounding box.
1212    ///
1213    /// # Errors
1214    ///
1215    /// Returns an error if the iterator cannot be constructed.
1216    pub fn query_iter(&self, bbox: &BBox) -> Result<impl Iterator<Item = Result<CityModel>> + '_> {
1217        let iter = self.query_iter_with_metadata(bbox)?;
1218        Ok(iter.map(|item| item.map(|(_, model)| model)))
1219    }
1220
1221    /// Returns an iterator over features intersecting the given bounding box
1222    /// together with their feature identifiers.
1223    ///
1224    /// # Errors
1225    ///
1226    /// Returns an error if the iterator cannot be constructed.
1227    pub fn query_iter_with_ids(
1228        &self,
1229        bbox: &BBox,
1230    ) -> Result<impl Iterator<Item = Result<(String, CityModel)>> + '_> {
1231        let locations = self.index.lookup_bbox_iter(*bbox);
1232        Ok(locations.map(move |loc| {
1233            let loc = loc?;
1234            let feature_id = loc.feature_id.clone();
1235            let metadata = self.index.get_cached_metadata(loc.source_id)?;
1236            let model = self.backend.read_one(&loc, Arc::clone(&metadata.bytes))?;
1237            Ok((feature_id, model))
1238        }))
1239    }
1240
1241    /// Returns an iterator over features intersecting the given bounding box
1242    /// together with the source metadata used to reconstruct them.
1243    ///
1244    /// # Errors
1245    ///
1246    /// Returns an error if the iterator cannot be constructed.
1247    pub fn query_iter_with_metadata(
1248        &self,
1249        bbox: &BBox,
1250    ) -> Result<impl Iterator<Item = Result<(Arc<Meta>, CityModel)>> + '_> {
1251        let locations = self.index.lookup_bbox_iter(*bbox);
1252        Ok(locations.map(move |loc| {
1253            let loc = loc?;
1254            let metadata = self.index.get_cached_metadata(loc.source_id)?;
1255            let model = self.backend.read_one(&loc, Arc::clone(&metadata.bytes))?;
1256            Ok((metadata.value, model))
1257        }))
1258    }
1259
1260    /// Returns every feature in the index.
1261    ///
1262    /// # Errors
1263    ///
1264    /// Returns an error if the iterator cannot be constructed.
1265    pub fn iter_all(&self) -> Result<impl Iterator<Item = Result<CityModel>> + '_> {
1266        let iter = self.iter_all_with_metadata()?;
1267        Ok(iter.map(|item| item.map(|(_, model)| model)))
1268    }
1269
1270    /// Returns every feature in the index together with its feature identifier.
1271    ///
1272    /// # Errors
1273    ///
1274    /// Returns an error if the iterator cannot be constructed.
1275    pub fn iter_all_with_ids(
1276        &self,
1277    ) -> Result<impl Iterator<Item = Result<(String, CityModel)>> + '_> {
1278        let iter = self.index.lookup_all_iter();
1279        Ok(iter.map(move |loc| {
1280            let loc = loc?;
1281            let feature_id = loc.location.feature_id.clone();
1282            let metadata = self.index.get_cached_metadata(loc.location.source_id)?;
1283            let model = self
1284                .backend
1285                .read_one(&loc.location, Arc::clone(&metadata.bytes))?;
1286            Ok((feature_id, model))
1287        }))
1288    }
1289
1290    /// Returns every feature in the index together with the source metadata used
1291    /// to reconstruct it.
1292    ///
1293    /// # Errors
1294    ///
1295    /// Returns an error if the iterator cannot be constructed.
1296    pub fn iter_all_with_metadata(
1297        &self,
1298    ) -> Result<impl Iterator<Item = Result<(Arc<Meta>, CityModel)>> + '_> {
1299        let iter = self.index.lookup_all_iter();
1300        Ok(iter.map(move |loc| {
1301            let loc = loc?;
1302            let metadata = self.index.get_cached_metadata(loc.location.source_id)?;
1303            let model = self
1304                .backend
1305                .read_one(&loc.location, Arc::clone(&metadata.bytes))?;
1306            Ok((metadata.value, model))
1307        }))
1308    }
1309
1310    /// Returns every indexed feature as a page of lightweight references.
1311    ///
1312    /// Each page is ordered by the internal feature row id and can be used for
1313    /// caller-managed parallel decoding.
1314    ///
1315    /// # Errors
1316    ///
1317    /// Returns an error if the iterator cannot be constructed or `page_size`
1318    /// is zero.
1319    pub fn iter_all_feature_ref_pages(
1320        &self,
1321        page_size: usize,
1322    ) -> Result<impl Iterator<Item = Result<Vec<IndexedFeatureRef>>> + '_> {
1323        self.index.lookup_all_ref_page_iter(page_size)
1324    }
1325
1326    /// Returns every indexed feature as a page of lightweight references.
1327    ///
1328    /// This is a semantic alias of [`CityIndex::iter_all_feature_ref_pages`]
1329    /// for callers that care primarily about bbox-oriented processing.
1330    ///
1331    /// # Errors
1332    ///
1333    /// Returns an error if the iterator cannot be constructed or `page_size`
1334    /// is zero.
1335    pub fn iter_all_bbox_pages(
1336        &self,
1337        page_size: usize,
1338    ) -> Result<impl Iterator<Item = Result<Vec<IndexedFeatureRef>>> + '_> {
1339        self.index.lookup_all_ref_page_iter(page_size)
1340    }
1341
1342    /// Returns every indexed feature together with its decoded payload in
1343    /// ascending feature row id order.
1344    ///
1345    /// # Errors
1346    ///
1347    /// Returns an error if the iterator cannot be constructed.
1348    pub fn scan_features(&self) -> Result<impl Iterator<Item = Result<IndexedFeature>> + '_> {
1349        let ref_pages = self
1350            .index
1351            .lookup_all_ref_page_iter(DEFAULT_SCAN_PAGE_SIZE)?;
1352        Ok(AllIndexedFeatureIter::new(AllIndexedFeaturePageIter {
1353            city_index: self,
1354            ref_pages,
1355        }))
1356    }
1357
1358    /// Returns every indexed feature as decoded pages in ascending feature row
1359    /// id order.
1360    ///
1361    /// Each page preserves the row order from
1362    /// [`CityIndex::iter_all_feature_ref_pages`] and reconstructs its payloads
1363    /// with the grouped batch path.
1364    ///
1365    /// # Errors
1366    ///
1367    /// Returns an error if the iterator cannot be constructed or `page_size`
1368    /// is zero.
1369    pub fn scan_feature_pages(
1370        &self,
1371        page_size: usize,
1372    ) -> Result<impl Iterator<Item = Result<Vec<IndexedFeature>>> + '_> {
1373        let ref_pages = self.index.lookup_all_ref_page_iter(page_size)?;
1374        Ok(AllIndexedFeaturePageIter {
1375            city_index: self,
1376            ref_pages,
1377        })
1378    }
1379
1380    /// Returns aggregate bounds and feature count for the whole index.
1381    ///
1382    /// Returns `Ok(None)` when the index contains no features.
1383    ///
1384    /// # Errors
1385    ///
1386    /// Returns an error if the aggregate query fails.
1387    pub fn feature_bounds_summary(&self) -> Result<Option<FeatureBoundsSummary>> {
1388        self.index.feature_bounds_summary()
1389    }
1390
1391    /// Reconstructs a single indexed feature from a lightweight reference.
1392    ///
1393    /// # Errors
1394    ///
1395    /// Returns an error if the feature cannot be reconstructed.
1396    pub fn read_feature(&self, feature: &IndexedFeatureRef) -> Result<CityModel> {
1397        let metadata = self.index.get_cached_metadata(feature.source_id)?;
1398        self.backend
1399            .read_one(&feature.to_location(), Arc::clone(&metadata.bytes))
1400    }
1401
1402    /// Reconstructs multiple indexed features from lightweight references.
1403    ///
1404    /// # Errors
1405    ///
1406    /// Returns an error if any feature cannot be reconstructed.
1407    pub fn read_features(&self, features: &[IndexedFeatureRef]) -> Result<Vec<CityModel>> {
1408        self.read_feature_models(features)
1409    }
1410
1411    /// Reconstructs and filters multiple indexed features while preserving the
1412    /// input order.
1413    ///
1414    /// # Errors
1415    ///
1416    /// Returns an error if any feature cannot be reconstructed or filtered.
1417    pub fn read_filtered_features(
1418        &self,
1419        features: &[IndexedFeatureRef],
1420        filter: &FeatureFilter,
1421    ) -> Result<Vec<FilteredFeature>> {
1422        self.read_feature_models(features)?
1423            .iter()
1424            .map(|model| filter.apply(model))
1425            .collect()
1426    }
1427
1428    /// Reconstructs multiple indexed features with their references.
1429    ///
1430    /// The returned features preserve the input order.
1431    ///
1432    /// # Errors
1433    ///
1434    /// Returns an error if any feature cannot be reconstructed.
1435    pub fn read_indexed_features(
1436        &self,
1437        features: &[IndexedFeatureRef],
1438    ) -> Result<Vec<IndexedFeature>> {
1439        let models = self.read_feature_models(features)?;
1440        Ok(features
1441            .iter()
1442            .cloned()
1443            .zip(models)
1444            .map(|(reference, model)| IndexedFeature { reference, model })
1445            .collect())
1446    }
1447
1448    /// Reconstructs a feature by row id.
1449    ///
1450    /// # Errors
1451    ///
1452    /// Returns an error if lookup or reconstruction fails.
1453    pub fn read_feature_by_rowid(&self, row_id: i64) -> Result<Option<IndexedFeature>> {
1454        let Some(reference) = self.lookup_feature_ref_by_rowid(row_id)? else {
1455            return Ok(None);
1456        };
1457        let model = self.read_feature(&reference)?;
1458        Ok(Some(IndexedFeature { reference, model }))
1459    }
1460
1461    /// Reconstructs features by row id while preserving the input order.
1462    ///
1463    /// Missing row ids are returned as `None`.
1464    ///
1465    /// # Errors
1466    ///
1467    /// Returns an error if lookup or reconstruction fails.
1468    pub fn read_features_by_rowids(&self, row_ids: &[i64]) -> Result<Vec<Option<IndexedFeature>>> {
1469        let references = self.index.lookup_feature_refs_by_rowids(row_ids)?;
1470        let present_references = references.iter().flatten().cloned().collect::<Vec<_>>();
1471        let mut present_features = self.read_indexed_features(&present_references)?.into_iter();
1472        let mut features = Vec::with_capacity(row_ids.len());
1473        for reference in references {
1474            if reference.is_some() {
1475                let feature = present_features.next().ok_or_else(|| {
1476                    import_error("feature reconstruction returned fewer models than references")
1477                })?;
1478                features.push(Some(feature));
1479            } else {
1480                features.push(None);
1481            }
1482        }
1483        Ok(features)
1484    }
1485
1486    /// Reconstructs a page of features after a row id.
1487    ///
1488    /// Passing `None` starts from the first row. Results are ordered by row id.
1489    ///
1490    /// # Errors
1491    ///
1492    /// Returns an error if lookup or reconstruction fails, or `limit` is zero.
1493    pub fn read_feature_range_after_rowid(
1494        &self,
1495        after_row_id: Option<i64>,
1496        limit: usize,
1497    ) -> Result<Vec<IndexedFeature>> {
1498        if limit == 0 {
1499            return Err(import_error("limit must be greater than zero"));
1500        }
1501        let refs = self
1502            .index
1503            .lookup_all_ref_page(after_row_id, limit)?
1504            .into_iter()
1505            .map(|record| record.feature)
1506            .collect::<Vec<_>>();
1507        self.read_indexed_features(&refs)
1508    }
1509
1510    /// Returns the total number of indexed feature references.
1511    ///
1512    /// # Errors
1513    ///
1514    /// Returns an error if the count cannot be read from the index.
1515    pub fn feature_ref_count(&self) -> Result<usize> {
1516        self.index.feature_count()
1517    }
1518
1519    /// Returns the total number of indexed sources.
1520    ///
1521    /// # Errors
1522    ///
1523    /// Returns an error if the count cannot be read from the index.
1524    pub fn source_count(&self) -> Result<usize> {
1525        self.index.source_count()
1526    }
1527
1528    /// Returns the total number of indexed `CityObjects`.
1529    ///
1530    /// # Errors
1531    ///
1532    /// Returns an error if the count cannot be read from the index.
1533    pub fn cityobject_count(&self) -> Result<usize> {
1534        self.index.cityobject_count()
1535    }
1536
1537    /// Returns a contiguous page of indexed feature references.
1538    ///
1539    /// The page is ordered by the underlying feature row identifier.
1540    ///
1541    /// # Errors
1542    ///
1543    /// Returns an error if the page cannot be read from the index.
1544    pub fn feature_ref_page(&self, offset: usize, limit: usize) -> Result<Vec<IndexedFeatureRef>> {
1545        self.index.lookup_all_ref_page_window(offset, limit)
1546    }
1547
1548    /// Returns the raw indexed feature bytes for the given feature identifier.
1549    ///
1550    /// # Errors
1551    ///
1552    /// Returns an error if the index lookup or byte-range read fails.
1553    pub fn get_bytes(&self, id: &str) -> Result<Option<Vec<u8>>> {
1554        let Some(loc) = self.index.lookup_id(id)? else {
1555            return Ok(None);
1556        };
1557        read_exact_range(&loc.source_path, loc.offset, loc.length).map(Some)
1558    }
1559
1560    /// Returns the raw bytes for a feature reference.
1561    ///
1562    /// # Errors
1563    ///
1564    /// Returns an error if the feature bytes cannot be read from disk.
1565    pub fn read_feature_bytes(&self, feature: &IndexedFeatureRef) -> Result<Vec<u8>> {
1566        read_exact_range(&feature.source_path, feature.offset, feature.length)
1567    }
1568
1569    /// Returns cached metadata entries.
1570    ///
1571    /// # Errors
1572    ///
1573    /// Returns an error if metadata lookup fails.
1574    pub fn metadata(&self) -> Result<Vec<Arc<Meta>>> {
1575        self.index.metadata()
1576    }
1577
1578    fn read_feature_models(&self, features: &[IndexedFeatureRef]) -> Result<Vec<CityModel>> {
1579        let mut features_by_source: BTreeMap<i64, Vec<(usize, FeatureLocation)>> = BTreeMap::new();
1580        for (index, feature) in features.iter().enumerate() {
1581            features_by_source
1582                .entry(feature.source_id)
1583                .or_default()
1584                .push((index, feature.to_location()));
1585        }
1586
1587        let mut models = std::iter::repeat_with(|| None)
1588            .take(features.len())
1589            .collect::<Vec<Option<CityModel>>>();
1590        for (source_id, indexed_locations) in features_by_source {
1591            let metadata = self.index.get_cached_metadata(source_id)?;
1592            let locations = indexed_locations
1593                .iter()
1594                .map(|(_, location)| location)
1595                .collect::<Vec<_>>();
1596            let source_models = self
1597                .backend
1598                .read_many(&locations, Arc::clone(&metadata.bytes))?;
1599            for ((index, _), model) in indexed_locations.into_iter().zip(source_models) {
1600                models[index] = Some(model);
1601            }
1602        }
1603
1604        Ok(models
1605            .into_iter()
1606            .map(|model| model.expect("every input feature should have a decoded model"))
1607            .collect())
1608    }
1609}
1610
1611type Meta = serde_json::Value;
1612
1613#[derive(Clone)]
1614struct CachedMetadata {
1615    value: Arc<Meta>,
1616    bytes: Arc<[u8]>,
1617}
1618
1619struct Index {
1620    conn: rusqlite::Connection,
1621    metadata_cache: Mutex<HashMap<i64, CachedMetadata>>,
1622}
1623
/// Where one feature lives inside its source file.
struct FeatureLocation {
    /// Identifier of the feature.
    feature_id: String,
    /// Id of the source the feature belongs to; used to fetch its metadata.
    source_id: i64,
    /// File that contains the feature bytes.
    source_path: PathBuf,
    /// Start of the feature's byte range within `source_path`.
    offset: u64,
    /// Length of the feature's byte range.
    length: u64,
    /// Start of an optional vertices range (layout-specific; see backends).
    vertices_offset: Option<u64>,
    /// Length of the optional vertices range.
    vertices_length: Option<u64>,
    /// Optional JSON-encoded member ranges (layout-specific; see backends).
    member_ranges_json: Option<String>,
}
1634
1635struct IndexedFeatureLocation {
1636    row_id: i64,
1637    location: FeatureLocation,
1638}
1639
1640struct IndexedFeatureRefLocation {
1641    row_id: i64,
1642    feature: IndexedFeatureRef,
1643}
1644
1645struct FeatureIndexEntry {
1646    id: String,
1647    source_id: i64,
1648    path: PathBuf,
1649    file_size: u64,
1650    file_mtime_ns: i64,
1651    offset: u64,
1652    length: u64,
1653    bounds: FeatureBounds,
1654    cityobject_count: u64,
1655    member_ranges_json: Option<String>,
1656}
1657
/// Size and mtime the index recorded for one source file.
struct IndexedSourceRecord {
    /// Path of the source file.
    path: PathBuf,
    /// Recorded size; `None` makes status comparison request a reindex.
    source_size: Option<u64>,
    /// Recorded mtime in nanoseconds; `None` likewise requests a reindex.
    source_mtime_ns: Option<i64>,
}
1663
/// Size and mtime the index recorded for one feature file.
struct IndexedFeaturePathRecord {
    /// Path of the feature file.
    path: PathBuf,
    /// Recorded size; `None` makes status comparison request a reindex.
    file_size: Option<u64>,
    /// Recorded mtime in nanoseconds; `None` likewise requests a reindex.
    file_mtime_ns: Option<i64>,
}
1669
1670struct BBoxLocationIter<'a> {
1671    index: &'a Index,
1672    bbox: BBox,
1673    last_row_id: Option<i64>,
1674    page: std::vec::IntoIter<IndexedFeatureLocation>,
1675    finished: bool,
1676}
1677
1678struct AllLocationIter<'a> {
1679    index: &'a Index,
1680    last_row_id: Option<i64>,
1681    page: std::vec::IntoIter<IndexedFeatureLocation>,
1682    finished: bool,
1683}
1684
1685struct AllFeatureRefPageIter<'a> {
1686    index: &'a Index,
1687    page_size: usize,
1688    last_row_id: Option<i64>,
1689    finished: bool,
1690}
1691
1692struct AllIndexedFeaturePageIter<'a> {
1693    city_index: &'a CityIndex,
1694    ref_pages: AllFeatureRefPageIter<'a>,
1695}
1696
1697struct AllIndexedFeatureIter<'a> {
1698    pages: AllIndexedFeaturePageIter<'a>,
1699    page: std::vec::IntoIter<IndexedFeature>,
1700    finished: bool,
1701}
1702
1703impl<'a> BBoxLocationIter<'a> {
1704    const PAGE_SIZE: usize = 512;
1705
1706    fn new(index: &'a Index, bbox: BBox) -> Self {
1707        Self {
1708            index,
1709            bbox,
1710            last_row_id: None,
1711            page: Vec::new().into_iter(),
1712            finished: false,
1713        }
1714    }
1715
1716    fn next_location(&mut self) -> Result<Option<FeatureLocation>> {
1717        if self.finished {
1718            return Ok(None);
1719        }
1720
1721        if let Some(feature) = self.page.next() {
1722            self.last_row_id = Some(feature.row_id);
1723            return Ok(Some(feature.location));
1724        }
1725
1726        let page = self
1727            .index
1728            .lookup_bbox_page(&self.bbox, self.last_row_id, Self::PAGE_SIZE)?;
1729        if page.is_empty() {
1730            self.finished = true;
1731            return Ok(None);
1732        }
1733
1734        self.page = page.into_iter();
1735        let feature = self
1736            .page
1737            .next()
1738            .expect("non-empty page should yield at least one feature");
1739        self.last_row_id = Some(feature.row_id);
1740        Ok(Some(feature.location))
1741    }
1742}
1743
1744impl<'a> AllLocationIter<'a> {
1745    const PAGE_SIZE: usize = 512;
1746
1747    fn new(index: &'a Index) -> Self {
1748        Self {
1749            index,
1750            last_row_id: None,
1751            page: Vec::new().into_iter(),
1752            finished: false,
1753        }
1754    }
1755
1756    fn next_location(&mut self) -> Result<Option<IndexedFeatureLocation>> {
1757        if self.finished {
1758            return Ok(None);
1759        }
1760
1761        if let Some(feature) = self.page.next() {
1762            self.last_row_id = Some(feature.row_id);
1763            return Ok(Some(feature));
1764        }
1765
1766        let page = self
1767            .index
1768            .lookup_all_page(self.last_row_id, Self::PAGE_SIZE)?;
1769        if page.is_empty() {
1770            self.finished = true;
1771            return Ok(None);
1772        }
1773
1774        self.page = page.into_iter();
1775        let feature = self
1776            .page
1777            .next()
1778            .expect("non-empty page should yield at least one feature");
1779        self.last_row_id = Some(feature.row_id);
1780        Ok(Some(feature))
1781    }
1782}
1783
1784impl<'a> AllFeatureRefPageIter<'a> {
1785    fn new(index: &'a Index, page_size: usize) -> Result<Self> {
1786        if page_size == 0 {
1787            return Err(import_error("page_size must be greater than zero"));
1788        }
1789        Ok(Self {
1790            index,
1791            page_size,
1792            last_row_id: None,
1793            finished: false,
1794        })
1795    }
1796
1797    fn next_page(&mut self) -> Result<Option<Vec<IndexedFeatureRef>>> {
1798        if self.finished {
1799            return Ok(None);
1800        }
1801
1802        let page = self
1803            .index
1804            .lookup_all_ref_page(self.last_row_id, self.page_size)?;
1805        if page.is_empty() {
1806            self.finished = true;
1807            return Ok(None);
1808        }
1809
1810        self.last_row_id = Some(
1811            page.last()
1812                .expect("non-empty page should yield at least one feature")
1813                .row_id,
1814        );
1815        Ok(Some(
1816            page.into_iter().map(|record| record.feature).collect(),
1817        ))
1818    }
1819}
1820
1821impl Iterator for BBoxLocationIter<'_> {
1822    type Item = Result<FeatureLocation>;
1823
1824    fn next(&mut self) -> Option<Self::Item> {
1825        match self.next_location() {
1826            Ok(Some(feature)) => Some(Ok(feature)),
1827            Ok(None) => None,
1828            Err(error) => {
1829                self.finished = true;
1830                Some(Err(error))
1831            }
1832        }
1833    }
1834}
1835
1836impl Iterator for AllLocationIter<'_> {
1837    type Item = Result<IndexedFeatureLocation>;
1838
1839    fn next(&mut self) -> Option<Self::Item> {
1840        match self.next_location() {
1841            Ok(Some(feature)) => Some(Ok(feature)),
1842            Ok(None) => None,
1843            Err(error) => {
1844                self.finished = true;
1845                Some(Err(error))
1846            }
1847        }
1848    }
1849}
1850
1851impl Iterator for AllFeatureRefPageIter<'_> {
1852    type Item = Result<Vec<IndexedFeatureRef>>;
1853
1854    fn next(&mut self) -> Option<Self::Item> {
1855        match self.next_page() {
1856            Ok(Some(page)) => Some(Ok(page)),
1857            Ok(None) => None,
1858            Err(error) => {
1859                self.finished = true;
1860                Some(Err(error))
1861            }
1862        }
1863    }
1864}
1865
1866impl Iterator for AllIndexedFeaturePageIter<'_> {
1867    type Item = Result<Vec<IndexedFeature>>;
1868
1869    fn next(&mut self) -> Option<Self::Item> {
1870        self.ref_pages
1871            .next()
1872            .map(|page| page.and_then(|refs| self.city_index.read_indexed_features(&refs)))
1873    }
1874}
1875
1876impl<'a> AllIndexedFeatureIter<'a> {
1877    fn new(pages: AllIndexedFeaturePageIter<'a>) -> Self {
1878        Self {
1879            pages,
1880            page: Vec::new().into_iter(),
1881            finished: false,
1882        }
1883    }
1884}
1885
1886impl Iterator for AllIndexedFeatureIter<'_> {
1887    type Item = Result<IndexedFeature>;
1888
1889    fn next(&mut self) -> Option<Self::Item> {
1890        if self.finished {
1891            return None;
1892        }
1893
1894        loop {
1895            if let Some(feature) = self.page.next() {
1896                return Some(Ok(feature));
1897            }
1898            match self.pages.next() {
1899                Some(Ok(page)) => {
1900                    self.page = page.into_iter();
1901                }
1902                Some(Err(error)) => {
1903                    self.finished = true;
1904                    return Some(Err(error));
1905                }
1906                None => {
1907                    self.finished = true;
1908                    return None;
1909                }
1910            }
1911        }
1912    }
1913}
1914
1915impl Index {
1916    fn open(path: &Path) -> Result<Self> {
1917        if let Some(parent) = path
1918            .parent()
1919            .filter(|parent| !parent.as_os_str().is_empty())
1920        {
1921            fs::create_dir_all(parent)?;
1922        }
1923
1924        let conn = sqlite_result(rusqlite::Connection::open(path))?;
1925        sqlite_result(conn.execute_batch(
1926            r"
1927            PRAGMA foreign_keys = ON;
1928
1929            CREATE TABLE IF NOT EXISTS sources (
1930                id INTEGER PRIMARY KEY AUTOINCREMENT,
1931                path TEXT NOT NULL UNIQUE,
1932                metadata TEXT NOT NULL,
1933                vertices_offset INTEGER,
1934                vertices_length INTEGER,
1935                source_size INTEGER,
1936                source_mtime_ns INTEGER
1937            );
1938
1939            CREATE TABLE IF NOT EXISTS features (
1940                id INTEGER PRIMARY KEY AUTOINCREMENT,
1941                feature_id TEXT NOT NULL,
1942                source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
1943                path TEXT NOT NULL,
1944                file_size INTEGER,
1945                file_mtime_ns INTEGER,
1946                offset INTEGER NOT NULL,
1947                length INTEGER NOT NULL,
1948                min_z REAL,
1949                max_z REAL,
1950                cityobject_count INTEGER,
1951                member_ranges TEXT
1952            );
1953
1954            CREATE VIRTUAL TABLE IF NOT EXISTS feature_bbox
1955            USING rtree(
1956                feature_rowid,
1957                min_x,
1958                max_x,
1959                min_y,
1960                max_y
1961            );
1962
1963            CREATE TABLE IF NOT EXISTS bbox_map (
1964                feature_rowid INTEGER PRIMARY KEY,
1965                feature_id TEXT NOT NULL
1966            );
1967            ",
1968        ))?;
1969        Self::ensure_duplicate_feature_ids_allowed(&conn)?;
1970        Self::ensure_member_ranges_column(&conn)?;
1971        Self::ensure_source_status_columns(&conn)?;
1972        Self::ensure_feature_status_columns(&conn)?;
1973        Self::ensure_feature_bounds_columns(&conn)?;
1974
1975        Ok(Self {
1976            conn,
1977            metadata_cache: Mutex::new(HashMap::new()),
1978        })
1979    }
1980
1981    fn rebuild(&mut self, scans: &[SourceScan]) -> Result<()> {
1982        let tx = sqlite_result(self.conn.transaction())?;
1983        Self::clear_tables(&tx)?;
1984
1985        let mut feature_entries = Vec::new();
1986        for scan in scans {
1987            let source_id = Self::insert_source_in_tx(
1988                &tx,
1989                scan.path.as_path(),
1990                &scan.metadata,
1991                scan.vertices_offset,
1992                scan.vertices_length,
1993                scan.source_size,
1994                scan.source_mtime_ns,
1995            )?;
1996            for feature in &scan.features {
1997                feature_entries.push(FeatureIndexEntry {
1998                    id: feature.id.clone(),
1999                    source_id,
2000                    path: feature.path.clone(),
2001                    file_size: feature.file_size,
2002                    file_mtime_ns: feature.file_mtime_ns,
2003                    offset: feature.offset,
2004                    length: feature.length,
2005                    bounds: feature.bounds,
2006                    cityobject_count: feature.cityobject_count,
2007                    member_ranges_json: feature
2008                        .member_ranges
2009                        .as_ref()
2010                        .map(json_string)
2011                        .transpose()?,
2012                });
2013            }
2014        }
2015        Self::insert_features_in_tx(&tx, &feature_entries)?;
2016        sqlite_result(tx.commit())?;
2017
2018        self.metadata_cache
2019            .lock()
2020            .unwrap_or_else(std::sync::PoisonError::into_inner)
2021            .clear();
2022        Ok(())
2023    }
2024
2025    fn lookup_id(&self, id: &str) -> Result<Option<FeatureLocation>> {
2026        sqlite_result(
2027            self.conn
2028                .query_row(
2029                    r"
2030                SELECT
2031                    f.feature_id,
2032                    s.id,
2033                    f.path,
2034                    f.offset,
2035                    f.length,
2036                    s.vertices_offset,
2037                    s.vertices_length,
2038                    f.member_ranges
2039                FROM features AS f
2040                JOIN sources AS s ON s.id = f.source_id
2041                WHERE f.feature_id = ?1
2042                ORDER BY f.id
2043                LIMIT 1
2044                ",
2045                    params![id],
2046                    Self::feature_location_from_row,
2047                )
2048                .optional(),
2049        )
2050    }
2051
2052    fn lookup_feature_ref(&self, id: &str) -> Result<Option<IndexedFeatureRef>> {
2053        sqlite_result(
2054            self.conn
2055                .query_row(
2056                    r"
2057                SELECT
2058                    f.id,
2059                    f.feature_id,
2060                    s.id,
2061                    f.path,
2062                    f.offset,
2063                    f.length,
2064                    s.vertices_offset,
2065                    s.vertices_length,
2066                    f.member_ranges,
2067                    fb.min_x,
2068                    fb.max_x,
2069                    fb.min_y,
2070                    fb.max_y,
2071                    f.min_z,
2072                    f.max_z
2073                FROM features AS f
2074                JOIN sources AS s ON s.id = f.source_id
2075                JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2076                WHERE f.feature_id = ?1
2077                ORDER BY f.id
2078                LIMIT 1
2079                ",
2080                    params![id],
2081                    Self::indexed_feature_ref_location_from_row,
2082                )
2083                .optional()
2084                .map(|maybe| maybe.map(|record| record.feature)),
2085        )
2086    }
2087
2088    fn lookup_feature_refs(&self, id: &str) -> Result<Vec<IndexedFeatureRef>> {
2089        let mut stmt = sqlite_result(self.conn.prepare(
2090            r"
2091            SELECT
2092                f.id,
2093                f.feature_id,
2094                s.id,
2095                f.path,
2096                f.offset,
2097                f.length,
2098                s.vertices_offset,
2099                s.vertices_length,
2100                f.member_ranges,
2101                fb.min_x,
2102                fb.max_x,
2103                fb.min_y,
2104                fb.max_y,
2105                f.min_z,
2106                f.max_z
2107            FROM features AS f
2108            JOIN sources AS s ON s.id = f.source_id
2109            JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2110            WHERE f.feature_id = ?1
2111            ORDER BY f.id
2112            ",
2113        ))?;
2114        let rows = sqlite_result(
2115            stmt.query_map(params![id], Self::indexed_feature_ref_location_from_row),
2116        )?;
2117        sqlite_result(rows.map(|row| row.map(|record| record.feature)).collect())
2118    }
2119
2120    fn lookup_feature_ref_by_rowid(&self, row_id: i64) -> Result<Option<IndexedFeatureRef>> {
2121        sqlite_result(
2122            self.conn
2123                .query_row(
2124                    r"
2125                SELECT
2126                    f.id,
2127                    f.feature_id,
2128                    s.id,
2129                    f.path,
2130                    f.offset,
2131                    f.length,
2132                    s.vertices_offset,
2133                    s.vertices_length,
2134                    f.member_ranges,
2135                    fb.min_x,
2136                    fb.max_x,
2137                    fb.min_y,
2138                    fb.max_y,
2139                    f.min_z,
2140                    f.max_z
2141                FROM features AS f
2142                JOIN sources AS s ON s.id = f.source_id
2143                JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2144                WHERE f.id = ?1
2145                ",
2146                    params![row_id],
2147                    Self::indexed_feature_ref_location_from_row,
2148                )
2149                .optional()
2150                .map(|maybe| maybe.map(|record| record.feature)),
2151        )
2152    }
2153
2154    fn lookup_feature_refs_by_rowids(
2155        &self,
2156        row_ids: &[i64],
2157    ) -> Result<Vec<Option<IndexedFeatureRef>>> {
2158        row_ids
2159            .iter()
2160            .map(|row_id| self.lookup_feature_ref_by_rowid(*row_id))
2161            .collect()
2162    }
2163
2164    fn lookup_bbox_iter(&self, bbox: BBox) -> BBoxLocationIter<'_> {
2165        BBoxLocationIter::new(self, bbox)
2166    }
2167
2168    fn lookup_all_iter(&self) -> AllLocationIter<'_> {
2169        AllLocationIter::new(self)
2170    }
2171
2172    fn lookup_all_ref_page_iter(&self, page_size: usize) -> Result<AllFeatureRefPageIter<'_>> {
2173        AllFeatureRefPageIter::new(self, page_size)
2174    }
2175
2176    fn lookup_all_ref_page_window(
2177        &self,
2178        offset: usize,
2179        limit: usize,
2180    ) -> Result<Vec<IndexedFeatureRef>> {
2181        let mut stmt = sqlite_result(self.conn.prepare(
2182            r"
2183            SELECT
2184                f.id,
2185                f.feature_id,
2186                s.id,
2187                f.path,
2188                f.offset,
2189                f.length,
2190                s.vertices_offset,
2191                s.vertices_length,
2192                f.member_ranges,
2193                fb.min_x,
2194                fb.max_x,
2195                fb.min_y,
2196                fb.max_y,
2197                f.min_z,
2198                f.max_z
2199            FROM features AS f
2200            JOIN sources AS s ON s.id = f.source_id
2201            JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2202            ORDER BY f.id
2203            LIMIT ?2 OFFSET ?1
2204            ",
2205        ))?;
2206        let rows = sqlite_result(stmt.query_map(
2207            params![offset, limit],
2208            Self::indexed_feature_ref_location_from_row,
2209        ))?;
2210        sqlite_result(rows.map(|row| row.map(|record| record.feature)).collect())
2211    }
2212
2213    fn lookup_bbox_page(
2214        &self,
2215        bbox: &BBox,
2216        after_row_id: Option<i64>,
2217        limit: usize,
2218    ) -> Result<Vec<IndexedFeatureLocation>> {
2219        let mut stmt = sqlite_result(self.conn.prepare(
2220            r"
2221            SELECT
2222                f.id,
2223                f.feature_id,
2224                s.id,
2225                f.path,
2226                f.offset,
2227                f.length,
2228                s.vertices_offset,
2229                s.vertices_length,
2230                f.member_ranges
2231            FROM feature_bbox AS fb
2232            JOIN bbox_map AS bm ON bm.feature_rowid = fb.feature_rowid
2233            JOIN features AS f ON f.id = bm.feature_rowid
2234            JOIN sources AS s ON s.id = f.source_id
2235            WHERE fb.min_x <= ?2
2236              AND fb.max_x >= ?1
2237              AND fb.min_y <= ?4
2238              AND fb.max_y >= ?3
2239              AND (?5 IS NULL OR f.id > ?5)
2240            ORDER BY f.id
2241            LIMIT ?6
2242            ",
2243        ))?;
2244        let rows = sqlite_result(stmt.query_map(
2245            params![
2246                bbox.min_x,
2247                bbox.max_x,
2248                bbox.min_y,
2249                bbox.max_y,
2250                after_row_id,
2251                limit
2252            ],
2253            Self::indexed_feature_location_from_row,
2254        ))?;
2255        sqlite_result(rows.collect())
2256    }
2257
2258    fn lookup_all_page(
2259        &self,
2260        after_row_id: Option<i64>,
2261        limit: usize,
2262    ) -> Result<Vec<IndexedFeatureLocation>> {
2263        let sql = match after_row_id {
2264            Some(_) => {
2265                r"
2266                SELECT
2267                    f.id,
2268                    f.feature_id,
2269                    s.id,
2270                    f.path,
2271                    f.offset,
2272                    f.length,
2273                    s.vertices_offset,
2274                    s.vertices_length,
2275                    f.member_ranges
2276                FROM features AS f
2277                JOIN sources AS s ON s.id = f.source_id
2278                WHERE f.id > ?1
2279                ORDER BY f.id
2280                LIMIT ?2
2281                "
2282            }
2283            None => {
2284                r"
2285                SELECT
2286                    f.id,
2287                    f.feature_id,
2288                    s.id,
2289                    f.path,
2290                    f.offset,
2291                    f.length,
2292                    s.vertices_offset,
2293                    s.vertices_length,
2294                    f.member_ranges
2295                FROM features AS f
2296                JOIN sources AS s ON s.id = f.source_id
2297                ORDER BY f.id
2298                LIMIT ?1
2299                "
2300            }
2301        };
2302        let mut stmt = sqlite_result(self.conn.prepare(sql))?;
2303        let rows = if let Some(after_row_id) = after_row_id {
2304            sqlite_result(stmt.query_map(
2305                params![after_row_id, limit],
2306                Self::indexed_feature_location_from_row,
2307            ))?
2308        } else {
2309            sqlite_result(stmt.query_map(params![limit], Self::indexed_feature_location_from_row))?
2310        };
2311        sqlite_result(rows.collect())
2312    }
2313
2314    fn lookup_all_ref_page(
2315        &self,
2316        after_row_id: Option<i64>,
2317        limit: usize,
2318    ) -> Result<Vec<IndexedFeatureRefLocation>> {
2319        let sql = match after_row_id {
2320            Some(_) => {
2321                r"
2322                SELECT
2323                    f.id,
2324                    f.feature_id,
2325                    s.id,
2326                    f.path,
2327                    f.offset,
2328                    f.length,
2329                    s.vertices_offset,
2330                    s.vertices_length,
2331                    f.member_ranges,
2332                    fb.min_x,
2333                    fb.max_x,
2334                    fb.min_y,
2335                    fb.max_y,
2336                    f.min_z,
2337                    f.max_z
2338                FROM features AS f
2339                JOIN sources AS s ON s.id = f.source_id
2340                JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2341                WHERE f.id > ?1
2342                ORDER BY f.id
2343                LIMIT ?2
2344                "
2345            }
2346            None => {
2347                r"
2348                SELECT
2349                    f.id,
2350                    f.feature_id,
2351                    s.id,
2352                    f.path,
2353                    f.offset,
2354                    f.length,
2355                    s.vertices_offset,
2356                    s.vertices_length,
2357                    f.member_ranges,
2358                    fb.min_x,
2359                    fb.max_x,
2360                    fb.min_y,
2361                    fb.max_y,
2362                    f.min_z,
2363                    f.max_z
2364                FROM features AS f
2365                JOIN sources AS s ON s.id = f.source_id
2366                JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2367                ORDER BY f.id
2368                LIMIT ?1
2369                "
2370            }
2371        };
2372        let mut stmt = sqlite_result(self.conn.prepare(sql))?;
2373        let rows = if let Some(after_row_id) = after_row_id {
2374            sqlite_result(stmt.query_map(
2375                params![after_row_id, limit],
2376                Self::indexed_feature_ref_location_from_row,
2377            ))?
2378        } else {
2379            sqlite_result(
2380                stmt.query_map(params![limit], Self::indexed_feature_ref_location_from_row),
2381            )?
2382        };
2383        sqlite_result(rows.collect())
2384    }
2385
2386    fn get_cached_metadata(&self, source_id: i64) -> Result<CachedMetadata> {
2387        if let Some(metadata) = self
2388            .metadata_cache
2389            .lock()
2390            .unwrap_or_else(std::sync::PoisonError::into_inner)
2391            .get(&source_id)
2392            .cloned()
2393        {
2394            return Ok(metadata);
2395        }
2396
2397        let metadata_json: String = sqlite_result(self.conn.query_row(
2398            "SELECT metadata FROM sources WHERE id = ?1",
2399            params![source_id],
2400            |row| row.get(0),
2401        ))?;
2402        let metadata: Meta = parse_json_str(&metadata_json)?;
2403        let metadata = CachedMetadata {
2404            value: Arc::new(metadata),
2405            bytes: Arc::from(metadata_json.into_bytes()),
2406        };
2407
2408        self.metadata_cache
2409            .lock()
2410            .unwrap_or_else(std::sync::PoisonError::into_inner)
2411            .insert(source_id, metadata.clone());
2412
2413        Ok(metadata)
2414    }
2415
2416    fn get_metadata(&self, source_id: i64) -> Result<Arc<Meta>> {
2417        self.get_cached_metadata(source_id)
2418            .map(|metadata| metadata.value)
2419    }
2420
2421    fn metadata(&self) -> Result<Vec<Arc<Meta>>> {
2422        let mut stmt = sqlite_result(self.conn.prepare("SELECT id FROM sources ORDER BY id"))?;
2423        let rows = sqlite_result(stmt.query_map([], |row| row.get::<_, i64>(0)))?;
2424        let source_ids = sqlite_result(rows.collect::<rusqlite::Result<Vec<_>>>())?;
2425        source_ids
2426            .into_iter()
2427            .map(|source_id| self.get_metadata(source_id))
2428            .collect()
2429    }
2430
2431    fn source_count(&self) -> Result<usize> {
2432        self.query_count("SELECT COUNT(*) FROM sources")
2433    }
2434
2435    fn feature_count(&self) -> Result<usize> {
2436        self.query_count("SELECT COUNT(*) FROM features")
2437    }
2438
2439    fn cityobject_count(&self) -> Result<usize> {
2440        let total = sqlite_result(self.conn.query_row(
2441            "SELECT COALESCE(SUM(cityobject_count), 0) FROM features",
2442            [],
2443            |row| row.get::<_, i64>(0),
2444        ))?;
2445        usize::try_from(total)
2446            .map_err(|_| import_error("indexed CityObject count does not fit in usize"))
2447    }
2448
2449    fn query_count(&self, sql: &str) -> Result<usize> {
2450        let count = sqlite_result(self.conn.query_row(sql, [], |row| row.get::<_, i64>(0)))?;
2451        usize::try_from(count).map_err(|_| import_error("count does not fit in usize"))
2452    }
2453
2454    fn feature_bounds_summary(&self) -> Result<Option<FeatureBoundsSummary>> {
2455        let summary = sqlite_result(self.conn.query_row(
2456            r"
2457            SELECT
2458                COUNT(*),
2459                MIN(fb.min_x),
2460                MAX(fb.max_x),
2461                MIN(fb.min_y),
2462                MAX(fb.max_y),
2463                MIN(f.min_z),
2464                MAX(f.max_z)
2465            FROM features AS f
2466            JOIN feature_bbox AS fb ON fb.feature_rowid = f.id
2467            ",
2468            [],
2469            |row| {
2470                let count = row.get::<_, i64>(0)?;
2471                if count == 0 {
2472                    return Ok(None);
2473                }
2474                let feature_count = usize::try_from(count).map_err(|error| {
2475                    rusqlite::Error::FromSqlConversionFailure(
2476                        0,
2477                        rusqlite::types::Type::Integer,
2478                        Box::new(error),
2479                    )
2480                })?;
2481                Ok(Some(FeatureBoundsSummary {
2482                    bounds: FeatureBounds {
2483                        min_x: row.get::<_, f64>(1)?,
2484                        max_x: row.get::<_, f64>(2)?,
2485                        min_y: row.get::<_, f64>(3)?,
2486                        max_y: row.get::<_, f64>(4)?,
2487                        min_z: row.get::<_, f64>(5)?,
2488                        max_z: row.get::<_, f64>(6)?,
2489                    },
2490                    feature_count,
2491                }))
2492            },
2493        ))?;
2494        Ok(summary)
2495    }
2496
2497    fn indexed_sources(&self) -> Result<Vec<IndexedSourceRecord>> {
2498        let mut stmt = sqlite_result(self.conn.prepare(
2499            r"
2500            SELECT path, source_size, source_mtime_ns
2501            FROM sources
2502            ORDER BY path
2503            ",
2504        ))?;
2505        let rows = sqlite_result(stmt.query_map([], |row| {
2506            Ok(IndexedSourceRecord {
2507                path: PathBuf::from(row.get::<_, String>(0)?),
2508                source_size: row.get::<_, Option<i64>>(1)?.map(i64_to_u64).transpose()?,
2509                source_mtime_ns: row.get::<_, Option<i64>>(2)?,
2510            })
2511        }))?;
2512        sqlite_result(rows.collect())
2513    }
2514
2515    fn indexed_feature_paths(&self) -> Result<Vec<IndexedFeaturePathRecord>> {
2516        let mut stmt = sqlite_result(self.conn.prepare(
2517            r"
2518            SELECT DISTINCT path, file_size, file_mtime_ns
2519            FROM features
2520            ORDER BY path
2521            ",
2522        ))?;
2523        let rows = sqlite_result(stmt.query_map([], |row| {
2524            Ok(IndexedFeaturePathRecord {
2525                path: PathBuf::from(row.get::<_, String>(0)?),
2526                file_size: row.get::<_, Option<i64>>(1)?.map(i64_to_u64).transpose()?,
2527                file_mtime_ns: row.get::<_, Option<i64>>(2)?,
2528            })
2529        }))?;
2530        sqlite_result(rows.collect())
2531    }
2532
2533    fn ensure_member_ranges_column(conn: &rusqlite::Connection) -> Result<()> {
2534        let mut stmt = sqlite_result(conn.prepare("PRAGMA table_info(features)"))?;
2535        let rows = sqlite_result(stmt.query_map([], |row| row.get::<_, String>(1)))?;
2536        let columns = sqlite_result(rows.collect::<rusqlite::Result<Vec<_>>>())?;
2537        if !columns.iter().any(|column| column == "member_ranges") {
2538            sqlite_result(conn.execute("ALTER TABLE features ADD COLUMN member_ranges TEXT", []))?;
2539        }
2540        Ok(())
2541    }
2542
2543    fn ensure_source_status_columns(conn: &rusqlite::Connection) -> Result<()> {
2544        let mut stmt = sqlite_result(conn.prepare("PRAGMA table_info(sources)"))?;
2545        let rows = sqlite_result(stmt.query_map([], |row| row.get::<_, String>(1)))?;
2546        let columns = sqlite_result(rows.collect::<rusqlite::Result<Vec<_>>>())?;
2547        if !columns.iter().any(|column| column == "source_size") {
2548            sqlite_result(conn.execute("ALTER TABLE sources ADD COLUMN source_size INTEGER", []))?;
2549        }
2550        if !columns.iter().any(|column| column == "source_mtime_ns") {
2551            sqlite_result(
2552                conn.execute("ALTER TABLE sources ADD COLUMN source_mtime_ns INTEGER", []),
2553            )?;
2554        }
2555        Ok(())
2556    }
2557
2558    fn ensure_feature_status_columns(conn: &rusqlite::Connection) -> Result<()> {
2559        let mut stmt = sqlite_result(conn.prepare("PRAGMA table_info(features)"))?;
2560        let rows = sqlite_result(stmt.query_map([], |row| row.get::<_, String>(1)))?;
2561        let columns = sqlite_result(rows.collect::<rusqlite::Result<Vec<_>>>())?;
2562        if !columns.iter().any(|column| column == "file_size") {
2563            sqlite_result(conn.execute("ALTER TABLE features ADD COLUMN file_size INTEGER", []))?;
2564        }
2565        if !columns.iter().any(|column| column == "file_mtime_ns") {
2566            sqlite_result(
2567                conn.execute("ALTER TABLE features ADD COLUMN file_mtime_ns INTEGER", []),
2568            )?;
2569        }
2570        if !columns.iter().any(|column| column == "cityobject_count") {
2571            sqlite_result(conn.execute(
2572                "ALTER TABLE features ADD COLUMN cityobject_count INTEGER",
2573                [],
2574            ))?;
2575        }
2576        Ok(())
2577    }
2578
2579    fn ensure_feature_bounds_columns(conn: &rusqlite::Connection) -> Result<()> {
2580        let mut stmt = sqlite_result(conn.prepare("PRAGMA table_info(features)"))?;
2581        let rows = sqlite_result(stmt.query_map([], |row| row.get::<_, String>(1)))?;
2582        let columns = sqlite_result(rows.collect::<rusqlite::Result<Vec<_>>>())?;
2583        if !columns.iter().any(|column| column == "min_z") {
2584            sqlite_result(conn.execute("ALTER TABLE features ADD COLUMN min_z REAL", []))?;
2585        }
2586        if !columns.iter().any(|column| column == "max_z") {
2587            sqlite_result(conn.execute("ALTER TABLE features ADD COLUMN max_z REAL", []))?;
2588        }
2589        Ok(())
2590    }
2591
2592    fn feature_bounds_complete(&self) -> Result<bool> {
2593        let missing = sqlite_result(self.conn.query_row(
2594            "SELECT COUNT(*) FROM features WHERE min_z IS NULL OR max_z IS NULL",
2595            [],
2596            |row| row.get::<_, i64>(0),
2597        ))?;
2598        Ok(missing == 0)
2599    }
2600
2601    fn ensure_duplicate_feature_ids_allowed(conn: &rusqlite::Connection) -> Result<()> {
2602        if table_sql_contains(conn, "features", "feature_id TEXT NOT NULL UNIQUE")?
2603            || table_sql_contains(conn, "bbox_map", "feature_id TEXT NOT NULL UNIQUE")?
2604        {
2605            sqlite_result(conn.execute_batch(
2606                r"
2607                PRAGMA foreign_keys = OFF;
2608
2609                DROP TABLE IF EXISTS bbox_map_new;
2610                CREATE TABLE bbox_map_new (
2611                    feature_rowid INTEGER PRIMARY KEY,
2612                    feature_id TEXT NOT NULL
2613                );
2614                INSERT INTO bbox_map_new (feature_rowid, feature_id)
2615                SELECT feature_rowid, feature_id FROM bbox_map;
2616                DROP TABLE bbox_map;
2617                ALTER TABLE bbox_map_new RENAME TO bbox_map;
2618
2619                DROP TABLE IF EXISTS features_new;
2620                CREATE TABLE features_new (
2621                    id INTEGER PRIMARY KEY AUTOINCREMENT,
2622                    feature_id TEXT NOT NULL,
2623                    source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
2624                    path TEXT NOT NULL,
2625                    file_size INTEGER,
2626                    file_mtime_ns INTEGER,
2627                    offset INTEGER NOT NULL,
2628                    length INTEGER NOT NULL,
2629                    min_z REAL,
2630                    max_z REAL,
2631                    cityobject_count INTEGER,
2632                    member_ranges TEXT
2633                );
2634                INSERT INTO features_new (
2635                    id,
2636                    feature_id,
2637                    source_id,
2638                    path,
2639                    file_size,
2640                    file_mtime_ns,
2641                    offset,
2642                    length,
2643                    min_z,
2644                    max_z,
2645                    cityobject_count,
2646                    member_ranges
2647                )
2648                SELECT
2649                    id,
2650                    feature_id,
2651                    source_id,
2652                    path,
2653                    file_size,
2654                    file_mtime_ns,
2655                    offset,
2656                    length,
2657                    min_z,
2658                    max_z,
2659                    cityobject_count,
2660                    member_ranges
2661                FROM features;
2662                DROP TABLE features;
2663                ALTER TABLE features_new RENAME TO features;
2664
2665                PRAGMA foreign_keys = ON;
2666                ",
2667            ))?;
2668        }
2669        Ok(())
2670    }
2671
2672    fn clear_tables(tx: &rusqlite::Transaction<'_>) -> Result<()> {
2673        sqlite_result(tx.execute_batch(
2674            r"
2675            DELETE FROM bbox_map;
2676            DELETE FROM feature_bbox;
2677            DELETE FROM features;
2678            DELETE FROM sources;
2679            ",
2680        ))?;
2681        Ok(())
2682    }
2683
2684    fn insert_source_in_tx(
2685        tx: &rusqlite::Transaction<'_>,
2686        path: &Path,
2687        meta: &Meta,
2688        vertices_offset: Option<u64>,
2689        vertices_length: Option<u64>,
2690        source_size: u64,
2691        source_mtime_ns: i64,
2692    ) -> Result<i64> {
2693        let metadata_json = json_string(meta)?;
2694        let vertices_offset = sqlite_result(vertices_offset.map(u64_to_i64).transpose())?;
2695        let vertices_length = sqlite_result(vertices_length.map(u64_to_i64).transpose())?;
2696        let source_size = sqlite_result(u64_to_i64(source_size))?;
2697        sqlite_result(tx.execute(
2698            r"
2699            INSERT INTO sources (
2700                path,
2701                metadata,
2702                vertices_offset,
2703                vertices_length,
2704                source_size,
2705                source_mtime_ns
2706            )
2707            VALUES (?1, ?2, ?3, ?4, ?5, ?6)
2708            ",
2709            params![
2710                path.to_string_lossy(),
2711                metadata_json,
2712                vertices_offset,
2713                vertices_length,
2714                source_size,
2715                source_mtime_ns,
2716            ],
2717        ))?;
2718        Ok(tx.last_insert_rowid())
2719    }
2720
2721    fn insert_features_in_tx(
2722        tx: &rusqlite::Transaction<'_>,
2723        entries: &[FeatureIndexEntry],
2724    ) -> Result<()> {
2725        let mut feature_stmt = sqlite_result(tx.prepare(
2726            r"
2727            INSERT INTO features (
2728                feature_id,
2729                source_id,
2730                path,
2731                file_size,
2732                file_mtime_ns,
2733                offset,
2734                length,
2735                min_z,
2736                max_z,
2737                cityobject_count,
2738                member_ranges
2739            )
2740            VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
2741            ",
2742        ))?;
2743        let mut bbox_stmt = sqlite_result(tx.prepare(
2744            r"
2745            INSERT INTO feature_bbox (feature_rowid, min_x, max_x, min_y, max_y)
2746            VALUES (?1, ?2, ?3, ?4, ?5)
2747            ",
2748        ))?;
2749        let mut map_stmt = sqlite_result(tx.prepare(
2750            r"
2751            INSERT INTO bbox_map (feature_rowid, feature_id)
2752            VALUES (?1, ?2)
2753            ",
2754        ))?;
2755        for entry in entries {
2756            let file_size = sqlite_result(u64_to_i64(entry.file_size))?;
2757            let offset = sqlite_result(u64_to_i64(entry.offset))?;
2758            let length = sqlite_result(u64_to_i64(entry.length))?;
2759            let cityobject_count = sqlite_result(u64_to_i64(entry.cityobject_count))?;
2760            sqlite_result(feature_stmt.execute(params![
2761                &entry.id,
2762                entry.source_id,
2763                entry.path.to_string_lossy(),
2764                file_size,
2765                entry.file_mtime_ns,
2766                offset,
2767                length,
2768                entry.bounds.min_z,
2769                entry.bounds.max_z,
2770                cityobject_count,
2771                &entry.member_ranges_json,
2772            ]))?;
2773            let feature_rowid = tx.last_insert_rowid();
2774            sqlite_result(bbox_stmt.execute(params![
2775                feature_rowid,
2776                entry.bounds.min_x,
2777                entry.bounds.max_x,
2778                entry.bounds.min_y,
2779                entry.bounds.max_y,
2780            ]))?;
2781            sqlite_result(map_stmt.execute(params![feature_rowid, &entry.id]))?;
2782        }
2783
2784        Ok(())
2785    }
2786
2787    fn feature_location_from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<FeatureLocation> {
2788        Self::feature_location_from_row_offset(row, 0)
2789    }
2790
2791    fn feature_location_from_row_offset(
2792        row: &rusqlite::Row<'_>,
2793        col: usize,
2794    ) -> rusqlite::Result<FeatureLocation> {
2795        let feature_id = row.get::<_, String>(col)?;
2796        let source_id = row.get::<_, i64>(col + 1)?;
2797        let source_path = PathBuf::from(row.get::<_, String>(col + 2)?);
2798        let offset = i64_to_u64(row.get::<_, i64>(col + 3)?)?;
2799        let length = i64_to_u64(row.get::<_, i64>(col + 4)?)?;
2800        let vertices_offset = match row.get::<_, Option<i64>>(col + 5)? {
2801            Some(value) => Some(i64_to_u64(value)?),
2802            None => None,
2803        };
2804        let vertices_length = match row.get::<_, Option<i64>>(col + 6)? {
2805            Some(value) => Some(i64_to_u64(value)?),
2806            None => None,
2807        };
2808        let member_ranges_json = row.get::<_, Option<String>>(col + 7)?;
2809
2810        Ok(FeatureLocation {
2811            feature_id,
2812            source_id,
2813            source_path,
2814            offset,
2815            length,
2816            vertices_offset,
2817            vertices_length,
2818            member_ranges_json,
2819        })
2820    }
2821
2822    fn indexed_feature_location_from_row(
2823        row: &rusqlite::Row<'_>,
2824    ) -> rusqlite::Result<IndexedFeatureLocation> {
2825        let row_id = row.get::<_, i64>(0)?;
2826        let location = Self::feature_location_from_row_offset(row, 1)?;
2827        Ok(IndexedFeatureLocation { row_id, location })
2828    }
2829
2830    fn indexed_feature_ref_location_from_row(
2831        row: &rusqlite::Row<'_>,
2832    ) -> rusqlite::Result<IndexedFeatureRefLocation> {
2833        let row_id = row.get::<_, i64>(0)?;
2834        let feature_id = row.get::<_, String>(1)?;
2835        let source_id = row.get::<_, i64>(2)?;
2836        let source_path = PathBuf::from(row.get::<_, String>(3)?);
2837        let offset = i64_to_u64(row.get::<_, i64>(4)?)?;
2838        let length = i64_to_u64(row.get::<_, i64>(5)?)?;
2839        let vertices_offset = match row.get::<_, Option<i64>>(6)? {
2840            Some(value) => Some(i64_to_u64(value)?),
2841            None => None,
2842        };
2843        let vertices_length = match row.get::<_, Option<i64>>(7)? {
2844            Some(value) => Some(i64_to_u64(value)?),
2845            None => None,
2846        };
2847        let member_ranges_json = row.get::<_, Option<String>>(8)?;
2848        let bounds = FeatureBounds {
2849            min_x: row.get::<_, f64>(9)?,
2850            max_x: row.get::<_, f64>(10)?,
2851            min_y: row.get::<_, f64>(11)?,
2852            max_y: row.get::<_, f64>(12)?,
2853            min_z: row.get::<_, f64>(13)?,
2854            max_z: row.get::<_, f64>(14)?,
2855        };
2856
2857        Ok(IndexedFeatureRefLocation {
2858            row_id,
2859            feature: IndexedFeatureRef {
2860                row_id,
2861                feature_id,
2862                source_id,
2863                source_path,
2864                offset,
2865                length,
2866                vertices_offset,
2867                vertices_length,
2868                member_ranges_json,
2869                bounds,
2870            },
2871        })
2872    }
2873}
2874
2875trait StorageBackend: Send + Sync {
2876    fn scan(&self, worker_count: usize) -> Result<Vec<SourceScan>>;
2877    fn read_one(&self, loc: &FeatureLocation, metadata_bytes: Arc<[u8]>) -> Result<CityModel>;
2878    fn read_many(
2879        &self,
2880        locations: &[&FeatureLocation],
2881        metadata_bytes: Arc<[u8]>,
2882    ) -> Result<Vec<CityModel>> {
2883        locations
2884            .iter()
2885            .map(|loc| self.read_one(loc, Arc::clone(&metadata_bytes)))
2886            .collect()
2887    }
2888}
2889
/// Result of scanning one source, as returned by `StorageBackend::scan`.
struct SourceScan {
    /// Path of the source file.
    path: PathBuf,
    /// Metadata associated with the source.
    metadata: Meta,
    /// Byte offset of the shared vertices range, when the source has one
    /// (`None` for feature-files sources).
    vertices_offset: Option<u64>,
    /// Byte length of the shared vertices range, when the source has one
    /// (`None` for feature-files sources).
    vertices_length: Option<u64>,
    /// Size of the source file — presumably in bytes; confirm against `file_status`.
    source_size: u64,
    /// Modification time of the source file — presumably nanoseconds since the
    /// Unix epoch; confirm against `file_status`.
    source_mtime_ns: i64,
    /// Features found in this source.
    features: Vec<ScannedFeature>,
}
2899
/// One feature discovered during a scan, with the byte range it occupies.
struct ScannedFeature {
    /// Feature identifier.
    id: String,
    /// File that contains the feature.
    path: PathBuf,
    /// Size of that file (feature-file scans also use it as `length`).
    file_size: u64,
    /// Modification time of that file — presumably nanoseconds since the Unix
    /// epoch; confirm against `file_status`.
    file_mtime_ns: i64,
    /// Byte offset of the feature within the file (`0` for feature files).
    offset: u64,
    /// Byte length of the feature within the file.
    length: u64,
    /// Bounds computed for the feature.
    bounds: FeatureBounds,
    /// Number of CityObjects in the feature.
    cityobject_count: u64,
    /// Per-object byte ranges, when the scan recorded them (`None` for feature
    /// files).
    member_ranges: Option<Vec<IndexedObjectRange>>,
}
2911
/// Byte range of one CityObject entry inside a source file.
///
/// A list of these is stored as JSON in a feature's member ranges and parsed
/// back when the feature is read.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct IndexedObjectRange {
    /// Id the fragment at this range is expected to resolve to.
    id: String,
    /// Byte offset of the fragment within the source file.
    offset: u64,
    /// Byte length of the fragment.
    length: u64,
}
2918
/// Parts of a feature after its vertex indices have been rewritten to refer to
/// a feature-local vertex list (see `build_feature_parts`).
struct LocalizedFeatureParts {
    /// Identifier of the feature.
    feature_id: String,
    /// CityObjects of the feature, serialized with remapped boundaries.
    cityobjects: Vec<LocalizedFeatureObject>,
    /// Only the vertices referenced by the feature, in ascending order of
    /// their original index.
    vertices: Vec<[i64; 3]>,
}
2924
/// One CityObject of a localized feature.
struct LocalizedFeatureObject {
    /// CityObject identifier.
    id: String,
    /// The CityObject serialized as raw JSON.
    object_json: Box<RawValue>,
}
2929
/// Backend for sources discovered with the `.jsonl` suffix, where each feature
/// is read as one byte range of a source file.
struct NdjsonBackend {
    /// Paths handed to `collect_layout_files` when scanning.
    paths: Vec<PathBuf>,
}
2933
2934impl StorageBackend for NdjsonBackend {
2935    fn scan(&self, worker_count: usize) -> Result<Vec<SourceScan>> {
2936        let paths = collect_layout_files(&self.paths, ".jsonl")?;
2937        parallel_scan_items(&paths, worker_count, |path| {
2938            scan_ndjson_source(path.as_path())
2939        })
2940    }
2941
2942    fn read_one(&self, loc: &FeatureLocation, metadata_bytes: Arc<[u8]>) -> Result<CityModel> {
2943        let bytes = read_exact_range(&loc.source_path, loc.offset, loc.length)?;
2944        feature_slice_with_indexed_id(&bytes, loc, metadata_bytes.as_ref())
2945    }
2946
2947    fn read_many(
2948        &self,
2949        locations: &[&FeatureLocation],
2950        metadata_bytes: Arc<[u8]>,
2951    ) -> Result<Vec<CityModel>> {
2952        read_feature_slices_with_base(locations, metadata_bytes.as_ref())
2953    }
2954}
2955
/// Backend for sources discovered with the `.city.json` suffix, where features
/// reference a vertices array shared by the whole source file.
struct CityJsonBackend {
    /// Paths handed to `collect_layout_files` when scanning.
    paths: Vec<PathBuf>,
    /// Parsed shared vertices, keyed by source path, so each source's vertices
    /// fragment is parsed once per cache lifetime.
    vertices_cache: Mutex<LruCache<PathBuf, Arc<Vec<[i64; 3]>>>>,
}
2960
2961impl CityJsonBackend {
2962    fn new(paths: Vec<PathBuf>) -> Self {
2963        Self {
2964            paths,
2965            vertices_cache: Mutex::new(LruCache::unbounded()),
2966        }
2967    }
2968
2969    fn load_shared_vertices(
2970        &self,
2971        source_path: &Path,
2972        source_file: &mut fs::File,
2973        offset: u64,
2974        length: u64,
2975    ) -> Result<Arc<Vec<[i64; 3]>>> {
2976        let mut cache = self
2977            .vertices_cache
2978            .lock()
2979            .unwrap_or_else(std::sync::PoisonError::into_inner);
2980        if let Some(vertices) = cache.get(source_path) {
2981            return Ok(Arc::clone(vertices));
2982        }
2983
2984        let vertices_bytes = read_exact_range_from_file(source_file, source_path, offset, length)?;
2985        let vertices = Arc::new(parse_vertices_fragment(&vertices_bytes)?);
2986        cache.put(source_path.to_path_buf(), Arc::clone(&vertices));
2987        Ok(vertices)
2988    }
2989}
2990
2991impl StorageBackend for CityJsonBackend {
2992    fn scan(&self, worker_count: usize) -> Result<Vec<SourceScan>> {
2993        let _ = &self.vertices_cache;
2994        let paths = collect_layout_files(&self.paths, ".city.json")?;
2995        parallel_scan_items(&paths, worker_count, |path| {
2996            scan_cityjson_source(path.as_path())
2997        })
2998    }
2999
3000    fn read_one(&self, loc: &FeatureLocation, metadata_bytes: Arc<[u8]>) -> Result<CityModel> {
3001        let mut source_file = fs::File::open(&loc.source_path)?;
3002        self.read_one_from_file(loc, metadata_bytes.as_ref(), &mut source_file)
3003    }
3004
3005    fn read_many(
3006        &self,
3007        locations: &[&FeatureLocation],
3008        metadata_bytes: Arc<[u8]>,
3009    ) -> Result<Vec<CityModel>> {
3010        let mut locations_by_path: BTreeMap<PathBuf, Vec<usize>> = BTreeMap::new();
3011        for (index, location) in locations.iter().enumerate() {
3012            locations_by_path
3013                .entry(location.source_path.clone())
3014                .or_default()
3015                .push(index);
3016        }
3017
3018        let mut models = std::iter::repeat_with(|| None)
3019            .take(locations.len())
3020            .collect::<Vec<Option<CityModel>>>();
3021        for (path, mut indexes) in locations_by_path {
3022            indexes.sort_by_key(|index| locations[*index].offset);
3023            let mut source_file = fs::File::open(&path)?;
3024            for index in indexes {
3025                let model = self.read_one_from_file(
3026                    locations[index],
3027                    metadata_bytes.as_ref(),
3028                    &mut source_file,
3029                )?;
3030                models[index] = Some(model);
3031            }
3032        }
3033
3034        Ok(models
3035            .into_iter()
3036            .map(|model| model.expect("every input feature should have a decoded model"))
3037            .collect())
3038    }
3039}
3040
3041impl CityJsonBackend {
3042    fn read_one_from_file(
3043        &self,
3044        loc: &FeatureLocation,
3045        metadata_bytes: &[u8],
3046        source_file: &mut fs::File,
3047    ) -> Result<CityModel> {
3048        let vertices_offset = loc.vertices_offset.ok_or_else(|| {
3049            Error::UnsupportedFeature(
3050                "regular CityJSON reads require an indexed shared vertices range".into(),
3051            )
3052        })?;
3053        let vertices_length = loc.vertices_length.ok_or_else(|| {
3054            Error::UnsupportedFeature(
3055                "regular CityJSON reads require an indexed shared vertices range".into(),
3056            )
3057        })?;
3058
3059        let member_ranges = loc
3060            .member_ranges_json
3061            .as_deref()
3062            .map(parse_json_str::<Vec<IndexedObjectRange>>)
3063            .transpose()?
3064            .unwrap_or_else(|| {
3065                vec![IndexedObjectRange {
3066                    id: loc.feature_id.clone(),
3067                    offset: loc.offset,
3068                    length: loc.length,
3069                }]
3070            });
3071        let mut object_entries = Vec::with_capacity(member_ranges.len());
3072        for member_range in &member_ranges {
3073            let object_fragment = read_exact_range_from_file(
3074                source_file,
3075                &loc.source_path,
3076                member_range.offset,
3077                member_range.length,
3078            )?;
3079            let (object_id, object_value) = parse_cityobject_entry(&object_fragment)?;
3080            if object_id != member_range.id {
3081                return Err(import_error(format!(
3082                    "indexed CityJSON member {} resolved to fragment for {}",
3083                    member_range.id, object_id
3084                )));
3085            }
3086            object_entries.push((object_id, object_value));
3087        }
3088        let shared_vertices = self.load_shared_vertices(
3089            &loc.source_path,
3090            source_file,
3091            vertices_offset,
3092            vertices_length,
3093        )?;
3094        let feature_parts =
3095            build_feature_parts(&loc.feature_id, object_entries, shared_vertices.as_ref())?;
3096        let cityobjects = feature_parts
3097            .cityobjects
3098            .iter()
3099            .map(|cityobject| staged::FeatureObjectFragment {
3100                id: cityobject.id.as_str(),
3101                object: cityobject.object_json.as_ref(),
3102            })
3103            .collect::<Vec<_>>();
3104        let assembly = staged::FeatureAssembly {
3105            id: feature_parts.feature_id.as_str(),
3106            cityobjects: &cityobjects,
3107            vertices: &feature_parts.vertices,
3108        };
3109
3110        staged::from_feature_assembly_with_base(assembly, metadata_bytes)
3111    }
3112}
3113
/// Backend for a directory tree of metadata files and per-feature files.
struct FeatureFilesBackend {
    /// Directory that is walked when scanning.
    root: PathBuf,
    /// Matcher identifying metadata files by their root-relative path.
    metadata_glob: GlobMatcher,
    /// Matcher identifying feature files by their root-relative path.
    feature_glob: GlobMatcher,
}
3119
/// One metadata file together with the feature files that resolved to it.
struct FeatureFileSourcePlan {
    /// Path of the metadata file.
    path: PathBuf,
    /// Parsed contents of the metadata file.
    metadata: Meta,
    /// Size of the metadata file, as reported by `file_status`.
    source_size: u64,
    /// Modification time of the metadata file, as reported by `file_status`.
    source_mtime_ns: i64,
    /// Feature files assigned to this metadata file.
    feature_paths: Vec<PathBuf>,
}
3127
/// Work item for scanning a single feature file.
struct FeatureFileScanItem<'a> {
    /// Index of the owning source in the list of source plans.
    source_index: usize,
    /// Metadata of the owning source.
    metadata: &'a Meta,
    /// Path of the feature file to scan.
    path: &'a Path,
}
3133
3134impl FeatureFilesBackend {
3135    fn new(root: PathBuf, metadata_glob: &str, feature_glob: &str) -> Self {
3136        let metadata_glob = globset::Glob::new(metadata_glob)
3137            .expect("metadata glob must be valid")
3138            .compile_matcher();
3139        let feature_glob = globset::Glob::new(feature_glob)
3140            .expect("feature glob must be valid")
3141            .compile_matcher();
3142        Self {
3143            root,
3144            metadata_glob,
3145            feature_glob,
3146        }
3147    }
3148}
3149
3150impl StorageBackend for FeatureFilesBackend {
3151    fn scan(&self, worker_count: usize) -> Result<Vec<SourceScan>> {
3152        scan_feature_files_root(
3153            &self.root,
3154            &self.metadata_glob,
3155            &self.feature_glob,
3156            worker_count,
3157        )
3158    }
3159
3160    fn read_one(&self, loc: &FeatureLocation, metadata_bytes: Arc<[u8]>) -> Result<CityModel> {
3161        let feature_bytes = read_exact_range(&loc.source_path, loc.offset, loc.length)?;
3162        feature_slice_with_indexed_id(&feature_bytes, loc, metadata_bytes.as_ref())
3163    }
3164
3165    fn read_many(
3166        &self,
3167        locations: &[&FeatureLocation],
3168        metadata_bytes: Arc<[u8]>,
3169    ) -> Result<Vec<CityModel>> {
3170        read_feature_slices_with_base(locations, metadata_bytes.as_ref())
3171    }
3172}
3173
3174fn scan_feature_files_root(
3175    root: &Path,
3176    metadata_glob: &GlobMatcher,
3177    feature_glob: &GlobMatcher,
3178    worker_count: usize,
3179) -> Result<Vec<SourceScan>> {
3180    let plans = discover_feature_file_sources(root, metadata_glob, feature_glob)?;
3181    let mut sources = plans
3182        .iter()
3183        .map(|plan| SourceScan {
3184            path: plan.path.clone(),
3185            metadata: plan.metadata.clone(),
3186            vertices_offset: None,
3187            vertices_length: None,
3188            source_size: plan.source_size,
3189            source_mtime_ns: plan.source_mtime_ns,
3190            features: Vec::with_capacity(plan.feature_paths.len()),
3191        })
3192        .collect::<Vec<_>>();
3193    let scan_items = plans
3194        .iter()
3195        .enumerate()
3196        .flat_map(|(source_index, plan)| {
3197            plan.feature_paths
3198                .iter()
3199                .map(move |path| FeatureFileScanItem {
3200                    source_index,
3201                    metadata: &plan.metadata,
3202                    path: path.as_path(),
3203                })
3204        })
3205        .collect::<Vec<_>>();
3206    let features = parallel_scan_items(&scan_items, worker_count, scan_feature_file)?;
3207    for (source_index, features) in features {
3208        sources[source_index].features.extend(features);
3209    }
3210    Ok(sources)
3211}
3212
3213fn discover_feature_file_sources(
3214    root: &Path,
3215    metadata_glob: &GlobMatcher,
3216    feature_glob: &GlobMatcher,
3217) -> Result<Vec<FeatureFileSourcePlan>> {
3218    let mut metadata_files = Vec::new();
3219    let mut feature_files = Vec::new();
3220
3221    for entry in WalkBuilder::new(root)
3222        .hidden(false)
3223        .follow_links(true)
3224        .build()
3225    {
3226        let entry = entry.map_err(|error| import_error(error.to_string()))?;
3227        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
3228            continue;
3229        }
3230        if entry.metadata().is_ok_and(|meta| meta.len() == 0) {
3231            continue;
3232        }
3233        let path = entry.into_path();
3234        let rel = path.strip_prefix(root).unwrap_or(path.as_path());
3235        if metadata_glob.is_match(rel) {
3236            metadata_files.push(path);
3237        } else if feature_glob.is_match(rel) {
3238            feature_files.push(path);
3239        }
3240    }
3241
3242    metadata_files.sort();
3243    feature_files.sort();
3244
3245    if metadata_files.is_empty() {
3246        return Err(import_error(format!(
3247            "feature-files root {} does not contain any metadata files",
3248            root.display()
3249        )));
3250    }
3251
3252    let mut metadata_by_dir = BTreeMap::new();
3253    let mut sources = BTreeMap::new();
3254
3255    for metadata_path in metadata_files {
3256        let metadata: Meta = read_json(&metadata_path)?;
3257        let (source_size, source_mtime_ns) = file_status(&metadata_path)?;
3258        let parent = metadata_path.parent().unwrap_or(root).to_path_buf();
3259        metadata_by_dir.insert(parent, metadata_path.clone());
3260        sources.insert(
3261            metadata_path.clone(),
3262            FeatureFileSourcePlan {
3263                path: metadata_path,
3264                metadata,
3265                source_size,
3266                source_mtime_ns,
3267                feature_paths: Vec::new(),
3268            },
3269        );
3270    }
3271
3272    for feature_path in feature_files {
3273        let metadata_path = resolve_feature_metadata_path(root, &feature_path, &metadata_by_dir)
3274            .ok_or_else(|| {
3275                import_error(format!(
3276                    "no ancestor metadata file found for feature {}",
3277                    feature_path.display()
3278                ))
3279            })?;
3280        let source = sources.get_mut(&metadata_path).ok_or_else(|| {
3281            import_error(format!(
3282                "feature {} resolved to missing metadata source {}",
3283                feature_path.display(),
3284                metadata_path.display()
3285            ))
3286        })?;
3287        source.feature_paths.push(feature_path);
3288    }
3289
3290    Ok(sources.into_values().collect())
3291}
3292
3293fn scan_feature_file(item: &FeatureFileScanItem<'_>) -> Result<(usize, Vec<ScannedFeature>)> {
3294    let feature: Value = read_json(item.path)?;
3295    let (ids, bounds, cityobject_count) = parse_feature_file_bounds(&feature, item.metadata)?;
3296    let (file_size, file_mtime_ns) = file_status(item.path)?;
3297    let features = ids
3298        .into_iter()
3299        .map(|id| ScannedFeature {
3300            id,
3301            path: item.path.to_path_buf(),
3302            file_size,
3303            file_mtime_ns,
3304            offset: 0,
3305            length: file_size,
3306            bounds,
3307            cityobject_count,
3308            member_ranges: None,
3309        })
3310        .collect();
3311    Ok((item.source_index, features))
3312}
3313
/// Finds the metadata file registered for the nearest ancestor directory of
/// `feature_path`.
///
/// The search starts at the feature's parent directory and moves upwards. It
/// stops after checking `root`, so directories above `root` are never
/// consulted. Returns `None` when no visited directory has a metadata file.
fn resolve_feature_metadata_path(
    root: &Path,
    feature_path: &Path,
    metadata_by_dir: &BTreeMap<PathBuf, PathBuf>,
) -> Option<PathBuf> {
    // `ancestors()` yields the path itself first; skip it to start at the parent.
    for directory in feature_path.ancestors().skip(1) {
        if let Some(metadata_path) = metadata_by_dir.get(directory) {
            return Some(metadata_path.clone());
        }
        if directory == root {
            return None;
        }
    }
    None
}
3331
3332fn parse_feature_file_bounds(
3333    feature: &Value,
3334    metadata: &Meta,
3335) -> Result<(Vec<String>, FeatureBounds, u64)> {
3336    let ids = feature_cityobject_keys(feature, "feature file")?;
3337    let vertices = feature
3338        .get("vertices")
3339        .cloned()
3340        .ok_or_else(|| import_error("feature file is missing vertices"))?;
3341    let vertices: Vec<[i64; 3]> = parse_json_value(vertices)?;
3342
3343    let referenced_vertices = collect_feature_vertex_indices(feature, vertices.len())?;
3344    let (scale, translate) = parse_ndjson_transform(metadata)?;
3345    let bounds = feature_bounds_from_vertices(&vertices, &referenced_vertices, scale, translate)?;
3346    let cityobject_count = feature_cityobject_count(feature, "feature file")?;
3347    Ok((ids, bounds, cityobject_count))
3348}
3349
/// Strips leading and trailing ASCII whitespace and commas from `bytes`.
///
/// Delimiters in the interior of the slice are kept. If the slice contains
/// nothing but delimiters, an empty slice is returned.
fn trim_fragment_delimiters(bytes: &[u8]) -> &[u8] {
    let is_content = |byte: &u8| !(byte.is_ascii_whitespace() || *byte == b',');

    match bytes.iter().position(is_content) {
        // Nothing but delimiters: hand back the empty tail of the input.
        None => &bytes[bytes.len()..],
        Some(first) => {
            // A content byte exists, so the reverse search cannot fail; the
            // fallback only keeps this branch free of panics.
            let last = bytes.iter().rposition(is_content).unwrap_or(first);
            &bytes[first..=last]
        }
    }
}
3363
3364fn parse_cityobject_entry(fragment: &[u8]) -> Result<(String, Value)> {
3365    let fragment = trim_fragment_delimiters(fragment);
3366    if fragment.is_empty() {
3367        return Err(import_error("CityObject entry fragment is empty"));
3368    }
3369
3370    let mut wrapped = Vec::with_capacity(fragment.len() + 2);
3371    wrapped.push(b'{');
3372    wrapped.extend_from_slice(fragment);
3373    wrapped.push(b'}');
3374
3375    let entry: Map<String, Value> = parse_json_slice(&wrapped)?;
3376    if entry.len() != 1 {
3377        return Err(import_error(
3378            "CityObject entry fragment must contain exactly one object entry",
3379        ));
3380    }
3381
3382    let (object_id, object_value) = entry
3383        .into_iter()
3384        .next()
3385        .ok_or_else(|| import_error("CityObject entry fragment is empty"))?;
3386    if !object_value.is_object() {
3387        return Err(import_error("CityObject entry value must be a JSON object"));
3388    }
3389
3390    Ok((object_id, object_value))
3391}
3392
3393fn parse_vertices_fragment(fragment: &[u8]) -> Result<Vec<[i64; 3]>> {
3394    let fragment = trim_fragment_delimiters(fragment);
3395    if fragment.is_empty() {
3396        return Err(import_error("shared vertices fragment is empty"));
3397    }
3398    parse_json_slice(fragment)
3399}
3400
3401fn build_feature_parts(
3402    feature_id: &str,
3403    mut object_entries: Vec<(String, Value)>,
3404    shared_vertices: &[[i64; 3]],
3405) -> Result<LocalizedFeatureParts> {
3406    let retained_ids = object_entries
3407        .iter()
3408        .map(|(id, _)| id.clone())
3409        .collect::<BTreeSet<_>>();
3410
3411    for (_, object_value) in &mut object_entries {
3412        filter_local_relationships(object_value, &retained_ids)?;
3413    }
3414
3415    let mut referenced_vertices = BTreeSet::new();
3416    for (_, object_value) in &object_entries {
3417        collect_object_vertex_indices(object_value, &mut referenced_vertices)?;
3418    }
3419
3420    let local_vertices = build_local_vertices(shared_vertices, &referenced_vertices)?;
3421    let remap = referenced_vertices
3422        .iter()
3423        .enumerate()
3424        .map(|(new_index, old_index)| (*old_index, new_index))
3425        .collect::<HashMap<_, _>>();
3426
3427    for (_, object_value) in &mut object_entries {
3428        if let Some(geometries) = object_value
3429            .as_object_mut()
3430            .and_then(|object| object.get_mut("geometry"))
3431            .and_then(Value::as_array_mut)
3432        {
3433            for geometry in geometries {
3434                if let Some(boundaries) = geometry.get_mut("boundaries") {
3435                    remap_vertex_indices(boundaries, &remap)?;
3436                }
3437            }
3438        }
3439    }
3440
3441    let cityobjects = object_entries
3442        .into_iter()
3443        .map(|(id, object_value)| {
3444            let object_json = RawValue::from_string(json_string(&object_value)?)
3445                .map_err(|error| import_error(error.to_string()))?;
3446            Ok(LocalizedFeatureObject { id, object_json })
3447        })
3448        .collect::<Result<Vec<_>>>()?;
3449
3450    Ok(LocalizedFeatureParts {
3451        feature_id: feature_id.to_owned(),
3452        cityobjects,
3453        vertices: local_vertices,
3454    })
3455}
3456
3457fn filter_local_relationships(
3458    object_value: &mut Value,
3459    retained_ids: &BTreeSet<String>,
3460) -> Result<()> {
3461    let object = object_value
3462        .as_object_mut()
3463        .ok_or_else(|| import_error("CityObject value must be a JSON object"))?;
3464
3465    for key in ["children", "parents"] {
3466        let remove_key = match object.get_mut(key) {
3467            Some(value) => {
3468                let refs = value
3469                    .as_array_mut()
3470                    .ok_or_else(|| import_error(format!("{key} must be an array")))?;
3471                refs.retain(|entry| {
3472                    entry
3473                        .as_str()
3474                        .is_some_and(|object_id| retained_ids.contains(object_id))
3475                });
3476                refs.is_empty()
3477            }
3478            None => false,
3479        };
3480
3481        if remove_key {
3482            object.remove(key);
3483        }
3484    }
3485
3486    Ok(())
3487}
3488
3489fn collect_vertex_indices(value: &Value, indices: &mut BTreeSet<usize>) -> Result<()> {
3490    match value {
3491        Value::Array(items) => {
3492            for item in items {
3493                collect_vertex_indices(item, indices)?;
3494            }
3495            Ok(())
3496        }
3497        Value::Number(number) => {
3498            indices.insert(number_to_index(number)?);
3499            Ok(())
3500        }
3501        Value::Null => Ok(()),
3502        other => Err(import_error(format!(
3503            "boundary values must be arrays or non-negative integers, found {}",
3504            value_kind(other)
3505        ))),
3506    }
3507}
3508
3509fn remap_vertex_indices(value: &mut Value, remap: &HashMap<usize, usize>) -> Result<()> {
3510    match value {
3511        Value::Array(items) => {
3512            for item in items {
3513                remap_vertex_indices(item, remap)?;
3514            }
3515            Ok(())
3516        }
3517        Value::Number(number) => {
3518            let old_index = number_to_index(number)?;
3519            let new_index = remap.get(&old_index).copied().ok_or_else(|| {
3520                import_error(format!(
3521                    "missing remap entry for referenced vertex index {old_index}"
3522                ))
3523            })?;
3524            *value =
3525                Value::Number(Number::from(u64::try_from(new_index).map_err(|_| {
3526                    import_error("localized vertex index does not fit in u64")
3527                })?));
3528            Ok(())
3529        }
3530        Value::Null => Ok(()),
3531        other => Err(import_error(format!(
3532            "boundary values must be arrays or non-negative integers, found {}",
3533            value_kind(other)
3534        ))),
3535    }
3536}
3537
3538fn build_local_vertices(
3539    shared_vertices: &[[i64; 3]],
3540    referenced_vertices: &BTreeSet<usize>,
3541) -> Result<Vec<[i64; 3]>> {
3542    let mut vertices = Vec::with_capacity(referenced_vertices.len());
3543
3544    for &index in referenced_vertices {
3545        let vertex = shared_vertices.get(index).copied().ok_or_else(|| {
3546            import_error(format!(
3547                "vertex index {index} is outside the shared vertices array"
3548            ))
3549        })?;
3550        vertices.push(vertex);
3551    }
3552
3553    Ok(vertices)
3554}
3555
3556fn number_to_index(number: &Number) -> Result<usize> {
3557    let index = number
3558        .as_u64()
3559        .ok_or_else(|| import_error("boundary vertex indices must be non-negative integers"))?;
3560    usize::try_from(index)
3561        .map_err(|_| import_error(format!("vertex index {index} does not fit in usize")))
3562}
3563
3564fn value_kind(value: &Value) -> &'static str {
3565    match value {
3566        Value::Null => "null",
3567        Value::Bool(_) => "bool",
3568        Value::Number(_) => "number",
3569        Value::String(_) => "string",
3570        Value::Array(_) => "array",
3571        Value::Object(_) => "object",
3572    }
3573}
3574
3575fn import_error(message: impl Into<String>) -> Error {
3576    Error::Import(message.into())
3577}
3578
3579/// Returns the configured index worker count.
3580///
3581/// # Errors
3582///
3583/// Returns an error if `CJINDEX_WORKERS` is set to an invalid value.
3584pub fn configured_worker_count() -> Result<usize> {
3585    match std::env::var(WORKER_COUNT_ENV) {
3586        Ok(value) => {
3587            let worker_count = value.parse::<usize>().map_err(|error| {
3588                import_error(format!(
3589                    "{WORKER_COUNT_ENV} must be a positive integer: {error}"
3590                ))
3591            })?;
3592            if worker_count == 0 {
3593                return Err(import_error(format!(
3594                    "{WORKER_COUNT_ENV} must be greater than zero"
3595                )));
3596            }
3597            Ok(worker_count)
3598        }
3599        Err(std::env::VarError::NotPresent) => {
3600            Ok(std::thread::available_parallelism().map_or(1, std::num::NonZeroUsize::get))
3601        }
3602        Err(std::env::VarError::NotUnicode(_)) => Err(import_error(format!(
3603            "{WORKER_COUNT_ENV} must contain valid UTF-8"
3604        ))),
3605    }
3606}
3607
3608fn parallel_scan_items<T, U, F>(items: &[T], worker_count: usize, scan: F) -> Result<Vec<U>>
3609where
3610    T: Sync,
3611    U: Send,
3612    F: Fn(&T) -> Result<U> + Sync,
3613{
3614    if items.is_empty() {
3615        return Ok(Vec::new());
3616    }
3617
3618    let shard_count = worker_count.max(1).min(items.len());
3619    if shard_count == 1 {
3620        return items.iter().map(scan).collect();
3621    }
3622
3623    let chunk_size = items.len().div_ceil(shard_count);
3624    std::thread::scope(|scope| -> Result<Vec<U>> {
3625        let mut handles = Vec::with_capacity(shard_count);
3626        let scan = &scan;
3627        for shard in items.chunks(chunk_size) {
3628            handles.push(scope.spawn(move || {
3629                let mut shard_results = Vec::with_capacity(shard.len());
3630                for item in shard {
3631                    shard_results.push(scan(item)?);
3632                }
3633                Ok::<Vec<U>, Error>(shard_results)
3634            }));
3635        }
3636
3637        let mut results = Vec::with_capacity(items.len());
3638        for handle in handles {
3639            let shard_results = handle
3640                .join()
3641                .map_err(|_| import_error("parallel scan worker panicked"))??;
3642            results.extend(shard_results);
3643        }
3644        Ok(results)
3645    })
3646}
3647
3648fn serde_json_error(error: &serde_json::Error) -> Error {
3649    import_error(error.to_string())
3650}
3651
3652fn parse_json_slice<T: DeserializeOwned>(bytes: &[u8]) -> Result<T> {
3653    serde_json::from_slice(bytes).map_err(|error| serde_json_error(&error))
3654}
3655
3656fn parse_json_str<T: DeserializeOwned>(value: &str) -> Result<T> {
3657    serde_json::from_str(value).map_err(|error| serde_json_error(&error))
3658}
3659
3660fn parse_json_value<T: DeserializeOwned>(value: Value) -> Result<T> {
3661    serde_json::from_value(value).map_err(|error| serde_json_error(&error))
3662}
3663
3664fn json_string<T: Serialize + ?Sized>(value: &T) -> Result<String> {
3665    serde_json::to_string(value).map_err(|error| serde_json_error(&error))
3666}
3667
3668fn table_sql_contains(conn: &rusqlite::Connection, table: &str, needle: &str) -> Result<bool> {
3669    let sql = sqlite_result(
3670        conn.query_row(
3671            "SELECT sql FROM sqlite_master WHERE type = 'table' AND name = ?1",
3672            params![table],
3673            |row| row.get::<_, Option<String>>(0),
3674        )
3675        .optional(),
3676    )?
3677    .flatten()
3678    .unwrap_or_default();
3679    Ok(sql.contains(needle))
3680}
3681
3682fn read_exact_range(path: &Path, offset: u64, length: u64) -> Result<Vec<u8>> {
3683    let mut file = fs::File::open(path)
3684        .map_err(|error| import_error(format!("failed to open {}: {error}", path.display())))?;
3685    read_exact_range_from_file(&mut file, path, offset, length)
3686}
3687
3688fn read_feature_slices_with_base(
3689    locations: &[&FeatureLocation],
3690    metadata_bytes: &[u8],
3691) -> Result<Vec<CityModel>> {
3692    let mut locations_by_path: BTreeMap<PathBuf, Vec<usize>> = BTreeMap::new();
3693    for (index, location) in locations.iter().enumerate() {
3694        locations_by_path
3695            .entry(location.source_path.clone())
3696            .or_default()
3697            .push(index);
3698    }
3699
3700    let mut models = std::iter::repeat_with(|| None)
3701        .take(locations.len())
3702        .collect::<Vec<Option<CityModel>>>();
3703    for (path, mut indexes) in locations_by_path {
3704        indexes.sort_by_key(|index| locations[*index].offset);
3705        let mut file = fs::File::open(&path)
3706            .map_err(|error| import_error(format!("failed to open {}: {error}", path.display())))?;
3707        for index in indexes {
3708            let location = locations[index];
3709            let feature_bytes =
3710                read_exact_range_from_file(&mut file, &path, location.offset, location.length)?;
3711            let model = feature_slice_with_indexed_id(&feature_bytes, location, metadata_bytes)?;
3712            models[index] = Some(model);
3713        }
3714    }
3715
3716    Ok(models
3717        .into_iter()
3718        .map(|model| model.expect("every input feature should have a decoded model"))
3719        .collect())
3720}
3721
3722fn feature_slice_with_indexed_id(
3723    feature_bytes: &[u8],
3724    loc: &FeatureLocation,
3725    metadata_bytes: &[u8],
3726) -> Result<CityModel> {
3727    staged::from_feature_slice_with_indexed_id_and_base(
3728        feature_bytes,
3729        loc.feature_id.as_str(),
3730        metadata_bytes,
3731    )
3732}
3733
3734fn read_exact_range_from_file(
3735    file: &mut fs::File,
3736    path: &Path,
3737    offset: u64,
3738    length: u64,
3739) -> Result<Vec<u8>> {
3740    let length = usize::try_from(length).map_err(|_| {
3741        import_error(format!(
3742            "requested read of {length} bytes from {} exceeds the supported buffer size",
3743            path.display()
3744        ))
3745    })?;
3746    if length > isize::MAX as usize {
3747        return Err(import_error(format!(
3748            "requested read of {length} bytes from {} exceeds the supported buffer size",
3749            path.display()
3750        )));
3751    }
3752
3753    let mut bytes = Vec::new();
3754    bytes.try_reserve_exact(length).map_err(|error| {
3755        import_error(format!(
3756            "failed to allocate buffer for {} bytes from {}: {error}",
3757            length,
3758            path.display()
3759        ))
3760    })?;
3761    bytes.resize(length, 0);
3762
3763    file.seek(SeekFrom::Start(offset)).map_err(|error| {
3764        import_error(format!(
3765            "failed to seek to byte offset {offset} in {}: {error}",
3766            path.display()
3767        ))
3768    })?;
3769    file.read_exact(&mut bytes).map_err(|error| {
3770        if error.kind() == ErrorKind::UnexpectedEof {
3771            import_error(format!(
3772                "short read while reading {length} bytes at offset {offset} from {}",
3773                path.display()
3774            ))
3775        } else {
3776            import_error(format!(
3777                "failed to read {length} bytes at offset {offset} from {}: {error}",
3778                path.display()
3779            ))
3780        }
3781    })?;
3782
3783    Ok(bytes)
3784}
3785
3786fn read_json(path: impl AsRef<Path>) -> Result<Value> {
3787    let bytes = fs::read(path.as_ref())?;
3788    parse_json_slice(&bytes)
3789}
3790
3791fn file_status(path: &Path) -> Result<(u64, i64)> {
3792    let metadata = fs::metadata(path)?;
3793    let modified = metadata.modified().map_err(|error| {
3794        import_error(format!(
3795            "failed to read modified time for {}: {error}",
3796            path.display()
3797        ))
3798    })?;
3799    let since_epoch = modified.duration_since(UNIX_EPOCH).map_err(|error| {
3800        import_error(format!(
3801            "modified time for {} is before the unix epoch: {error}",
3802            path.display()
3803        ))
3804    })?;
3805    let nanos = i64::try_from(since_epoch.as_nanos())
3806        .map_err(|_| import_error("modified time does not fit in i64 nanoseconds"))?;
3807    Ok((metadata.len(), nanos))
3808}
3809
3810fn feature_cityobject_count(feature: &Value, context: &str) -> Result<u64> {
3811    let cityobjects = feature
3812        .get("CityObjects")
3813        .and_then(Value::as_object)
3814        .ok_or_else(|| import_error(format!("{context} is missing CityObjects")))?;
3815    u64::try_from(cityobjects.len())
3816        .map_err(|_| import_error("CityObject count does not fit in u64"))
3817}
3818
3819fn scan_ndjson_source(path: &Path) -> Result<SourceScan> {
3820    let bytes = fs::read(path)?;
3821    let (source_size, source_mtime_ns) = file_status(path)?;
3822    let line_spans = line_spans(&bytes);
3823    let Some((_, metadata_bytes)) = line_spans.first() else {
3824        return Err(import_error(format!(
3825            "NDJSON source {} is empty",
3826            path.display()
3827        )));
3828    };
3829
3830    let metadata: Meta = parse_json_slice(metadata_bytes)?;
3831    let (scale, translate) = parse_ndjson_transform(&metadata)?;
3832    let mut features = Vec::new();
3833
3834    for (offset, line_bytes) in line_spans.into_iter().skip(1) {
3835        if line_bytes.iter().all(u8::is_ascii_whitespace) {
3836            continue;
3837        }
3838
3839        let feature: Value = parse_json_slice(line_bytes)?;
3840        let (ids, bounds) = parse_ndjson_feature_bounds(&feature, scale, translate)?;
3841        let cityobject_count = feature_cityobject_count(&feature, "ndjson feature")?;
3842        let length = u64::try_from(line_bytes.len())
3843            .map_err(|_| import_error("NDJSON feature line length does not fit in u64"))?;
3844        features.extend(ids.into_iter().map(|id| ScannedFeature {
3845            id,
3846            path: path.to_path_buf(),
3847            file_size: source_size,
3848            file_mtime_ns: source_mtime_ns,
3849            offset,
3850            length,
3851            bounds,
3852            cityobject_count,
3853            member_ranges: None,
3854        }));
3855    }
3856
3857    Ok(SourceScan {
3858        path: path.to_path_buf(),
3859        metadata,
3860        vertices_offset: None,
3861        vertices_length: None,
3862        source_size,
3863        source_mtime_ns,
3864        features,
3865    })
3866}
3867
3868fn collect_layout_files(paths: &[PathBuf], suffix: &str) -> Result<Vec<PathBuf>> {
3869    let mut files = Vec::new();
3870
3871    for root in paths {
3872        if root.is_file() {
3873            if root.to_string_lossy().ends_with(suffix) {
3874                files.push(root.clone());
3875            }
3876            continue;
3877        }
3878
3879        for entry in WalkBuilder::new(root)
3880            .hidden(false)
3881            .follow_links(true)
3882            .build()
3883        {
3884            let entry = entry.map_err(|error| import_error(error.to_string()))?;
3885            if !entry.file_type().is_some_and(|ft| ft.is_file()) {
3886                continue;
3887            }
3888            let path = entry.into_path();
3889            if path.to_string_lossy().ends_with(suffix) {
3890                files.push(path);
3891            }
3892        }
3893    }
3894
3895    files.sort();
3896    files.dedup();
3897    Ok(files)
3898}
3899
3900fn scan_cityjson_source(path: &Path) -> Result<SourceScan> {
3901    let bytes = fs::read(path)?;
3902    let (source_size, source_mtime_ns) = file_status(path)?;
3903    let document: Value = parse_json_slice(&bytes)?;
3904    let metadata = cityjson_base_metadata(&document)?;
3905    let (scale, translate) = parse_ndjson_transform(&metadata)?;
3906
3907    let cityobjects = document
3908        .get("CityObjects")
3909        .and_then(Value::as_object)
3910        .ok_or_else(|| {
3911            import_error(format!(
3912                "CityJSON source {} is missing CityObjects",
3913                path.display()
3914            ))
3915        })?;
3916    let vertices_value = document.get("vertices").ok_or_else(|| {
3917        import_error(format!(
3918            "CityJSON source {} is missing vertices",
3919            path.display()
3920        ))
3921    })?;
3922    let vertices: Vec<[i64; 3]> = parse_json_value(vertices_value.clone())?;
3923    let (vertices_offset, vertices_length) = top_level_value_range(&bytes, "vertices")?;
3924    let cityobject_ranges = cityobject_entry_ranges(&bytes)?
3925        .into_iter()
3926        .map(|(id, offset, length)| (id, (offset, length)))
3927        .collect::<HashMap<_, _>>();
3928
3929    let root_ids = root_cityobject_ids(cityobjects);
3930    let mut features = Vec::with_capacity(root_ids.len());
3931    for id in root_ids {
3932        let (offset, length) = cityobject_ranges.get(id).copied().ok_or_else(|| {
3933            import_error(format!(
3934                "CityObject fragment for {id} could not be located in {}",
3935                path.display()
3936            ))
3937        })?;
3938        let member_ids = collect_cityjson_feature_members(id, cityobjects)?;
3939        let member_ranges = member_ids
3940            .iter()
3941            .map(|member_id| {
3942                let (member_offset, member_length) =
3943                    cityobject_ranges.get(member_id).copied().ok_or_else(|| {
3944                        import_error(format!(
3945                            "CityObject fragment for {member_id} could not be located in {}",
3946                            path.display()
3947                        ))
3948                    })?;
3949                Ok(IndexedObjectRange {
3950                    id: member_id.clone(),
3951                    offset: member_offset,
3952                    length: member_length,
3953                })
3954            })
3955            .collect::<Result<Vec<_>>>()?;
3956        let mut referenced_vertices = BTreeSet::new();
3957        let mut visited = BTreeSet::new();
3958        collect_cityjson_object_vertex_indices(
3959            id,
3960            cityobjects,
3961            &mut referenced_vertices,
3962            &mut visited,
3963        )?;
3964        if referenced_vertices.is_empty() {
3965            return Err(import_error(format!(
3966                "CityObject {id} in {} does not reference any vertices",
3967                path.display()
3968            )));
3969        }
3970        let bounds =
3971            feature_bounds_from_vertices(&vertices, &referenced_vertices, scale, translate)?;
3972        features.push(ScannedFeature {
3973            id: id.clone(),
3974            path: path.to_path_buf(),
3975            file_size: source_size,
3976            file_mtime_ns: source_mtime_ns,
3977            offset,
3978            length,
3979            bounds,
3980            cityobject_count: u64::try_from(member_ranges.len())
3981                .map_err(|_| import_error("CityObject count does not fit in u64"))?,
3982            member_ranges: Some(member_ranges),
3983        });
3984    }
3985
3986    Ok(SourceScan {
3987        path: path.to_path_buf(),
3988        metadata,
3989        vertices_offset: Some(vertices_offset),
3990        vertices_length: Some(vertices_length),
3991        source_size,
3992        source_mtime_ns,
3993        features,
3994    })
3995}
3996
3997fn cityjson_base_metadata(document: &Value) -> Result<Meta> {
3998    let mut metadata = document.clone();
3999    let root = metadata
4000        .as_object_mut()
4001        .ok_or_else(|| import_error("CityJSON document root must be a JSON object"))?;
4002    root.insert("CityObjects".to_owned(), Value::Object(Map::new()));
4003    root.insert("vertices".to_owned(), Value::Array(Vec::new()));
4004    Ok(metadata)
4005}
4006
4007fn root_cityobject_ids(cityobjects: &Map<String, Value>) -> Vec<&String> {
4008    let mut child_ids = BTreeSet::new();
4009    let mut ids = cityobjects.keys().collect::<Vec<_>>();
4010
4011    for object in cityobjects.values() {
4012        if let Some(children) = object.get("children").and_then(Value::as_array) {
4013            for child in children {
4014                if let Some(child_id) = child.as_str() {
4015                    child_ids.insert(child_id.to_owned());
4016                }
4017            }
4018        }
4019    }
4020
4021    ids.sort();
4022    ids.into_iter()
4023        .filter(|id| {
4024            cityobjects
4025                .get(*id)
4026                .and_then(|object| object.get("parents"))
4027                .and_then(Value::as_array)
4028                .is_none_or(Vec::is_empty)
4029                && !child_ids.contains(id.as_str())
4030        })
4031        .collect()
4032}
4033
4034fn collect_cityjson_feature_members(
4035    root_id: &str,
4036    cityobjects: &Map<String, Value>,
4037) -> Result<Vec<String>> {
4038    let mut members = Vec::new();
4039    let mut visited = BTreeSet::new();
4040    collect_cityjson_feature_members_recursive(root_id, cityobjects, &mut members, &mut visited)?;
4041    Ok(members)
4042}
4043
4044fn collect_cityjson_feature_members_recursive(
4045    object_id: &str,
4046    cityobjects: &Map<String, Value>,
4047    members: &mut Vec<String>,
4048    visited: &mut BTreeSet<String>,
4049) -> Result<()> {
4050    if !visited.insert(object_id.to_owned()) {
4051        return Ok(());
4052    }
4053
4054    let object = cityobjects.get(object_id).ok_or_else(|| {
4055        import_error(format!(
4056            "CityJSON source is missing referenced CityObject {object_id}"
4057        ))
4058    })?;
4059    members.push(object_id.to_owned());
4060
4061    if let Some(children) = object.get("children").and_then(Value::as_array) {
4062        for child in children {
4063            let Some(child_id) = child.as_str() else {
4064                return Err(import_error(
4065                    "CityObject children must be string identifiers",
4066                ));
4067            };
4068            if cityobjects.contains_key(child_id) {
4069                collect_cityjson_feature_members_recursive(
4070                    child_id,
4071                    cityobjects,
4072                    members,
4073                    visited,
4074                )?;
4075            }
4076        }
4077    }
4078
4079    Ok(())
4080}
4081
4082fn collect_cityjson_object_vertex_indices(
4083    object_id: &str,
4084    cityobjects: &Map<String, Value>,
4085    indices: &mut BTreeSet<usize>,
4086    visited: &mut BTreeSet<String>,
4087) -> Result<()> {
4088    if !visited.insert(object_id.to_owned()) {
4089        return Ok(());
4090    }
4091
4092    let object = cityobjects.get(object_id).ok_or_else(|| {
4093        import_error(format!(
4094            "CityJSON source is missing referenced CityObject {object_id}"
4095        ))
4096    })?;
4097    collect_object_vertex_indices(object, indices)?;
4098
4099    if let Some(children) = object.get("children").and_then(Value::as_array) {
4100        for child in children {
4101            let Some(child_id) = child.as_str() else {
4102                return Err(import_error(
4103                    "CityObject children must be string identifiers",
4104                ));
4105            };
4106            if cityobjects.contains_key(child_id) {
4107                collect_cityjson_object_vertex_indices(child_id, cityobjects, indices, visited)?;
4108            }
4109        }
4110    }
4111
4112    Ok(())
4113}
4114
4115fn collect_object_vertex_indices(object: &Value, indices: &mut BTreeSet<usize>) -> Result<()> {
4116    if let Some(geometries) = object.get("geometry").and_then(Value::as_array) {
4117        for geometry in geometries {
4118            if let Some(boundaries) = geometry.get("boundaries") {
4119                collect_vertex_indices(boundaries, indices)?;
4120            }
4121        }
4122    }
4123    Ok(())
4124}
4125
4126fn top_level_value_range(bytes: &[u8], key: &str) -> Result<(u64, u64)> {
4127    let key_start = find_json_key(bytes, key)
4128        .ok_or_else(|| import_error(format!("top-level key {key} could not be located")))?;
4129    let mut cursor = skip_json_whitespace(bytes, key_start + key.len() + 2);
4130    if bytes.get(cursor) != Some(&b':') {
4131        return Err(import_error(format!(
4132            "top-level key {key} is missing a value separator"
4133        )));
4134    }
4135    cursor = skip_json_whitespace(bytes, cursor + 1);
4136    let value_end = json_value_end(bytes, cursor)?;
4137    Ok((
4138        u64::try_from(cursor).map_err(|_| import_error("value offset does not fit in u64"))?,
4139        u64::try_from(value_end - cursor)
4140            .map_err(|_| import_error("value length does not fit in u64"))?,
4141    ))
4142}
4143
4144fn cityobject_entry_ranges(bytes: &[u8]) -> Result<Vec<(String, u64, u64)>> {
4145    let key_start = find_json_key(bytes, "CityObjects")
4146        .ok_or_else(|| import_error("top-level key CityObjects could not be located"))?;
4147    let mut cursor = skip_json_whitespace(bytes, key_start + "\"CityObjects\"".len());
4148    if bytes.get(cursor) != Some(&b':') {
4149        return Err(import_error("CityObjects key is missing a value separator"));
4150    }
4151    cursor = skip_json_whitespace(bytes, cursor + 1);
4152    if bytes.get(cursor) != Some(&b'{') {
4153        return Err(import_error("CityObjects must be a JSON object"));
4154    }
4155    cursor += 1;
4156
4157    let mut entries = Vec::new();
4158    loop {
4159        cursor = skip_json_whitespace(bytes, cursor);
4160        match bytes.get(cursor) {
4161            Some(b'}') => break,
4162            Some(b'"') => {
4163                let entry_start = cursor;
4164                let (id, after_key) = parse_json_string(bytes, cursor)?;
4165                cursor = skip_json_whitespace(bytes, after_key);
4166                if bytes.get(cursor) != Some(&b':') {
4167                    return Err(import_error(
4168                        "CityObject entry is missing a value separator",
4169                    ));
4170                }
4171                cursor = skip_json_whitespace(bytes, cursor + 1);
4172                let value_end = json_value_end(bytes, cursor)?;
4173                let offset = u64::try_from(entry_start)
4174                    .map_err(|_| import_error("CityObject entry offset does not fit in u64"))?;
4175                let length = u64::try_from(value_end - entry_start)
4176                    .map_err(|_| import_error("CityObject entry length does not fit in u64"))?;
4177                entries.push((id, offset, length));
4178                cursor = skip_json_whitespace(bytes, value_end);
4179                match bytes.get(cursor) {
4180                    Some(b',') => cursor += 1,
4181                    Some(b'}') => break,
4182                    _ => {
4183                        return Err(import_error(
4184                            "CityObjects entries must be separated by commas",
4185                        ));
4186                    }
4187                }
4188            }
4189            _ => return Err(import_error("unexpected token inside CityObjects object")),
4190        }
4191    }
4192
4193    Ok(entries)
4194}
4195
/// Returns the byte offset of the opening quote of the top-level member
/// `key` in a JSON document, or `None` when the root has no such member.
///
/// The document is scanned structurally: string contents are skipped and
/// nesting depth is tracked, so an identically named key inside a nested
/// object, or a string value that happens to equal the key, is never
/// reported. A match must be followed (after optional whitespace) by `:`.
///
/// The key is compared against the raw bytes, so a key written with escape
/// sequences in the document is not recognised.
fn find_json_key(bytes: &[u8], key: &str) -> Option<usize> {
    let needle = format!("\"{key}\"");
    let needle = needle.as_bytes();
    let mut depth = 0usize;
    let mut index = 0usize;

    while let Some(&byte) = bytes.get(index) {
        match byte {
            b'"' => {
                // Find the end of this string token, honouring escapes; an
                // unterminated string means there is nothing left to match.
                let mut end = index + 1;
                let mut escaped = false;
                loop {
                    let current = *bytes.get(end)?;
                    end += 1;
                    if escaped {
                        escaped = false;
                    } else if current == b'\\' {
                        escaped = true;
                    } else if current == b'"' {
                        break;
                    }
                }

                // Depth 1 is the inside of the root value; only there can a
                // string followed by ':' be a top-level key.
                if depth == 1 && &bytes[index..end] == needle {
                    let separator = bytes[end..]
                        .iter()
                        .find(|candidate| !candidate.is_ascii_whitespace());
                    if separator == Some(&b':') {
                        return Some(index);
                    }
                }
                index = end;
            }
            b'{' | b'[' => {
                depth += 1;
                index += 1;
            }
            b'}' | b']' => {
                depth = depth.saturating_sub(1);
                index += 1;
            }
            _ => index += 1,
        }
    }

    None
}
4202
/// Returns the index of the first byte at or after `index` that is not ASCII
/// whitespace, or the first index past the end of `bytes` if none remains.
///
/// An `index` that is already out of bounds is returned unchanged.
fn skip_json_whitespace(bytes: &[u8], mut index: usize) -> usize {
    loop {
        match bytes.get(index) {
            Some(byte) if byte.is_ascii_whitespace() => index += 1,
            _ => return index,
        }
    }
}
4209
4210fn parse_json_string(bytes: &[u8], start: usize) -> Result<(String, usize)> {
4211    let mut index = start + 1;
4212    let mut escaped = false;
4213
4214    while let Some(byte) = bytes.get(index) {
4215        if escaped {
4216            escaped = false;
4217        } else if *byte == b'\\' {
4218            escaped = true;
4219        } else if *byte == b'"' {
4220            let end = index + 1;
4221            return Ok((parse_json_slice(&bytes[start..end])?, end));
4222        }
4223        index += 1;
4224    }
4225
4226    Err(import_error("unterminated JSON string"))
4227}
4228
4229fn json_value_end(bytes: &[u8], start: usize) -> Result<usize> {
4230    match bytes.get(start) {
4231        Some(b'{') => nested_json_end(bytes, start, b'{', b'}'),
4232        Some(b'[') => nested_json_end(bytes, start, b'[', b']'),
4233        Some(b'"') => parse_json_string(bytes, start).map(|(_, end)| end),
4234        Some(_) => {
4235            let mut end = start;
4236            while let Some(byte) = bytes.get(end) {
4237                if byte.is_ascii_whitespace() || matches!(*byte, b',' | b'}' | b']') {
4238                    break;
4239                }
4240                end += 1;
4241            }
4242            Ok(end)
4243        }
4244        None => Err(import_error("unexpected end of JSON input")),
4245    }
4246}
4247
4248fn nested_json_end(bytes: &[u8], start: usize, open: u8, close: u8) -> Result<usize> {
4249    let mut depth = 0usize;
4250    let mut index = start;
4251    let mut in_string = false;
4252    let mut escaped = false;
4253
4254    while let Some(byte) = bytes.get(index) {
4255        if in_string {
4256            if escaped {
4257                escaped = false;
4258            } else if *byte == b'\\' {
4259                escaped = true;
4260            } else if *byte == b'"' {
4261                in_string = false;
4262            }
4263        } else if *byte == b'"' {
4264            in_string = true;
4265        } else if *byte == open {
4266            depth += 1;
4267        } else if *byte == close {
4268            depth -= 1;
4269            if depth == 0 {
4270                return Ok(index + 1);
4271            }
4272        }
4273        index += 1;
4274    }
4275
4276    Err(import_error("unterminated JSON value"))
4277}
4278
4279fn parse_ndjson_transform(metadata: &Value) -> Result<([f64; 3], [f64; 3])> {
4280    let transform = metadata
4281        .get("transform")
4282        .and_then(Value::as_object)
4283        .ok_or_else(|| import_error("NDJSON metadata is missing transform"))?;
4284
4285    let scale = parse_vector3_f64(transform, "scale")?;
4286    let translate = parse_vector3_f64(transform, "translate")?;
4287    Ok((scale, translate))
4288}
4289
4290fn feature_cityobject_keys(feature: &Value, label: &str) -> Result<Vec<String>> {
4291    let cityobjects = feature
4292        .get("CityObjects")
4293        .ok_or_else(|| import_error(format!("{label} is missing CityObjects")))?
4294        .as_object()
4295        .ok_or_else(|| import_error(format!("{label} CityObjects must be an object")))?;
4296    if cityobjects.is_empty() {
4297        return Err(import_error(format!(
4298            "{label} CityObjects must contain at least one CityObject"
4299        )));
4300    }
4301    Ok(cityobjects.keys().cloned().collect())
4302}
4303
4304fn collect_feature_vertex_indices(feature: &Value, vertex_count: usize) -> Result<BTreeSet<usize>> {
4305    let mut indices = BTreeSet::new();
4306    let cityobjects = feature
4307        .get("CityObjects")
4308        .and_then(Value::as_object)
4309        .ok_or_else(|| import_error("feature package is missing CityObjects"))?;
4310
4311    for object in cityobjects.values() {
4312        collect_object_vertex_indices(object, &mut indices)?;
4313    }
4314
4315    if indices.is_empty() {
4316        indices.extend(0..vertex_count);
4317    }
4318
4319    Ok(indices)
4320}
4321
4322fn parse_vector3_f64(object: &Map<String, Value>, key: &str) -> Result<[f64; 3]> {
4323    let array = object
4324        .get(key)
4325        .and_then(Value::as_array)
4326        .ok_or_else(|| import_error(format!("transform is missing {key}")))?;
4327    if array.len() != 3 {
4328        return Err(import_error(format!(
4329            "transform {key} must contain three values"
4330        )));
4331    }
4332
4333    Ok([
4334        array[0]
4335            .as_f64()
4336            .ok_or_else(|| import_error(format!("transform {key}[0] must be numeric")))?,
4337        array[1]
4338            .as_f64()
4339            .ok_or_else(|| import_error(format!("transform {key}[1] must be numeric")))?,
4340        array[2]
4341            .as_f64()
4342            .ok_or_else(|| import_error(format!("transform {key}[2] must be numeric")))?,
4343    ])
4344}
4345
4346fn parse_ndjson_feature_bounds(
4347    feature: &Value,
4348    scale: [f64; 3],
4349    translate: [f64; 3],
4350) -> Result<(Vec<String>, FeatureBounds)> {
4351    let ids = feature_cityobject_keys(feature, "NDJSON feature")?;
4352    let vertices = feature
4353        .get("vertices")
4354        .ok_or_else(|| import_error("NDJSON feature is missing vertices"))?;
4355    let vertices: Vec<[i64; 3]> = parse_json_value(vertices.clone())?;
4356    let referenced_vertices = collect_feature_vertex_indices(feature, vertices.len())?;
4357    let bounds = feature_bounds_from_vertices(&vertices, &referenced_vertices, scale, translate)?;
4358    Ok((ids, bounds))
4359}
4360
4361#[allow(clippy::cast_precision_loss)]
4362fn feature_bounds_from_vertices(
4363    vertices: &[[i64; 3]],
4364    referenced_vertices: &BTreeSet<usize>,
4365    scale: [f64; 3],
4366    translate: [f64; 3],
4367) -> Result<FeatureBounds> {
4368    let mut min_x = f64::INFINITY;
4369    let mut max_x = f64::NEG_INFINITY;
4370    let mut min_y = f64::INFINITY;
4371    let mut max_y = f64::NEG_INFINITY;
4372    let mut min_z = f64::INFINITY;
4373    let mut max_z = f64::NEG_INFINITY;
4374
4375    for &index in referenced_vertices {
4376        let vertex = vertices.get(index).copied().ok_or_else(|| {
4377            import_error(format!(
4378                "vertex index {index} is outside the NDJSON feature vertex array"
4379            ))
4380        })?;
4381        let x = translate[0] + scale[0] * vertex[0] as f64;
4382        let y = translate[1] + scale[1] * vertex[1] as f64;
4383        let z = translate[2] + scale[2] * vertex[2] as f64;
4384        min_x = min_x.min(x);
4385        max_x = max_x.max(x);
4386        min_y = min_y.min(y);
4387        max_y = max_y.max(y);
4388        min_z = min_z.min(z);
4389        max_z = max_z.max(z);
4390    }
4391
4392    if !min_x.is_finite()
4393        || !min_y.is_finite()
4394        || !min_z.is_finite()
4395        || !max_x.is_finite()
4396        || !max_y.is_finite()
4397        || !max_z.is_finite()
4398    {
4399        return Err(import_error("NDJSON feature bbox could not be computed"));
4400    }
4401
4402    Ok(FeatureBounds {
4403        min_x,
4404        max_x,
4405        min_y,
4406        max_y,
4407        min_z,
4408        max_z,
4409    })
4410}
4411
4412fn line_spans(bytes: &[u8]) -> Vec<(u64, &[u8])> {
4413    let mut spans = Vec::new();
4414    let mut offset = 0u64;
4415
4416    for chunk in bytes.split_inclusive(|byte| *byte == b'\n') {
4417        spans.push((offset, trim_line_ending(chunk)));
4418        offset += u64::try_from(chunk.len()).expect("line chunk length fits in u64");
4419    }
4420
4421    if bytes.is_empty() {
4422        spans.clear();
4423    }
4424
4425    spans
4426}
4427
/// Returns `bytes` without any trailing run of `\n` and `\r` bytes.
///
/// Only the end of the slice is trimmed; line-ending bytes elsewhere are kept.
fn trim_line_ending(bytes: &[u8]) -> &[u8] {
    // Index just past the last byte that is not a line-ending byte (0 if none).
    let end = bytes
        .iter()
        .rposition(|&byte| byte != b'\n' && byte != b'\r')
        .map_or(0, |last_kept| last_kept + 1);
    &bytes[..end]
}
4435
4436fn sqlite_result<T>(result: rusqlite::Result<T>) -> Result<T> {
4437    result.map_err(|value| Error::Import(value.to_string()))
4438}
4439
4440fn u64_to_i64(value: u64) -> rusqlite::Result<i64> {
4441    i64::try_from(value).map_err(|_| {
4442        rusqlite::Error::ToSqlConversionFailure(Box::new(import_error(format!(
4443            "value {value} does not fit in SQLite integer storage"
4444        ))))
4445    })
4446}
4447
4448fn i64_to_u64(value: i64) -> rusqlite::Result<u64> {
4449    u64::try_from(value).map_err(|_| {
4450        rusqlite::Error::ToSqlConversionFailure(Box::new(import_error(format!(
4451            "value {value} is not representable as u64"
4452        ))))
4453    })
4454}
4455
4456#[cfg(test)]
4457mod tests {
4458    use super::*;
4459    use std::time::{SystemTime, UNIX_EPOCH};
4460
    /// Parses a `CityJSONFeature` fixture containing a `Building` parent without
    /// geometry, a `BuildingPart` child with LoD "1" and LoD "2" surfaces, and an
    /// unrelated `Road` with a single LoD "1" surface.
    ///
    /// Panics if the embedded fixture fails to parse.
    fn parent_child_lod_fixture() -> CityModel {
        cityjson_lib::json::from_feature_slice(
            br#"{
                "type":"CityJSONFeature",
                "id":"building",
                "CityObjects":{
                    "building":{
                        "type":"Building",
                        "children":["building-part"]
                    },
                    "building-part":{
                        "type":"BuildingPart",
                        "parents":["building"],
                        "geometry":[
                            {"type":"MultiSurface","lod":"1","boundaries":[[[0,1,2]]]},
                            {"type":"MultiSurface","lod":"2","boundaries":[[[0,2,3]]]}
                        ]
                    },
                    "road":{
                        "type":"Road",
                        "geometry":[{"type":"MultiSurface","lod":"1","boundaries":[[[4,5,6]]]}]
                    }
                },
                "vertices":[[0,0,0],[1,0,0],[1,1,0],[0,1,0],[10,0,0],[11,0,0],[10,1,0]]
            }"#,
        )
        .expect("parent-child fixture should parse")
    }
4489
4490    #[test]
4491    fn feature_filter_selecting_parent_type_retains_child_geometry() {
4492        let filter = FeatureFilter {
4493            cityobject_types: Some(BTreeSet::from(["Building".to_owned()])),
4494            default_lod: LodSelection::Highest,
4495            lods_by_type: BTreeMap::new(),
4496        };
4497
4498        let filtered = filter
4499            .apply(&parent_child_lod_fixture())
4500            .expect("filter should succeed");
4501
4502        assert_eq!(
4503            filtered.diagnostics.retained_types,
4504            BTreeSet::from(["Building".to_owned(), "BuildingPart".to_owned()])
4505        );
4506        assert_eq!(
4507            filtered.diagnostics.retained_lods.get("BuildingPart"),
4508            Some(&BTreeSet::from(["2".to_owned()]))
4509        );
4510        assert!(filtered.model.cityobjects().iter().any(|(_, cityobject)| {
4511            cityobject.id() == "building-part"
4512                && cityobject
4513                    .geometry()
4514                    .is_some_and(|geometries| !geometries.is_empty())
4515        }));
4516        assert!(
4517            !filtered
4518                .model
4519                .cityobjects()
4520                .iter()
4521                .any(|(_, cityobject)| cityobject.id() == "road")
4522        );
4523    }
4524
4525    #[test]
4526    fn feature_filter_summary_reports_missing_explicit_lod() {
4527        let filter = FeatureFilter {
4528            cityobject_types: None,
4529            default_lod: LodSelection::Highest,
4530            lods_by_type: BTreeMap::from([(
4531                "BuildingPart".to_owned(),
4532                LodSelection::Exact("3".to_owned()),
4533            )]),
4534        };
4535        let filtered = filter
4536            .apply(&parent_child_lod_fixture())
4537            .expect("filter should succeed");
4538        let mut summary = FeatureFilterSummary::default();
4539        summary.add(&filtered.diagnostics);
4540
4541        let failures = summary.requested_lod_failures(&filter);
4542
4543        assert_eq!(
4544            failures,
4545            vec![MissingLodSelection {
4546                cityobject_type: "BuildingPart".to_owned(),
4547                requested_lod: "3".to_owned(),
4548                available_lods: BTreeSet::from(["1".to_owned(), "2".to_owned()]),
4549            }]
4550        );
4551    }
4552
    /// Reads a single object out of a plain CityJSON document through byte offsets and
    /// checks that the result contains only that object, only the vertices it uses
    /// (re-indexed), and the `transform` / `metadata` members of the base document.
    #[test]
    fn cityjson_read_one_localizes_vertices_and_preserves_base_root_members() {
        let selected_id = "building-1";
        // The selected object names a child that does not exist in the document and
        // references vertex indices 2, 7 and 5 of the shared vertex array.
        let selected_object = serde_json::json!({
            "type": "Building",
            "children": ["building-1-part"],
            "geometry": [{
                "type": "MultiSurface",
                "lod": "0",
                "boundaries": [[[2, 7, 5]]]
            }]
        });
        let other_object = serde_json::json!({
            "type": "Building",
            "geometry": [{
                "type": "MultiSurface",
                "lod": "0",
                "boundaries": [[[0, 1, 3]]]
            }]
        });
        // Indices 2, 5 and 7 hold the small coordinates used by the selected object;
        // the remaining entries belong to other geometry or are unused.
        let vertices = serde_json::json!([
            [100, 0, 0],
            [101, 0, 0],
            [0, 0, 0],
            [102, 0, 0],
            [103, 0, 0],
            [2, 0, 0],
            [104, 0, 0],
            [1, 0, 0]
        ]);
        let document = serde_json::json!({
            "type": "CityJSON",
            "version": "2.0",
            "transform": {
                "scale": [0.5, 0.5, 0.5],
                "translate": [10.0, 20.0, 30.0]
            },
            "metadata": {
                "title": "unit-test-fixture"
            },
            "CityObjects": {
                selected_id: selected_object.clone(),
                "other-object": other_object
            },
            "vertices": vertices.clone()
        });
        let document_bytes = serde_json::to_vec(&document).expect("fixture JSON");
        let base_document = cityjson_base_metadata(&document).expect("base CityJSON metadata");
        let base_document_bytes: Arc<[u8]> =
            Arc::from(serde_json::to_vec(&base_document).expect("base CityJSON metadata bytes"));
        // The byte ranges are found by searching the serialized document for the
        // separately serialized object entry and vertex array.
        let object_fragment = object_entry_fragment(selected_id, &selected_object);
        let vertices_fragment = serde_json::to_vec(&vertices).expect("vertices fragment");
        let loc = FeatureLocation {
            feature_id: selected_id.to_owned(),
            source_id: 0,
            source_path: write_temp_cityjson(&document_bytes),
            offset: find_subslice(&document_bytes, &object_fragment)
                .expect("selected object offset") as u64,
            length: object_fragment.len() as u64,
            vertices_offset: Some(
                find_subslice(&document_bytes, &vertices_fragment).expect("vertices offset") as u64,
            ),
            vertices_length: Some(vertices_fragment.len() as u64),
            member_ranges_json: None,
        };

        let backend = CityJsonBackend::new(vec![loc.source_path.clone()]);
        let model = backend
            .read_one(&loc, base_document_bytes)
            .expect("CityJSON read should succeed");
        // Round-trip through the serializer so the assertions work on plain JSON.
        let output: Value =
            serde_json::from_str(&cityjson_lib::json::to_string(&model).expect("serialize result"))
                .expect("valid output JSON");

        let cityobjects = output["CityObjects"]
            .as_object()
            .expect("result CityObjects must be an object");
        assert_eq!(cityobjects.len(), 1);
        assert!(cityobjects.contains_key(selected_id));
        assert_eq!(output["transform"], document["transform"]);
        assert_eq!(output["metadata"], document["metadata"]);
        // The link to the absent child must not appear in the output.
        assert!(cityobjects[selected_id].get("children").is_none());
        // Expected vertices are the originals at indices 2, 5, 7 in that order, so the
        // boundary [2, 7, 5] becomes [0, 2, 1].
        assert_eq!(
            output["vertices"],
            serde_json::json!([[0, 0, 0], [2, 0, 0], [1, 0, 0]])
        );
        assert_eq!(
            cityobjects[selected_id]["geometry"][0]["boundaries"],
            serde_json::json!([[[0, 2, 1]]])
        );
    }
4644
    /// Scans a CityJSON document holding a parent and its child and checks that the
    /// scan reports one feature with two member ranges, and that reading that feature
    /// back yields both objects with their `children` / `parents` links intact.
    #[test]
    fn cityjson_scan_and_read_one_group_root_objects_with_children() {
        let document = serde_json::json!({
            "type": "CityJSON",
            "version": "2.0",
            "transform": {
                "scale": [1.0, 1.0, 1.0],
                "translate": [0.0, 0.0, 0.0]
            },
            "CityObjects": {
                "building-1": {
                    "type": "Building",
                    "children": ["building-1-part"],
                    "geometry": [{
                        "type": "MultiSurface",
                        "lod": "1.0",
                        "boundaries": [[[0, 1, 2]]]
                    }]
                },
                "building-1-part": {
                    "type": "BuildingPart",
                    "parents": ["building-1"],
                    "geometry": [{
                        "type": "MultiSurface",
                        "lod": "1.0",
                        "boundaries": [[[3, 4, 5]]]
                    }]
                }
            },
            "vertices": [
                [0, 0, 0],
                [1, 0, 0],
                [0, 1, 0],
                [2, 0, 0],
                [3, 0, 0],
                [2, 1, 0]
            ]
        });
        let bytes = serde_json::to_vec(&document).expect("fixture JSON");
        let path = write_temp_cityjson(&bytes);
        let scan = scan_cityjson_source(&path).expect("scan should succeed");

        // Parent and child are reported as a single feature named after the parent.
        assert_eq!(scan.features.len(), 1);
        assert_eq!(scan.features[0].id, "building-1");
        let member_ranges = scan.features[0]
            .member_ranges
            .as_ref()
            .expect("root feature should carry member ranges");
        assert_eq!(member_ranges.len(), 2);
        assert_eq!(member_ranges[0].id, "building-1");
        assert_eq!(member_ranges[1].id, "building-1-part");

        // Build the location from the scan output, passing member ranges as JSON text.
        let loc = FeatureLocation {
            feature_id: scan.features[0].id.clone(),
            source_id: 0,
            source_path: path,
            offset: scan.features[0].offset,
            length: scan.features[0].length,
            vertices_offset: scan.vertices_offset,
            vertices_length: scan.vertices_length,
            member_ranges_json: Some(
                serde_json::to_string(member_ranges).expect("member ranges JSON"),
            ),
        };
        let backend = CityJsonBackend::new(vec![loc.source_path.clone()]);
        let metadata_bytes: Arc<[u8]> =
            Arc::from(serde_json::to_vec(&scan.metadata).expect("metadata JSON"));
        let model = backend
            .read_one(&loc, metadata_bytes)
            .expect("CityJSON read should succeed");
        let output: Value =
            serde_json::from_str(&cityjson_lib::json::to_string(&model).expect("serialize result"))
                .expect("valid output JSON");
        let cityobjects = output["CityObjects"]
            .as_object()
            .expect("result CityObjects must be an object");

        // Both members are present and still linked to each other.
        assert_eq!(cityobjects.len(), 2);
        assert!(cityobjects.contains_key("building-1"));
        assert!(cityobjects.contains_key("building-1-part"));
        assert_eq!(
            cityobjects["building-1"]["children"],
            serde_json::json!(["building-1-part"])
        );
        assert_eq!(
            cityobjects["building-1-part"]["parents"],
            serde_json::json!(["building-1"])
        );
    }
4734
    /// Builds feature parts for a lone `BuildingPart` whose parent is not among the
    /// supplied objects and checks that the `parents` link is removed and that the
    /// vertices are reduced to the referenced ones with re-indexed boundaries.
    #[test]
    fn feature_parts_builder_drops_dangling_parent_links() {
        let parts = build_feature_parts(
            "building-1-part",
            vec![(
                "building-1-part".to_owned(),
                serde_json::json!({
                    "type": "BuildingPart",
                    "parents": ["building-1"],
                    "geometry": [{
                        "type": "MultiSurface",
                        "lod": "0",
                        "boundaries": [[[5, 9, 7]]]
                    }]
                }),
            )],
            // Only indices 5, 7 and 9 are referenced by the boundary above.
            &[
                [100, 0, 0],
                [101, 0, 0],
                [102, 0, 0],
                [103, 0, 0],
                [104, 0, 0],
                [0, 0, 0],
                [105, 0, 0],
                [2, 0, 0],
                [106, 0, 0],
                [1, 0, 0],
            ],
        )
        .expect("feature parts should build");
        let object: Value = serde_json::from_str(parts.cityobjects[0].object_json.get())
            .expect("valid object JSON");

        assert_eq!(parts.feature_id, "building-1-part");
        assert!(object.get("parents").is_none());
        // Expected vertices are the originals at indices 5, 7, 9 in that order, so the
        // boundary [5, 9, 7] becomes [0, 2, 1].
        assert_eq!(parts.vertices, vec![[0, 0, 0], [2, 0, 0], [1, 0, 0]]);
        assert_eq!(
            object["geometry"][0]["boundaries"],
            serde_json::json!([[[0, 2, 1]]])
        );
    }
4776
4777    #[test]
4778    fn ndjson_backend_scan_and_index_lookup_roundtrip() {
4779        let metadata = serde_json::json!({
4780            "type": "CityJSON",
4781            "version": "2.0",
4782            "transform": {
4783                "scale": [1.0, 1.0, 1.0],
4784                "translate": [0.0, 0.0, 0.0]
4785            }
4786        });
4787        let feature = serde_json::json!({
4788            "type": "CityJSONFeature",
4789            "id": "ndjson-test-feature",
4790            "CityObjects": {
4791                "ndjson-test-feature": {
4792                    "type": "Building",
4793                    "geometry": [{
4794                        "type": "MultiSurface",
4795                        "lod": "1.0",
4796                        "boundaries": [[[0, 1, 2]]]
4797                    }]
4798                }
4799            },
4800            "vertices": [
4801                [0, 0, 0],
4802                [1, 0, 0],
4803                [0, 1, 0]
4804            ]
4805        });
4806        let ndjson_path = write_temp_ndjson(&metadata, &feature);
4807        let backend = NdjsonBackend {
4808            paths: vec![ndjson_path.clone()],
4809        };
4810        let scans = backend.scan(1).expect("NDJSON scan should succeed");
4811        assert_eq!(scans.len(), 1);
4812        assert_eq!(scans[0].features.len(), 1);
4813        assert_eq!(scans[0].features[0].id, "ndjson-test-feature");
4814
4815        let index_path = write_temp_index_path();
4816        let mut index = Index::open(&index_path).expect("SQLite index should open");
4817        index.rebuild(&scans).expect("NDJSON scan should index");
4818
4819        let by_id = index
4820            .lookup_id("ndjson-test-feature")
4821            .expect("id lookup should succeed");
4822        assert!(
4823            by_id.is_some(),
4824            "indexed feature should be addressable by id"
4825        );
4826
4827        let hits = index
4828            .lookup_bbox_iter(BBox {
4829                min_x: -1.0,
4830                max_x: 1.0,
4831                min_y: -1.0,
4832                max_y: 1.0,
4833            })
4834            .collect::<Result<Vec<_>>>()
4835            .expect("bbox lookup should collect");
4836        assert_eq!(hits.len(), 1);
4837        assert_eq!(hits[0].source_path, ndjson_path);
4838    }
4839
    /// Creates a database whose `features.feature_id` and `bbox_map.feature_id`
    /// columns are declared `UNIQUE`, opens it through `Index::open`, and checks that
    /// the stored table definitions no longer carry that clause and that rows with a
    /// duplicate `feature_id` can be inserted afterwards.
    #[test]
    fn opening_old_unique_schema_removes_feature_id_uniqueness() {
        let index_path = write_temp_index_path_with_prefix("old-unique-schema");
        {
            // The raw connection lives only inside this block, so it is dropped before
            // `Index::open` is called on the same file.
            let conn = rusqlite::Connection::open(&index_path).expect("old index should open");
            conn.execute_batch(
                r"
                CREATE TABLE sources (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    path TEXT NOT NULL UNIQUE,
                    metadata TEXT NOT NULL,
                    vertices_offset INTEGER,
                    vertices_length INTEGER,
                    source_size INTEGER,
                    source_mtime_ns INTEGER
                );
                CREATE TABLE features (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    feature_id TEXT NOT NULL UNIQUE,
                    source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
                    path TEXT NOT NULL,
                    file_size INTEGER,
                    file_mtime_ns INTEGER,
                    offset INTEGER NOT NULL,
                    length INTEGER NOT NULL,
                    min_z REAL,
                    max_z REAL,
                    cityobject_count INTEGER,
                    member_ranges TEXT
                );
                CREATE VIRTUAL TABLE feature_bbox
                USING rtree(feature_rowid, min_x, max_x, min_y, max_y);
                CREATE TABLE bbox_map (
                    feature_rowid INTEGER PRIMARY KEY,
                    feature_id TEXT NOT NULL UNIQUE REFERENCES features(feature_id) ON DELETE CASCADE
                );
                INSERT INTO sources (path, metadata, source_size, source_mtime_ns)
                VALUES ('metadata.json', '{}', 0, 0);
                INSERT INTO features (
                    feature_id,
                    source_id,
                    path,
                    file_size,
                    file_mtime_ns,
                    offset,
                    length,
                    min_z,
                    max_z,
                    cityobject_count,
                    member_ranges
                )
                VALUES ('duplicate', 1, 'feature-a.city.jsonl', 0, 0, 0, 1, 0, 0, 1, NULL);
                INSERT INTO feature_bbox (feature_rowid, min_x, max_x, min_y, max_y)
                VALUES (1, 0, 1, 0, 1);
                INSERT INTO bbox_map (feature_rowid, feature_id) VALUES (1, 'duplicate');
                ",
            )
            .expect("old schema should initialize");
        }

        let index = Index::open(&index_path).expect("index migration should succeed");

        // Neither table definition may still contain the old UNIQUE column clause.
        assert!(
            !table_sql_contains(&index.conn, "features", "feature_id TEXT NOT NULL UNIQUE",)
                .expect("features schema should load")
        );
        assert!(
            !table_sql_contains(&index.conn, "bbox_map", "feature_id TEXT NOT NULL UNIQUE",)
                .expect("bbox_map schema should load")
        );

        // A second row with the already-present feature_id must now be accepted.
        index
            .conn
            .execute(
                r"
                INSERT INTO features (
                    feature_id,
                    source_id,
                    path,
                    file_size,
                    file_mtime_ns,
                    offset,
                    length,
                    min_z,
                    max_z,
                    cityobject_count,
                    member_ranges
                )
                VALUES ('duplicate', 1, 'feature-b.city.jsonl', 0, 0, 0, 1, 0, 0, 1, NULL)
                ",
                [],
            )
            .expect("duplicate feature_id should insert after migration");
        let row_id = index.conn.last_insert_rowid();
        index
            .conn
            .execute(
                "INSERT INTO bbox_map (feature_rowid, feature_id) VALUES (?1, 'duplicate')",
                params![row_id],
            )
            .expect("duplicate bbox_map feature_id should insert after migration");
    }
4942
4943    #[test]
4944    fn iter_all_scans_each_supported_layout_in_deterministic_order() {
4945        let expected_ids = vec!["alpha", "beta", "gamma"];
4946        let feature_files_root = write_temp_feature_files_root(&expected_ids);
4947        let feature_files_index_path = write_temp_index_path_with_prefix("feature-files");
4948        let mut feature_files_index = CityIndex::open(
4949            StorageLayout::FeatureFiles {
4950                root: feature_files_root,
4951                metadata_glob: "**/metadata.json".to_owned(),
4952                feature_glob: "**/*.city.jsonl".to_owned(),
4953            },
4954            &feature_files_index_path,
4955        )
4956        .expect("feature-files index should open");
4957        feature_files_index
4958            .reindex()
4959            .expect("feature-files dataset should index");
4960        assert_full_scan_order(&feature_files_index, &expected_ids);
4961
4962        let cityjson_root = write_temp_cityjson_root(&expected_ids);
4963        let cityjson_index_path = write_temp_index_path_with_prefix("cityjson");
4964        let mut cityjson_index = CityIndex::open(
4965            StorageLayout::CityJson {
4966                paths: vec![cityjson_root],
4967            },
4968            &cityjson_index_path,
4969        )
4970        .expect("cityjson index should open");
4971        cityjson_index
4972            .reindex()
4973            .expect("cityjson dataset should index");
4974        assert_full_scan_order(&cityjson_index, &expected_ids);
4975
4976        let ndjson_root = write_temp_ndjson_root(&expected_ids);
4977        let ndjson_index_path = write_temp_index_path_with_prefix("ndjson");
4978        let mut ndjson_index = CityIndex::open(
4979            StorageLayout::Ndjson {
4980                paths: vec![ndjson_root],
4981            },
4982            &ndjson_index_path,
4983        )
4984        .expect("ndjson index should open");
4985        ndjson_index.reindex().expect("ndjson dataset should index");
4986        assert_full_scan_order(&ndjson_index, &expected_ids);
4987        assert_full_scan_pages(&ndjson_index, &expected_ids);
4988    }
4989
    /// Indexes 600 NDJSON features and checks that id iteration, feature-ref pages,
    /// batch and single-feature reads, and bbox pages all return every feature in
    /// id order.
    #[test]
    fn iter_all_paginates_across_multiple_pages() {
        // Zero-padded ids, so lexicographic order matches numeric order.
        let ids = (0..600)
            .map(|idx| format!("feature-{idx:03}"))
            .collect::<Vec<_>>();
        let id_refs = ids.iter().map(String::as_str).collect::<Vec<_>>();
        let root = write_temp_ndjson_root(&id_refs);
        let index_path = write_temp_index_path_with_prefix("iter-all-pages");
        let layout = StorageLayout::Ndjson {
            paths: vec![root.clone()],
        };
        let mut index = CityIndex::open(layout, &index_path).expect("index should open");
        index.reindex().expect("dataset should index");

        let scanned_ids = index
            .iter_all_with_ids()
            .expect("iter_all_with_ids should build")
            .map(|result| result.map(|(id, _)| id))
            .collect::<Result<Vec<_>>>()
            .expect("iter_all_with_ids should collect");

        assert_eq!(scanned_ids.len(), 600);
        assert_eq!(scanned_ids.first().expect("first id"), "feature-000");
        assert_eq!(scanned_ids.last().expect("last id"), "feature-599");

        let ref_pages = index
            .iter_all_feature_ref_pages(128)
            .expect("iter_all_feature_ref_pages should build")
            .collect::<Result<Vec<_>>>()
            .expect("iter_all_feature_ref_pages should collect");
        // 600 = 4 * 128 + 88: four full pages and one partial page.
        assert_eq!(
            ref_pages.iter().map(Vec::len).collect::<Vec<_>>(),
            vec![128, 128, 128, 128, 88]
        );
        assert_eq!(
            ref_pages
                .iter()
                .flat_map(|page| page.iter().map(|feature| feature.feature_id.as_str()))
                .collect::<Vec<_>>(),
            ids.iter().map(String::as_str).collect::<Vec<_>>()
        );
        // Row ids are expected to run contiguously from 1 to 600.
        assert_eq!(
            ref_pages
                .iter()
                .flat_map(|page| page.iter().map(|feature| feature.row_id))
                .collect::<Vec<_>>(),
            (1..=600).collect::<Vec<_>>()
        );

        let first_batch = index
            .read_features(ref_pages.first().expect("first page should exist"))
            .expect("feature batch should reconstruct");
        assert_eq!(first_batch.len(), 128);

        assert_indexed_batch_preserves_order(&index, ref_pages.first().expect("first page"), &ids);
        assert_decoded_scan_pages(&index, &ids);
        assert_rowid_feature_reads(&index);

        // Every feature must read back with its own id and with bounds equal to the
        // bounds stored on its reference.
        for page in &ref_pages {
            for feature in page {
                let model = index
                    .read_feature(feature)
                    .expect("feature should reconstruct");
                assert!(model_contains_id(&model, &feature.feature_id));
                assert_eq!(
                    feature_bounds_for_model(&model).expect("bounds should be computable"),
                    feature.bounds
                );
            }
        }

        let bbox_pages = index
            .iter_all_bbox_pages(128)
            .expect("iter_all_bbox_pages should build")
            .collect::<Result<Vec<_>>>()
            .expect("iter_all_bbox_pages should collect");
        assert_eq!(
            bbox_pages
                .iter()
                .flat_map(|page| page.iter().map(|feature| feature.feature_id.as_str()))
                .collect::<Vec<_>>(),
            ids.iter().map(String::as_str).collect::<Vec<_>>()
        );
    }
5074
5075    #[test]
5076    fn iter_all_feature_ref_pages_handles_page_boundaries_without_gaps() {
5077        let ids = (0..256)
5078            .map(|idx| format!("boundary-{idx:03}"))
5079            .collect::<Vec<_>>();
5080        let id_refs = ids.iter().map(String::as_str).collect::<Vec<_>>();
5081        let root = write_temp_ndjson_root(&id_refs);
5082        let index_path = write_temp_index_path_with_prefix("iter-boundary-pages");
5083        let mut index = CityIndex::open(StorageLayout::Ndjson { paths: vec![root] }, &index_path)
5084            .expect("index should open");
5085        index.reindex().expect("dataset should index");
5086
5087        let pages = index
5088            .iter_all_feature_ref_pages(128)
5089            .expect("iter_all_feature_ref_pages should build")
5090            .collect::<Result<Vec<_>>>()
5091            .expect("iter_all_feature_ref_pages should collect");
5092
5093        assert_eq!(
5094            pages.iter().map(Vec::len).collect::<Vec<_>>(),
5095            vec![128, 128]
5096        );
5097        assert_eq!(
5098            pages
5099                .iter()
5100                .flat_map(|page| page.iter().map(|feature| feature.feature_id.as_str()))
5101                .collect::<Vec<_>>(),
5102            ids.iter().map(String::as_str).collect::<Vec<_>>()
5103        );
5104    }
5105
5106    #[test]
5107    fn feature_bounds_summary_matches_iterative_bounds() {
5108        let ids = ["alpha", "beta", "gamma"];
5109        let root = write_temp_ndjson_root(&ids);
5110        let index_path = write_temp_index_path_with_prefix("bounds-summary");
5111        let mut index = CityIndex::open(StorageLayout::Ndjson { paths: vec![root] }, &index_path)
5112            .expect("index should open");
5113        index.reindex().expect("dataset should index");
5114
5115        let summary = index
5116            .feature_bounds_summary()
5117            .expect("bounds summary should load")
5118            .expect("non-empty index should have a summary");
5119        let mut pages = index
5120            .iter_all_bbox_pages(2)
5121            .expect("bbox pages should build")
5122            .collect::<Result<Vec<_>>>()
5123            .expect("bbox pages should collect")
5124            .into_iter()
5125            .flatten();
5126        let first = pages.next().expect("first indexed feature");
5127        let mut expected = first.bounds;
5128        let mut count = 1usize;
5129        for feature in pages {
5130            expected.min_x = expected.min_x.min(feature.bounds.min_x);
5131            expected.max_x = expected.max_x.max(feature.bounds.max_x);
5132            expected.min_y = expected.min_y.min(feature.bounds.min_y);
5133            expected.max_y = expected.max_y.max(feature.bounds.max_y);
5134            expected.min_z = expected.min_z.min(feature.bounds.min_z);
5135            expected.max_z = expected.max_z.max(feature.bounds.max_z);
5136            count += 1;
5137        }
5138
5139        assert_eq!(summary.feature_count, count);
5140        assert_eq!(summary.bounds, expected);
5141    }
5142
5143    #[test]
5144    fn feature_bounds_summary_returns_none_for_empty_index() {
5145        let index_path = write_temp_index_path_with_prefix("empty-bounds-summary");
5146        let index = CityIndex::open(StorageLayout::Ndjson { paths: Vec::new() }, &index_path)
5147            .expect("index should open");
5148
5149        assert_eq!(
5150            index.feature_bounds_summary().expect("summary should load"),
5151            None
5152        );
5153    }
5154
5155    #[test]
5156    fn iter_all_feature_ref_pages_rejects_zero_page_size() {
5157        let root = write_temp_ndjson_root(&["alpha"]);
5158        let index_path = write_temp_index_path_with_prefix("page-size-zero");
5159        let mut index = CityIndex::open(StorageLayout::Ndjson { paths: vec![root] }, &index_path)
5160            .expect("index should open");
5161        index.reindex().expect("dataset should index");
5162
5163        match index.iter_all_feature_ref_pages(0) {
5164            Ok(_) => panic!("zero page size should be rejected"),
5165            Err(error) => assert!(error.to_string().contains("page_size")),
5166        }
5167    }
5168
5169    #[test]
5170    fn read_exact_range_reads_only_the_requested_span() {
5171        let path = write_temp_bytes(b"abcdefghij");
5172
5173        let bytes = read_exact_range(&path, 3, 4).expect("range read should succeed");
5174
5175        assert_eq!(bytes, b"defg");
5176    }
5177
5178    #[test]
5179    fn read_exact_range_rejects_short_reads() {
5180        let path = write_temp_bytes(b"abc");
5181
5182        let error = read_exact_range(&path, 2, 4).expect_err("range read should fail");
5183
5184        assert!(error.to_string().contains("short read"));
5185    }
5186
5187    #[test]
5188    fn read_exact_range_rejects_oversized_lengths() {
5189        let path = write_temp_bytes(b"abc");
5190
5191        let error = read_exact_range(&path, 0, u64::MAX).expect_err("range read should fail");
5192
5193        assert!(
5194            error
5195                .to_string()
5196                .contains("exceeds the supported buffer size")
5197        );
5198    }
5199
5200    #[test]
5201    fn feature_files_metadata_resolution_prefers_nearest_ancestor() {
5202        let root = PathBuf::from("/data/root");
5203        let mut metadata_by_dir = BTreeMap::new();
5204        metadata_by_dir.insert(root.clone(), root.join("metadata.json"));
5205        metadata_by_dir.insert(
5206            root.join("features/8"),
5207            root.join("features/8/metadata.json"),
5208        );
5209
5210        let feature_path = root.join("features/8/296/592/sample.city.jsonl");
5211        let resolved = resolve_feature_metadata_path(&root, &feature_path, &metadata_by_dir)
5212            .expect("metadata must resolve");
5213
5214        assert_eq!(resolved, root.join("features/8/metadata.json"));
5215    }
5216
5217    fn object_entry_fragment(object_id: &str, object: &Value) -> Vec<u8> {
5218        let mut map = Map::new();
5219        map.insert(object_id.to_owned(), object.clone());
5220        let serialized = serde_json::to_vec(&Value::Object(map)).expect("object entry");
5221        serialized[1..serialized.len() - 1].to_vec()
5222    }
5223
5224    fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
5225        haystack
5226            .windows(needle.len())
5227            .position(|window| window == needle)
5228    }
5229
5230    fn write_temp_cityjson(bytes: &[u8]) -> PathBuf {
5231        let unique = SystemTime::now()
5232            .duration_since(UNIX_EPOCH)
5233            .expect("system time")
5234            .as_nanos();
5235        let path =
5236            std::env::temp_dir().join(format!("cityjson-index-cityjson-read-one-{unique}.json"));
5237        fs::write(&path, bytes).expect("write temp cityjson");
5238        path
5239    }
5240
5241    fn write_temp_ndjson(metadata: &Value, feature: &Value) -> PathBuf {
5242        let unique = SystemTime::now()
5243            .duration_since(UNIX_EPOCH)
5244            .expect("system time")
5245            .as_nanos();
5246        let path = std::env::temp_dir().join(format!("cityjson-index-ndjson-{unique}.jsonl"));
5247        let contents = format!(
5248            "{}\n{}\n",
5249            serde_json::to_string(metadata).expect("metadata JSON"),
5250            serde_json::to_string(feature).expect("feature JSON")
5251        );
5252        fs::write(&path, contents).expect("write temp ndjson");
5253        path
5254    }
5255
5256    fn write_temp_index_path() -> PathBuf {
5257        let unique = SystemTime::now()
5258            .duration_since(UNIX_EPOCH)
5259            .expect("system time")
5260            .as_nanos();
5261        let path = std::env::temp_dir().join(format!("cityjson-index-ndjson-{unique}.sqlite"));
5262        if path.exists() {
5263            fs::remove_file(&path).expect("remove temp sqlite");
5264        }
5265        path
5266    }
5267
5268    fn write_temp_index_path_with_prefix(prefix: &str) -> PathBuf {
5269        let unique = SystemTime::now()
5270            .duration_since(UNIX_EPOCH)
5271            .expect("system time")
5272            .as_nanos();
5273        let path = std::env::temp_dir().join(format!("cityjson-index-{prefix}-{unique}.sqlite"));
5274        if path.exists() {
5275            fs::remove_file(&path).expect("remove temp sqlite");
5276        }
5277        path
5278    }
5279
5280    fn write_temp_feature_files_root(ids: &[&str]) -> PathBuf {
5281        let root = write_temp_dir("cityjson-index-feature-files");
5282        fs::write(
5283            root.join("metadata.json"),
5284            serde_json::to_vec(&base_document()).expect("metadata JSON"),
5285        )
5286        .expect("write metadata");
5287        for (idx, id) in ids.iter().enumerate() {
5288            let feature_path = root.join(format!("features/{idx:03}.city.jsonl"));
5289            let idx = i64::try_from(idx).expect("test index fits in i64");
5290            if let Some(parent) = feature_path.parent() {
5291                fs::create_dir_all(parent).expect("create feature directory");
5292            }
5293            fs::write(
5294                &feature_path,
5295                serde_json::to_vec(&feature_feature_document(id, idx)).expect("feature JSON"),
5296            )
5297            .expect("write feature file");
5298        }
5299        root
5300    }
5301
5302    fn write_temp_cityjson_root(ids: &[&str]) -> PathBuf {
5303        let root = write_temp_dir("cityjson-index-cityjson");
5304        let mut cityobjects = Map::new();
5305        for id in ids {
5306            cityobjects.insert((*id).to_owned(), feature_object(0));
5307        }
5308        let document = serde_json::json!({
5309            "type": "CityJSON",
5310            "version": "2.0",
5311            "transform": {
5312                "scale": [1.0, 1.0, 1.0],
5313                "translate": [0.0, 0.0, 0.0]
5314            },
5315            "metadata": {
5316                "referenceSystem": "https://www.opengis.net/def/crs/EPSG/0/7415"
5317            },
5318            "CityObjects": cityobjects,
5319            "vertices": [
5320                [0, 0, 0],
5321                [1, 0, 0],
5322                [0, 1, 0]
5323            ]
5324        });
5325        fs::write(
5326            root.join("dataset.city.json"),
5327            serde_json::to_vec(&document).expect("cityjson JSON"),
5328        )
5329        .expect("write cityjson");
5330        root
5331    }
5332
5333    fn write_temp_ndjson_root(ids: &[&str]) -> PathBuf {
5334        let root = write_temp_dir("cityjson-index-ndjson-root");
5335        let mut contents = serde_json::to_string(&base_document()).expect("metadata JSON");
5336        contents.push('\n');
5337        for (idx, id) in ids.iter().enumerate() {
5338            let idx = i64::try_from(idx).expect("test index fits in i64");
5339            contents.push_str(
5340                &serde_json::to_string(&feature_feature_document(id, idx)).expect("feature JSON"),
5341            );
5342            contents.push('\n');
5343        }
5344        fs::write(root.join("dataset.city.jsonl"), contents).expect("write ndjson");
5345        root
5346    }
5347
5348    fn write_temp_dir(prefix: &str) -> PathBuf {
5349        let unique = SystemTime::now()
5350            .duration_since(UNIX_EPOCH)
5351            .expect("system time")
5352            .as_nanos();
5353        let path = std::env::temp_dir().join(format!("{prefix}-{unique}"));
5354        fs::create_dir_all(&path).expect("create temp dir");
5355        path
5356    }
5357
5358    fn base_document() -> Value {
5359        serde_json::json!({
5360            "type": "CityJSON",
5361            "version": "2.0",
5362            "transform": {
5363                "scale": [1.0, 1.0, 1.0],
5364                "translate": [0.0, 0.0, 0.0]
5365            },
5366            "metadata": {
5367                "referenceSystem": "https://www.opengis.net/def/crs/EPSG/0/7415"
5368            },
5369            "CityObjects": {},
5370            "vertices": []
5371        })
5372    }
5373
5374    fn feature_feature_document(id: &str, offset: i64) -> Value {
5375        let object = feature_object(offset);
5376        serde_json::json!({
5377            "type": "CityJSONFeature",
5378            "id": id,
5379            "CityObjects": {
5380                id: object
5381            },
5382            "vertices": [
5383                [offset, 0, 0],
5384                [offset + 1, 0, 0],
5385                [offset, 1, 0]
5386            ]
5387        })
5388    }
5389
5390    fn feature_object(_offset: i64) -> Value {
5391        serde_json::json!({
5392            "type": "Building",
5393            "geometry": [{
5394                "type": "MultiSurface",
5395                "lod": "1.0",
5396                "boundaries": [[[0, 1, 2]]]
5397            }]
5398        })
5399    }
5400
5401    fn assert_full_scan_order(index: &CityIndex, expected_ids: &[&str]) {
5402        let ids = index
5403            .iter_all_with_ids()
5404            .expect("iter_all_with_ids should build")
5405            .collect::<Result<Vec<_>>>()
5406            .expect("iter_all_with_ids should collect");
5407        assert_eq!(
5408            ids.iter().map(|(id, _)| id.as_str()).collect::<Vec<_>>(),
5409            expected_ids
5410        );
5411
5412        let models = index
5413            .iter_all()
5414            .expect("iter_all should build")
5415            .collect::<Result<Vec<_>>>()
5416            .expect("iter_all should collect");
5417        assert_eq!(models.len(), expected_ids.len());
5418
5419        let models_with_metadata = index
5420            .iter_all_with_metadata()
5421            .expect("iter_all_with_metadata should build")
5422            .collect::<Result<Vec<_>>>()
5423            .expect("iter_all_with_metadata should collect");
5424        assert_eq!(models_with_metadata.len(), expected_ids.len());
5425    }
5426
5427    fn assert_full_scan_pages(index: &CityIndex, expected_ids: &[&str]) {
5428        let pages = index
5429            .iter_all_feature_ref_pages(2)
5430            .expect("iter_all_feature_ref_pages should build")
5431            .collect::<Result<Vec<_>>>()
5432            .expect("iter_all_feature_ref_pages should collect");
5433        assert_eq!(
5434            pages
5435                .iter()
5436                .flat_map(|page| page.iter().map(|feature| feature.feature_id.as_str()))
5437                .collect::<Vec<_>>(),
5438            expected_ids
5439        );
5440
5441        let bbox_pages = index
5442            .iter_all_bbox_pages(2)
5443            .expect("iter_all_bbox_pages should build")
5444            .collect::<Result<Vec<_>>>()
5445            .expect("iter_all_bbox_pages should collect");
5446        assert_eq!(
5447            bbox_pages
5448                .iter()
5449                .flat_map(|page| page.iter().map(|feature| feature.feature_id.as_str()))
5450                .collect::<Vec<_>>(),
5451            expected_ids
5452        );
5453
5454        for page in pages {
5455            for feature in page {
5456                let model = index
5457                    .read_feature(&feature)
5458                    .expect("feature should reconstruct");
5459                assert!(model_contains_id(&model, &feature.feature_id));
5460                assert_eq!(
5461                    feature_bounds_for_model(&model).expect("bounds should be computable"),
5462                    feature.bounds
5463                );
5464            }
5465        }
5466    }
5467
5468    fn assert_indexed_batch_preserves_order(
5469        index: &CityIndex,
5470        features: &[IndexedFeatureRef],
5471        ids: &[String],
5472    ) {
5473        let indexed_features = index
5474            .read_indexed_features(features)
5475            .expect("indexed feature batch should reconstruct");
5476        assert_eq!(
5477            indexed_features
5478                .iter()
5479                .map(|feature| feature.reference.feature_id.as_str())
5480                .collect::<Vec<_>>(),
5481            ids.iter()
5482                .take(features.len())
5483                .map(String::as_str)
5484                .collect::<Vec<_>>()
5485        );
5486    }
5487
5488    fn assert_decoded_scan_pages(index: &CityIndex, ids: &[String]) {
5489        let scan_pages = index
5490            .scan_feature_pages(128)
5491            .expect("scan_feature_pages should build")
5492            .collect::<Result<Vec<_>>>()
5493            .expect("scan_feature_pages should collect");
5494        assert_eq!(
5495            scan_pages.iter().map(Vec::len).collect::<Vec<_>>(),
5496            vec![128, 128, 128, 128, 88]
5497        );
5498        assert_eq!(
5499            scan_pages
5500                .iter()
5501                .flat_map(|page| {
5502                    page.iter()
5503                        .map(|feature| feature.reference.feature_id.as_str())
5504                })
5505                .collect::<Vec<_>>(),
5506            ids.iter().map(String::as_str).collect::<Vec<_>>()
5507        );
5508
5509        let scanned_ids = index
5510            .scan_features()
5511            .expect("scan_features should build")
5512            .map(|result| result.map(|feature| feature.reference.feature_id))
5513            .collect::<Result<Vec<_>>>()
5514            .expect("scan_features should collect");
5515        assert_eq!(scanned_ids, ids);
5516    }
5517
5518    fn assert_rowid_feature_reads(index: &CityIndex) {
5519        let first_ref = index
5520            .lookup_feature_ref_by_rowid(1)
5521            .expect("rowid lookup should load")
5522            .expect("first rowid should exist");
5523        assert_eq!(first_ref.feature_id, "feature-000");
5524        assert_eq!(
5525            index
5526                .lookup_feature_ref_by_rowid(9999)
5527                .expect("missing rowid lookup should load"),
5528            None
5529        );
5530
5531        let rowid_features = index
5532            .read_features_by_rowids(&[2, 9999, 1, 2])
5533            .expect("rowid batch should reconstruct");
5534        assert_eq!(
5535            rowid_features
5536                .iter()
5537                .map(|feature| feature
5538                    .as_ref()
5539                    .map(|feature| feature.reference.feature_id.as_str()))
5540                .collect::<Vec<_>>(),
5541            vec![
5542                Some("feature-001"),
5543                None,
5544                Some("feature-000"),
5545                Some("feature-001")
5546            ]
5547        );
5548
5549        let range_features = index
5550            .read_feature_range_after_rowid(Some(127), 3)
5551            .expect("rowid range should reconstruct");
5552        assert_eq!(
5553            range_features
5554                .iter()
5555                .map(|feature| feature.reference.feature_id.as_str())
5556                .collect::<Vec<_>>(),
5557            vec!["feature-127", "feature-128", "feature-129"]
5558        );
5559    }
5560
5561    fn model_contains_id(model: &CityModel, id: &str) -> bool {
5562        let value: Value =
5563            serde_json::from_str(&cityjson_lib::json::to_string(model).expect("serialize model"))
5564                .expect("model JSON");
5565        value["CityObjects"]
5566            .as_object()
5567            .is_some_and(|cityobjects| cityobjects.contains_key(id))
5568    }
5569
5570    fn feature_bounds_for_model(model: &CityModel) -> Result<FeatureBounds> {
5571        let value: Value =
5572            serde_json::from_str(&cityjson_lib::json::to_string(model).expect("serialize model"))
5573                .expect("model JSON");
5574        let vertices = value
5575            .get("vertices")
5576            .and_then(Value::as_array)
5577            .ok_or_else(|| import_error("model JSON is missing vertices"))?;
5578        let transform = value
5579            .get("transform")
5580            .and_then(Value::as_object)
5581            .ok_or_else(|| import_error("model JSON is missing transform"))?;
5582        let scale = parse_transform_component(transform, "scale")?;
5583        let translate = parse_transform_component(transform, "translate")?;
5584
5585        let mut min_x = f64::INFINITY;
5586        let mut max_x = f64::NEG_INFINITY;
5587        let mut min_y = f64::INFINITY;
5588        let mut max_y = f64::NEG_INFINITY;
5589        let mut min_z = f64::INFINITY;
5590        let mut max_z = f64::NEG_INFINITY;
5591
5592        for vertex in vertices {
5593            let coords = vertex
5594                .as_array()
5595                .ok_or_else(|| import_error("vertex must be an array"))?;
5596            if coords.len() != 3 {
5597                return Err(import_error("vertex must have three coordinates"));
5598            }
5599            let x = translate[0] + scale[0] * value_as_f64(&coords[0])?;
5600            let y = translate[1] + scale[1] * value_as_f64(&coords[1])?;
5601            let z = translate[2] + scale[2] * value_as_f64(&coords[2])?;
5602            min_x = min_x.min(x);
5603            max_x = max_x.max(x);
5604            min_y = min_y.min(y);
5605            max_y = max_y.max(y);
5606            min_z = min_z.min(z);
5607            max_z = max_z.max(z);
5608        }
5609
5610        if !min_x.is_finite()
5611            || !max_x.is_finite()
5612            || !min_y.is_finite()
5613            || !max_y.is_finite()
5614            || !min_z.is_finite()
5615            || !max_z.is_finite()
5616        {
5617            return Err(import_error(
5618                "could not compute a finite bbox from the model",
5619            ));
5620        }
5621
5622        Ok(FeatureBounds {
5623            min_x,
5624            max_x,
5625            min_y,
5626            max_y,
5627            min_z,
5628            max_z,
5629        })
5630    }
5631
5632    fn parse_transform_component(
5633        transform: &serde_json::Map<String, Value>,
5634        key: &str,
5635    ) -> Result<[f64; 3]> {
5636        let values = transform
5637            .get(key)
5638            .and_then(Value::as_array)
5639            .ok_or_else(|| import_error(format!("transform is missing {key}")))?;
5640        if values.len() != 3 {
5641            return Err(import_error(format!(
5642                "transform {key} must contain three values"
5643            )));
5644        }
5645        Ok([
5646            value_as_f64(&values[0])?,
5647            value_as_f64(&values[1])?,
5648            value_as_f64(&values[2])?,
5649        ])
5650    }
5651
5652    fn value_as_f64(value: &Value) -> Result<f64> {
5653        value
5654            .as_f64()
5655            .ok_or_else(|| import_error("expected a numeric value"))
5656    }
5657
5658    fn write_temp_bytes(bytes: &[u8]) -> PathBuf {
5659        let unique = SystemTime::now()
5660            .duration_since(UNIX_EPOCH)
5661            .expect("system time")
5662            .as_nanos();
5663        let path = std::env::temp_dir().join(format!("cityjson-index-range-read-{unique}.bin"));
5664        fs::write(&path, bytes).expect("write temp bytes");
5665        path
5666    }
5667}