use crate::error::{CityJsonStacError, Result};
use crate::memory::{log_memory, memory_log_interval, memory_logging_enabled};
use crate::metadata::AttributeDefinition;
use crate::metadata::BBox3D;
use crate::metadata::CRS;
use crate::reader::{get_reader, CityModelMetadataReader};
use std::collections::{BTreeSet, HashMap};
use std::path::{Path, PathBuf};
use std::sync::RwLock;
use tempfile::{TempDir, TempPath};
pub struct ZipReader {
file_path: PathBuf,
temp_dir: TempDir,
_temp_file: Option<TempPath>,
inner_paths: Vec<PathBuf>,
metadata: RwLock<Option<ZipMetadata>>,
}
#[derive(Debug)]
struct ZipMetadata {
bbox: Option<BBox3D>,
city_object_count: usize,
city_object_types: BTreeSet<String>,
lods: BTreeSet<String>,
attributes: Vec<AttributeDefinition>,
primary_encoding: &'static str,
version: String,
crs: Option<CRS>,
has_textures: bool,
has_materials: bool,
has_semantic_surfaces: bool,
}
impl ZipReader {
pub fn new(file_path: &Path) -> Result<Self> {
if !file_path.exists() {
return Err(CityJsonStacError::IoError(std::io::Error::new(
std::io::ErrorKind::NotFound,
format!("File not found: {}", file_path.display()),
)));
}
let temp_dir = TempDir::new()?;
Self::extract_zip(file_path, temp_dir.path())?;
log_memory(format!("zip-extracted path={}", file_path.display()));
let mut reader = Self {
file_path: file_path.to_path_buf(),
temp_dir,
_temp_file: None,
inner_paths: Vec::new(),
metadata: RwLock::new(None),
};
reader.inner_paths = reader.discover_inner_paths()?;
log_memory(format!(
"zip-discovered path={} inner_files={}",
reader.file_path.display(),
reader.inner_paths.len()
));
if reader.inner_paths.is_empty() {
return Err(CityJsonStacError::InvalidCityJson(
"No CityJSON/CityGML files found in ZIP".to_string(),
));
}
Ok(reader)
}
pub fn from_temp_file(
virtual_path: &Path,
real_path: &Path,
temp_path: TempPath,
) -> Result<Self> {
let temp_dir = TempDir::new()?;
Self::extract_zip(real_path, temp_dir.path())?;
log_memory(format!("zip-extracted path={}", virtual_path.display()));
let mut reader = Self {
file_path: virtual_path.to_path_buf(),
temp_dir,
_temp_file: Some(temp_path),
inner_paths: Vec::new(),
metadata: RwLock::new(None),
};
reader.inner_paths = reader.discover_inner_paths()?;
log_memory(format!(
"zip-discovered path={} inner_files={}",
reader.file_path.display(),
reader.inner_paths.len()
));
if reader.inner_paths.is_empty() {
return Err(CityJsonStacError::InvalidCityJson(
"No CityJSON/CityGML files found in ZIP".to_string(),
));
}
Ok(reader)
}
fn extract_zip(zip_path: &Path, dest_dir: &Path) -> Result<()> {
let file = std::fs::File::open(zip_path)?;
let mut archive = zip::ZipArchive::new(file)?;
for i in 0..archive.len() {
let mut file = archive.by_index(i)?;
let outpath = dest_dir.join(file.name());
if !outpath.starts_with(dest_dir) {
return Err(CityJsonStacError::InvalidCityJson(format!(
"ZIP file contains unsafe path: {}",
file.name()
)));
}
if file.name().ends_with('/') {
std::fs::create_dir_all(&outpath)?;
} else {
if let Some(parent) = outpath.parent() {
std::fs::create_dir_all(parent)?;
}
let mut outfile = std::fs::File::create(&outpath)?;
std::io::copy(&mut file, &mut outfile)?;
}
}
Ok(())
}
fn discover_inner_paths(&self) -> Result<Vec<PathBuf>> {
let mut paths = Vec::new();
fn walk_dir(dir: &Path, paths: &mut Vec<PathBuf>) -> Result<()> {
for entry in std::fs::read_dir(dir)? {
let entry = entry?;
let path = entry.path();
if path.is_dir() {
walk_dir(&path, paths)?;
} else {
if get_reader(&path).is_ok() {
log::debug!("Found supported file in ZIP: {:?}", path);
paths.push(path);
}
}
}
Ok(())
}
walk_dir(self.temp_dir.path(), &mut paths)?;
Ok(paths)
}
fn aggregate_metadata(&self) -> Result<ZipMetadata> {
let mut city_object_count = 0;
let mut city_object_types = BTreeSet::new();
let mut lods = BTreeSet::new();
let mut attributes_map: HashMap<String, AttributeDefinition> = HashMap::new();
let mut has_textures = false;
let mut has_materials = false;
let mut has_semantic_surfaces = false;
let mut min_x = f64::MAX;
let mut min_y = f64::MAX;
let mut min_z = f64::MAX;
let mut max_x = f64::MIN;
let mut max_y = f64::MIN;
let mut max_z = f64::MIN;
let mut has_bbox = false;
let mut primary_encoding = "CityJSON";
let mut version = String::new();
let mut crs = None;
let memory_log_every = memory_log_interval(100);
for (idx, path) in self.inner_paths.iter().enumerate() {
let reader = match get_reader(path) {
Ok(reader) => reader,
Err(_) => continue,
};
if idx == 0 {
primary_encoding = reader.encoding();
}
if let Ok(count) = reader.city_object_count() {
city_object_count += count;
}
if let Ok(types) = reader.city_object_types() {
city_object_types.extend(types);
}
if let Ok(reader_lods) = reader.lods() {
lods.extend(reader_lods);
}
if let Ok(reader_attrs) = reader.attributes() {
for attr in reader_attrs {
attributes_map.insert(attr.name.clone(), attr);
}
}
if let Ok(true) = reader.textures() {
has_textures = true;
}
if let Ok(true) = reader.materials() {
has_materials = true;
}
if let Ok(true) = reader.semantic_surfaces() {
has_semantic_surfaces = true;
}
if let Ok(bbox) = reader.bbox() {
has_bbox = true;
min_x = min_x.min(bbox.xmin);
min_y = min_y.min(bbox.ymin);
min_z = min_z.min(bbox.zmin);
max_x = max_x.max(bbox.xmax);
max_y = max_y.max(bbox.ymax);
max_z = max_z.max(bbox.zmax);
}
if version.is_empty() {
if let Ok(v) = reader.version() {
version = v;
}
}
if crs.is_none() {
if let Ok(c) = reader.crs() {
crs = Some(c);
}
}
if memory_logging_enabled() && (idx + 1) % memory_log_every == 0 {
log_memory(format!(
"zip-aggregate path={} processed_inner={}/{}",
self.file_path.display(),
idx + 1,
self.inner_paths.len()
));
}
}
log_memory(format!(
"zip-aggregate-finished path={} inner_files={}",
self.file_path.display(),
self.inner_paths.len()
));
let bbox = if has_bbox {
Some(BBox3D::new(min_x, min_y, min_z, max_x, max_y, max_z))
} else {
None
};
let attributes: Vec<_> = attributes_map.into_values().collect();
Ok(ZipMetadata {
bbox,
city_object_count,
city_object_types,
lods,
attributes,
primary_encoding,
version,
crs,
has_textures,
has_materials,
has_semantic_surfaces,
})
}
fn ensure_loaded(&self) -> Result<()> {
{
let metadata = self
.metadata
.read()
.map_err(|_| CityJsonStacError::Other("Failed to acquire read lock".to_string()))?;
if metadata.is_some() {
return Ok(());
}
}
let mut metadata = self
.metadata
.write()
.map_err(|_| CityJsonStacError::Other("Failed to acquire write lock".to_string()))?;
if metadata.is_none() {
*metadata = Some(self.aggregate_metadata()?);
}
Ok(())
}
}
impl CityModelMetadataReader for ZipReader {
fn bbox(&self) -> Result<BBox3D> {
self.ensure_loaded()?;
let metadata = self
.metadata
.read()
.map_err(|_| CityJsonStacError::Other("Failed to acquire read lock".to_string()))?;
metadata
.as_ref()
.and_then(|m| m.bbox.clone())
.ok_or_else(|| CityJsonStacError::MetadataError("BBox not found".to_string()))
}
fn crs(&self) -> Result<CRS> {
self.ensure_loaded()?;
let metadata = self
.metadata
.read()
.map_err(|_| CityJsonStacError::Other("Failed to acquire read lock".to_string()))?;
Ok(metadata.as_ref().unwrap().crs.clone().unwrap_or_default())
}
fn lods(&self) -> Result<Vec<String>> {
self.ensure_loaded()?;
let metadata = self
.metadata
.read()
.map_err(|_| CityJsonStacError::Other("Failed to acquire read lock".to_string()))?;
Ok(metadata.as_ref().unwrap().lods.iter().cloned().collect())
}
fn city_object_types(&self) -> Result<Vec<String>> {
self.ensure_loaded()?;
let metadata = self
.metadata
.read()
.map_err(|_| CityJsonStacError::Other("Failed to acquire read lock".to_string()))?;
Ok(metadata
.as_ref()
.unwrap()
.city_object_types
.iter()
.cloned()
.collect())
}
fn city_object_count(&self) -> Result<usize> {
self.ensure_loaded()?;
let metadata = self
.metadata
.read()
.map_err(|_| CityJsonStacError::Other("Failed to acquire read lock".to_string()))?;
Ok(metadata.as_ref().unwrap().city_object_count)
}
fn attributes(&self) -> Result<Vec<AttributeDefinition>> {
self.ensure_loaded()?;
let metadata = self
.metadata
.read()
.map_err(|_| CityJsonStacError::Other("Failed to acquire read lock".to_string()))?;
Ok(metadata.as_ref().unwrap().attributes.clone())
}
fn encoding(&self) -> &'static str {
if let Ok(metadata) = self.metadata.read() {
if let Some(ref m) = *metadata {
return m.primary_encoding;
}
}
"CityJSON" }
fn version(&self) -> Result<String> {
self.ensure_loaded()?;
let metadata = self
.metadata
.read()
.map_err(|_| CityJsonStacError::Other("Failed to acquire read lock".to_string()))?;
Ok(metadata.as_ref().unwrap().version.clone())
}
fn file_path(&self) -> &Path {
&self.file_path
}
fn transform(&self) -> Result<Option<crate::metadata::Transform>> {
Ok(None) }
fn metadata(&self) -> Result<Option<serde_json::Value>> {
Ok(None)
}
fn extensions(&self) -> Result<Vec<String>> {
Ok(Vec::new())
}
fn semantic_surfaces(&self) -> Result<bool> {
self.ensure_loaded()?;
let metadata = self
.metadata
.read()
.map_err(|_| CityJsonStacError::Other("Failed to acquire read lock".to_string()))?;
Ok(metadata.as_ref().unwrap().has_semantic_surfaces)
}
fn textures(&self) -> Result<bool> {
self.ensure_loaded()?;
let metadata = self
.metadata
.read()
.map_err(|_| CityJsonStacError::Other("Failed to acquire read lock".to_string()))?;
Ok(metadata.as_ref().unwrap().has_textures)
}
fn materials(&self) -> Result<bool> {
self.ensure_loaded()?;
let metadata = self
.metadata
.read()
.map_err(|_| CityJsonStacError::Other("Failed to acquire read lock".to_string()))?;
Ok(metadata.as_ref().unwrap().has_materials)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::NamedTempFile;
fn create_test_zip_with_cityjson() -> NamedTempFile {
let temp_zip = NamedTempFile::new().unwrap();
let mut zip = zip::ZipWriter::new(temp_zip.as_file());
let cityjson = r#"{
"type": "CityJSON",
"version": "1.1",
"transform": {
"scale": [0.01, 0.01, 0.01],
"translate": [100000, 200000, 0]
},
"metadata": {
"geographicalExtent": [1.0, 2.0, 0.0, 10.0, 20.0, 30.0],
"referenceSystem": "https://www.opengis.net/def/crs/EPSG/0/7415"
},
"CityObjects": {
"building1": {
"type": "Building",
"geometry": [{
"type": "Solid",
"lod": "2",
"boundaries": [[[[0,0,0]]]]
}]
}
},
"vertices": [[0,0,0]]
}"#;
let options = zip::write::SimpleFileOptions::default()
.compression_method(zip::CompressionMethod::Stored);
zip.start_file("data.city.json", options).unwrap();
zip.write_all(cityjson.as_bytes()).unwrap();
zip.finish().unwrap();
temp_zip
}
#[test]
fn test_zip_reader_aggregates_metadata() {
let temp_zip = create_test_zip_with_cityjson();
let reader = ZipReader::new(temp_zip.path()).unwrap();
let bbox = reader.bbox().unwrap();
assert_eq!(bbox.xmin, 1.0);
assert_eq!(bbox.xmax, 10.0);
let count = reader.city_object_count().unwrap();
assert_eq!(count, 1);
let types = reader.city_object_types().unwrap();
assert!(types.contains(&"Building".to_string()));
let lods = reader.lods().unwrap();
assert!(lods.contains(&"2".to_string()));
let version = reader.version().unwrap();
assert_eq!(version, "1.1");
let crs = reader.crs().unwrap();
assert_eq!(crs.to_stac_epsg(), Some(7415));
assert_eq!(reader.encoding(), "CityJSON");
}
#[test]
fn test_zip_reader_empty_zip() {
let temp_zip = NamedTempFile::new().unwrap();
let zip = zip::ZipWriter::new(temp_zip.as_file());
zip.finish().unwrap();
let result = ZipReader::new(temp_zip.path());
assert!(result.is_err());
match result {
Err(CityJsonStacError::InvalidCityJson(msg)) => {
assert!(msg.contains("No CityJSON/CityGML files found"));
}
_ => panic!("Expected InvalidCityJson error"),
}
}
#[test]
fn test_zip_reader_not_streamable() {
let mut temp_zip = NamedTempFile::new().unwrap();
let zip = zip::ZipWriter::new(temp_zip.as_file_mut());
zip.finish().unwrap();
let result = ZipReader::new(temp_zip.path());
assert!(result.is_err());
match result {
Err(CityJsonStacError::InvalidCityJson(msg)) => {
assert!(msg.contains("No CityJSON/CityGML files found"));
}
_ => panic!("Expected InvalidCityJson error"),
}
}
}