use std::path::{Path, PathBuf};
use oxigdal_core::error::{IoError, OxiGdalError};
use crate::{DatasetFormat, DatasetInfo, Result};
/// TIFF byte-order mark for little-endian files: ASCII "II".
const TIFF_LE_MAGIC: [u8; 2] = [0x49, 0x49];
/// TIFF byte-order mark for big-endian files: ASCII "MM".
const TIFF_BE_MAGIC: [u8; 2] = [0x4D, 0x4D];
/// 12-byte JPEG 2000 signature box that opens a `.jp2` file.
const JP2_MAGIC: [u8; 12] = [
0x00, 0x00, 0x00, 0x0C, 0x6A, 0x50, 0x20, 0x20, 0x0D, 0x0A, 0x87, 0x0A,
];
/// HDF5 superblock signature: `\x89HDF\r\n\x1a\n`.
const HDF5_MAGIC: [u8; 8] = [0x89, 0x48, 0x44, 0x46, 0x0D, 0x0A, 0x1A, 0x0A];
/// NetCDF classic header prefix: ASCII "CDF" (a version byte follows).
const NETCDF_MAGIC: [u8; 3] = [0x43, 0x44, 0x46];
/// ZIP local-file-header signature: ASCII "PK" + 0x03 0x04.
const ZIP_MAGIC: [u8; 4] = [0x50, 0x4B, 0x03, 0x04];
/// First six bytes of the SQLite header string: ASCII "SQLite".
const SQLITE_MAGIC: [u8; 6] = [0x53, 0x51, 0x4C, 0x69, 0x74, 0x65];
/// TIFF version word identifying a BigTIFF file.
const BIGTIFF_VERSION: u16 = 43;
/// TIFF version word identifying a classic TIFF file.
const TIFF_VERSION: u16 = 42;
/// Map a well-known URI scheme prefix of `path_str` to its cloud backend.
///
/// Returns `None` when the string starts with no recognized scheme, which
/// is how callers distinguish local paths from remote ones.
fn detect_cloud_scheme(path_str: &str) -> Option<CloudScheme> {
    let prefixes = [
        ("s3://", CloudScheme::S3),
        ("gs://", CloudScheme::Gcs),
        ("az://", CloudScheme::Azure),
        ("abfs://", CloudScheme::Azure),
        ("http://", CloudScheme::Http),
        ("https://", CloudScheme::Http),
    ];
    prefixes
        .into_iter()
        .find(|(prefix, _)| path_str.starts_with(prefix))
        .map(|(_, scheme)| scheme)
}
/// Remote storage backend identified from a URI scheme prefix.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CloudScheme {
/// Amazon S3 (`s3://`).
S3,
/// Google Cloud Storage (`gs://`).
Gcs,
/// Azure Blob Storage (`az://` or `abfs://`).
Azure,
/// Plain HTTP or HTTPS endpoints.
Http,
}
/// Outcome of probing a file's leading bytes for a known signature.
#[derive(Debug, Clone, PartialEq, Eq)]
enum MagicDetectionResult {
/// The magic bytes matched a known format signature.
Detected(DatasetFormat),
/// No known signature matched; callers fall back to the file extension.
Unknown,
}
/// Read up to `n` leading bytes from the file at `path`.
///
/// The returned buffer may be shorter than `n` for files smaller than `n`
/// bytes; it is never padded.
///
/// # Errors
///
/// Returns an [`IoError::Read`] wrapped in [`OxiGdalError::Io`] if the file
/// cannot be opened or read.
fn read_magic_bytes(path: &Path, n: usize) -> Result<Vec<u8>> {
    use std::io::Read;
    let file = std::fs::File::open(path).map_err(|e| {
        OxiGdalError::Io(IoError::Read {
            message: format!("cannot open '{}': {e}", path.display()),
        })
    })?;
    let mut buf = Vec::with_capacity(n);
    // `take(n)` + `read_to_end` loops internally until EOF or the limit is
    // hit, so a short read from the OS (which a single `read` call permits)
    // can no longer truncate the magic bytes.
    file.take(n as u64).read_to_end(&mut buf).map_err(|e| {
        OxiGdalError::Io(IoError::Read {
            message: format!("cannot read magic bytes from '{}': {e}", path.display()),
        })
    })?;
    Ok(buf)
}
/// Classify the dataset at `path` by its leading bytes.
///
/// Reads at most 16 bytes and compares them against known format
/// signatures. Returns [`MagicDetectionResult::Unknown`] when nothing
/// matches, letting the caller fall back to extension-based detection.
///
/// # Errors
///
/// Propagates any I/O error from [`read_magic_bytes`].
fn detect_from_magic(path: &Path) -> Result<MagicDetectionResult> {
    let buf = read_magic_bytes(path, 16)?;
    // `starts_with` returns false for short buffers, so no explicit length
    // guards are needed for the fixed-size signatures below.
    if buf.starts_with(&TIFF_LE_MAGIC) || buf.starts_with(&TIFF_BE_MAGIC) {
        // A valid TIFF header carries version 42 (classic) or 43 (BigTIFF)
        // after the byte-order mark. The previous code computed the version
        // but returned GeoTiff on every path, making the check dead code;
        // now a missing or bogus version word yields Unknown instead.
        if buf.len() >= 4 {
            let version = if buf[0] == 0x49 {
                u16::from_le_bytes([buf[2], buf[3]])
            } else {
                u16::from_be_bytes([buf[2], buf[3]])
            };
            if version == TIFF_VERSION || version == BIGTIFF_VERSION {
                return Ok(MagicDetectionResult::Detected(DatasetFormat::GeoTiff));
            }
        }
        return Ok(MagicDetectionResult::Unknown);
    }
    if buf.starts_with(&JP2_MAGIC) {
        return Ok(MagicDetectionResult::Detected(DatasetFormat::Jpeg2000));
    }
    if buf.starts_with(&HDF5_MAGIC) {
        return Ok(MagicDetectionResult::Detected(DatasetFormat::Hdf5));
    }
    // "CDF" followed by a version byte: 1 = classic, 2 = 64-bit offset,
    // 5 = 64-bit data.
    if buf.starts_with(&NETCDF_MAGIC) && matches!(buf.get(3).copied(), Some(0x01 | 0x02 | 0x05)) {
        return Ok(MagicDetectionResult::Detected(DatasetFormat::NetCdf));
    }
    // ZIP containers and SQLite databases are both reported as GeoPackage
    // here; `open` refines that guess using the file extension.
    if buf.starts_with(&ZIP_MAGIC) || buf.starts_with(&SQLITE_MAGIC) {
        return Ok(MagicDetectionResult::Detected(DatasetFormat::GeoPackage));
    }
    Ok(MagicDetectionResult::Unknown)
}
/// A dataset that has been classified by [`open`].
///
/// Local datasets carry a [`DatasetInfo`]; cloud URIs are represented by
/// [`OpenedDataset::Cloud`] without any local metadata.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum OpenedDataset {
GeoTiff(DatasetInfo),
GeoJson(DatasetInfo),
Shapefile(DatasetInfo),
GeoPackage(DatasetInfo),
GeoParquet(DatasetInfo),
NetCdf(DatasetInfo),
Hdf5(DatasetInfo),
Zarr(DatasetInfo),
Grib(DatasetInfo),
FlatGeobuf(DatasetInfo),
Jpeg2000(DatasetInfo),
Vrt(DatasetInfo),
Stac(DatasetInfo),
/// A remote dataset identified only by its URI scheme; the format is
/// guessed from the extension and nothing has been fetched.
Cloud {
scheme: CloudScheme,
path: PathBuf,
guessed_format: DatasetFormat,
},
/// A local file whose format could not be determined.
Unknown(DatasetInfo),
}
impl OpenedDataset {
    /// Borrow the locally-collected [`DatasetInfo`], when one exists.
    ///
    /// [`OpenedDataset::Cloud`] carries no local metadata and yields
    /// `None`; every other variant wraps a `DatasetInfo`.
    pub fn info(&self) -> Option<&DatasetInfo> {
        match self {
            Self::Cloud { .. } => None,
            Self::GeoTiff(details)
            | Self::GeoJson(details)
            | Self::Shapefile(details)
            | Self::GeoPackage(details)
            | Self::GeoParquet(details)
            | Self::NetCdf(details)
            | Self::Hdf5(details)
            | Self::Zarr(details)
            | Self::Grib(details)
            | Self::FlatGeobuf(details)
            | Self::Jpeg2000(details)
            | Self::Vrt(details)
            | Self::Stac(details)
            | Self::Unknown(details) => Some(details),
        }
    }

    /// The [`DatasetFormat`] this variant represents.
    ///
    /// Cloud datasets report the format guessed from their extension.
    pub fn format(&self) -> DatasetFormat {
        match self {
            Self::Cloud { guessed_format: fmt, .. } => *fmt,
            Self::Unknown(_) => DatasetFormat::Unknown,
            Self::GeoTiff(_) => DatasetFormat::GeoTiff,
            Self::GeoJson(_) => DatasetFormat::GeoJson,
            Self::Shapefile(_) => DatasetFormat::Shapefile,
            Self::GeoPackage(_) => DatasetFormat::GeoPackage,
            Self::GeoParquet(_) => DatasetFormat::GeoParquet,
            Self::NetCdf(_) => DatasetFormat::NetCdf,
            Self::Hdf5(_) => DatasetFormat::Hdf5,
            Self::Zarr(_) => DatasetFormat::Zarr,
            Self::Grib(_) => DatasetFormat::Grib,
            Self::FlatGeobuf(_) => DatasetFormat::FlatGeobuf,
            Self::Jpeg2000(_) => DatasetFormat::Jpeg2000,
            Self::Vrt(_) => DatasetFormat::Vrt,
            Self::Stac(_) => DatasetFormat::Stac,
        }
    }

    /// `true` only for the [`OpenedDataset::Cloud`] variant.
    pub fn is_cloud(&self) -> bool {
        matches!(self, Self::Cloud { .. })
    }

    /// `true` for locally-opened raster variants. Cloud datasets always
    /// report `false` here, whatever their guessed format.
    pub fn is_raster(&self) -> bool {
        matches!(
            self,
            Self::Vrt(_)
                | Self::Grib(_)
                | Self::Zarr(_)
                | Self::Hdf5(_)
                | Self::NetCdf(_)
                | Self::Jpeg2000(_)
                | Self::GeoTiff(_)
        )
    }

    /// `true` for locally-opened vector variants. Cloud datasets always
    /// report `false` here, whatever their guessed format.
    pub fn is_vector(&self) -> bool {
        matches!(
            self,
            Self::Stac(_)
                | Self::FlatGeobuf(_)
                | Self::GeoParquet(_)
                | Self::GeoPackage(_)
                | Self::Shapefile(_)
                | Self::GeoJson(_)
        )
    }
}
/// Open a dataset at `path`, detecting its format.
///
/// Cloud URIs (`s3://`, `gs://`, `az://`/`abfs://`, `http(s)://`) are
/// returned as [`OpenedDataset::Cloud`] without touching the network; their
/// format is guessed from the extension alone. Local paths are probed by
/// magic bytes first, with the file extension as a fallback (and bare
/// `.json` files treated as GeoJSON as a last resort).
///
/// # Errors
///
/// Returns [`IoError::NotFound`] when a local path does not exist, or a
/// read error if the file's magic bytes cannot be read.
pub fn open(path: impl AsRef<Path>) -> Result<OpenedDataset> {
    let path_ref = path.as_ref();
    // Lossy conversion keeps scheme/extension matching working for paths
    // with non-UTF-8 segments; the previous `to_str().unwrap_or("")`
    // silently discarded the entire path in that case.
    let path_str = path_ref.to_string_lossy().to_string();
    if let Some(scheme) = detect_cloud_scheme(&path_str) {
        let guessed_format = DatasetFormat::from_extension(&path_str);
        return Ok(OpenedDataset::Cloud {
            scheme,
            path: path_ref.to_path_buf(),
            guessed_format,
        });
    }
    if !path_ref.exists() {
        // This branch always returns, so `path_str` can be moved without a
        // clone even though it is used again below.
        return Err(OxiGdalError::Io(IoError::NotFound { path: path_str }));
    }
    let magic_result = detect_from_magic(path_ref)?;
    let format = match magic_result {
        MagicDetectionResult::Detected(fmt) => {
            if fmt == DatasetFormat::GeoPackage {
                // SQLite/ZIP signatures are shared by several container
                // formats; prefer the extension when it names one.
                match DatasetFormat::from_extension(&path_str) {
                    DatasetFormat::Unknown => DatasetFormat::GeoPackage,
                    other => other,
                }
            } else {
                fmt
            }
        }
        MagicDetectionResult::Unknown => {
            let ext_fmt = DatasetFormat::from_extension(&path_str);
            if ext_fmt == DatasetFormat::Unknown {
                // Last resort: treat a plain `.json` extension as GeoJSON.
                let ext = path_ref
                    .extension()
                    .and_then(|e| e.to_str())
                    .map(str::to_lowercase)
                    .unwrap_or_default();
                if ext == "json" {
                    DatasetFormat::GeoJson
                } else {
                    DatasetFormat::Unknown
                }
            } else {
                ext_fmt
            }
        }
    };
    let info = build_dataset_info(path_ref, format);
    Ok(map_format_to_opened(format, info))
}
/// Build a placeholder [`DatasetInfo`] for a dataset of the given `format`.
///
/// NOTE(review): metadata extraction is not implemented yet — every field
/// except `format` is empty/zero and `_path` is never read. Presumably a
/// later change will parse headers here; confirm against the roadmap.
fn build_dataset_info(_path: &Path, format: DatasetFormat) -> DatasetInfo {
DatasetInfo {
format,
width: None,
height: None,
band_count: 0,
layer_count: 0,
crs: None,
geotransform: None,
}
}
/// Wrap `info` in the [`OpenedDataset`] variant matching `format`.
///
/// Formats with no dedicated variant (PMTiles, MBTiles, COPC, Terrain and
/// Unknown) all collapse into [`OpenedDataset::Unknown`].
fn map_format_to_opened(format: DatasetFormat, info: DatasetInfo) -> OpenedDataset {
match format {
DatasetFormat::GeoTiff => OpenedDataset::GeoTiff(info),
DatasetFormat::GeoJson => OpenedDataset::GeoJson(info),
DatasetFormat::Shapefile => OpenedDataset::Shapefile(info),
DatasetFormat::GeoParquet => OpenedDataset::GeoParquet(info),
DatasetFormat::GeoPackage => OpenedDataset::GeoPackage(info),
DatasetFormat::NetCdf => OpenedDataset::NetCdf(info),
DatasetFormat::Hdf5 => OpenedDataset::Hdf5(info),
DatasetFormat::Zarr => OpenedDataset::Zarr(info),
DatasetFormat::Grib => OpenedDataset::Grib(info),
DatasetFormat::FlatGeobuf => OpenedDataset::FlatGeobuf(info),
DatasetFormat::Jpeg2000 => OpenedDataset::Jpeg2000(info),
DatasetFormat::Vrt => OpenedDataset::Vrt(info),
DatasetFormat::Stac => OpenedDataset::Stac(info),
DatasetFormat::PMTiles
| DatasetFormat::MBTiles
| DatasetFormat::Copc
| DatasetFormat::Terrain
| DatasetFormat::Unknown => OpenedDataset::Unknown(info),
}
}
impl DatasetFormat {
    /// Returns `true` when `path` carries a `.gpkg` extension, compared
    /// case-insensitively. Paths without an extension return `false`.
    pub fn is_geopackage(path: &Path) -> bool {
        path.extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| ext.to_lowercase() == "gpkg")
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Write `content` to `name` in the OS temp directory and return the
    /// path. Files are intentionally left behind; reruns overwrite them.
    fn write_temp_file(name: &str, content: &[u8]) -> PathBuf {
        let dir = std::env::temp_dir();
        let path = dir.join(name);
        let mut f = std::fs::File::create(&path).expect("create temp file");
        f.write_all(content).expect("write temp file");
        path
    }

    // --- cloud scheme detection ------------------------------------------

    #[test]
    fn test_cloud_s3_scheme_detected() {
        let result = open("s3://my-bucket/data/world.tif");
        assert!(result.is_ok(), "s3:// should succeed");
        let ds = result.expect("s3 opened");
        assert!(ds.is_cloud(), "should be cloud dataset");
        if let OpenedDataset::Cloud { scheme, .. } = &ds {
            assert_eq!(*scheme, CloudScheme::S3);
        } else {
            panic!("expected Cloud variant");
        }
    }

    #[test]
    fn test_cloud_gs_scheme_detected() {
        let result = open("gs://bucket/raster.tif");
        assert!(result.is_ok());
        let ds = result.expect("gs opened");
        assert!(ds.is_cloud());
        if let OpenedDataset::Cloud { scheme, .. } = &ds {
            assert_eq!(*scheme, CloudScheme::Gcs);
        } else {
            panic!("expected Cloud variant");
        }
    }

    #[test]
    fn test_cloud_az_scheme_detected() {
        let result = open("az://container/layer.gpkg");
        assert!(result.is_ok());
        let ds = result.expect("az opened");
        assert!(ds.is_cloud());
    }

    #[test]
    fn test_cloud_http_scheme_detected() {
        let result = open("https://example.com/layer.geojson");
        assert!(result.is_ok());
        let ds = result.expect("https opened");
        assert!(ds.is_cloud());
        if let OpenedDataset::Cloud { scheme, .. } = &ds {
            assert_eq!(*scheme, CloudScheme::Http);
        } else {
            panic!("expected Cloud variant");
        }
    }

    #[test]
    fn test_cloud_guessed_format_from_extension() {
        let result = open("s3://bucket/elevation.tif").expect("open");
        if let OpenedDataset::Cloud { guessed_format, .. } = result {
            assert_eq!(guessed_format, DatasetFormat::GeoTiff);
        } else {
            panic!("expected Cloud");
        }
    }

    // --- error paths ------------------------------------------------------

    #[test]
    fn test_open_nonexistent_file_returns_io_error() {
        let result = open("/nonexistent/path/file.tif");
        assert!(result.is_err(), "nonexistent file should error");
        let err = result.expect_err("should be error");
        assert!(
            matches!(err, OxiGdalError::Io(IoError::NotFound { .. })),
            "expected NotFound, got {err:?}"
        );
    }

    // --- magic-byte detection ---------------------------------------------

    #[test]
    fn test_magic_tiff_little_endian() {
        let bytes = [0x49u8, 0x49, 0x2A, 0x00, 0x00, 0x00, 0x00, 0x00];
        let path = write_temp_file("test_magic_tiff_le.tif", &bytes);
        let ds = open(&path).expect("open tiff le");
        assert_eq!(ds.format(), DatasetFormat::GeoTiff);
        assert!(ds.is_raster());
    }

    #[test]
    fn test_magic_tiff_big_endian() {
        let bytes = [0x4Du8, 0x4D, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x00];
        let path = write_temp_file("test_magic_tiff_be.tif", &bytes);
        let ds = open(&path).expect("open tiff be");
        assert_eq!(ds.format(), DatasetFormat::GeoTiff);
    }

    #[test]
    fn test_magic_hdf5() {
        let path = write_temp_file("test_magic_hdf5.h5", &HDF5_MAGIC);
        let ds = open(&path).expect("open hdf5");
        assert_eq!(ds.format(), DatasetFormat::Hdf5);
        assert!(ds.is_raster());
    }

    #[test]
    fn test_magic_netcdf() {
        let bytes = [0x43u8, 0x44, 0x46, 0x01, 0x00, 0x00, 0x00, 0x00];
        let path = write_temp_file("test_magic_netcdf.nc", &bytes);
        let ds = open(&path).expect("open netcdf");
        assert_eq!(ds.format(), DatasetFormat::NetCdf);
        assert!(ds.is_raster());
    }

    #[test]
    fn test_magic_jp2() {
        let path = write_temp_file("test_magic_jp2.jp2", &JP2_MAGIC);
        let ds = open(&path).expect("open jp2");
        assert_eq!(ds.format(), DatasetFormat::Jpeg2000);
        assert!(ds.is_raster());
    }

    // --- extension fallbacks ----------------------------------------------

    #[test]
    fn test_extension_geojson_fallback() {
        let content = b"{}";
        let path = write_temp_file("test_ext_fallback.geojson", content);
        let ds = open(&path).expect("open geojson");
        assert_eq!(ds.format(), DatasetFormat::GeoJson);
        assert!(ds.is_vector());
    }

    #[test]
    fn test_extension_shapefile_fallback() {
        // Shapefile file-code 9994 big-endian; detection falls back to the
        // `.shp` extension since no magic signature matches.
        let content = b"\x00\x00\x27\x0A";
        let path = write_temp_file("test_ext_shapefile.shp", content);
        let ds = open(&path).expect("open shp");
        assert_eq!(ds.format(), DatasetFormat::Shapefile);
        assert!(ds.is_vector());
    }

    #[test]
    fn test_extension_vrt_fallback() {
        let content = b"<VRTDataset />";
        let path = write_temp_file("test_ext_vrt.vrt", content);
        let ds = open(&path).expect("open vrt");
        assert_eq!(ds.format(), DatasetFormat::Vrt);
        assert!(ds.is_raster());
    }

    #[test]
    fn test_extension_grib_fallback() {
        let content = b"GRIB";
        let path = write_temp_file("test_ext_grib.grib", content);
        let ds = open(&path).expect("open grib");
        assert_eq!(ds.format(), DatasetFormat::Grib);
    }

    // --- OpenedDataset accessors ------------------------------------------

    #[test]
    fn test_opened_dataset_not_cloud_for_local() {
        let content = b"{}";
        let path = write_temp_file("test_not_cloud.geojson", content);
        let ds = open(&path).expect("open");
        assert!(!ds.is_cloud());
    }

    #[test]
    fn test_opened_dataset_info_present_for_local() {
        let content = b"{}";
        let path = write_temp_file("test_info_present.geojson", content);
        let ds = open(&path).expect("open");
        assert!(ds.info().is_some(), "local file should have info");
    }

    #[test]
    fn test_is_geopackage_extension_check() {
        let path = Path::new("layer.gpkg");
        assert!(DatasetFormat::is_geopackage(path));
        let path2 = Path::new("world.tif");
        assert!(!DatasetFormat::is_geopackage(path2));
    }

    #[test]
    fn test_format_display_all_variants() {
        assert_eq!(DatasetFormat::GeoTiff.to_string(), "GTiff");
        assert_eq!(DatasetFormat::GeoJson.to_string(), "GeoJSON");
        assert_eq!(DatasetFormat::Shapefile.to_string(), "ESRI Shapefile");
        assert_eq!(DatasetFormat::Hdf5.to_string(), "HDF5");
        assert_eq!(DatasetFormat::Vrt.to_string(), "VRT");
        assert_eq!(DatasetFormat::Unknown.to_string(), "Unknown");
    }
}