use crate::error::{CityJsonStacError, Result};
use futures::StreamExt;
use object_store::DynObjectStore;
use std::io::Write;
use std::sync::Arc;
use tempfile::TempPath;
use url::Url;
/// Creates a shared object store handle for the location named by `url`.
///
/// `options` is forwarded to `object_store::parse_url_opts`; `None` is treated
/// as an empty option list. The path component that `parse_url_opts` returns
/// alongside the store is discarded.
///
/// # Errors
///
/// * `CityJsonStacError::UrlError` when `url` cannot be parsed.
/// * `CityJsonStacError::StorageError` when the object store cannot be built.
pub async fn create_store_from_url(
    url: &str,
    options: Option<Vec<(&str, &str)>>,
) -> Result<Arc<DynObjectStore>> {
    let location = Url::parse(url).map_err(CityJsonStacError::UrlError)?;
    let store_options = options.unwrap_or_default();

    match object_store::parse_url_opts(&location, store_options) {
        Ok((store, _unused_path)) => Ok(Arc::from(store)),
        Err(e) => Err(CityJsonStacError::StorageError(format!(
            "Failed to create object store: {e}"
        ))),
    }
}
/// Downloads the complete content behind `url` into memory.
///
/// Supported schemes:
///
/// * `http` / `https` — fetched with `reqwest`; the request carries
///   `Accept-Encoding: identity`.
/// * `s3` / `gs` / `az` / `azure` — fetched through `object_store`, built with
///   an empty option list.
///
/// # Errors
///
/// * `CityJsonStacError::UrlError` when `url` cannot be parsed.
/// * `CityJsonStacError::StorageError` for an unsupported scheme, a failed
///   client/store construction, a failed request, a non-success HTTP status,
///   or a body that cannot be read.
/// * Whatever error the `?` conversion produces for `object_store` failures
///   while fetching or reading the object.
pub async fn download_from_url(url: &str) -> Result<bytes::Bytes> {
    let target = Url::parse(url).map_err(CityJsonStacError::UrlError)?;

    match target.scheme() {
        "http" | "https" => {
            let http = reqwest::Client::builder().build().map_err(|e| {
                CityJsonStacError::StorageError(format!("Failed to create HTTP client: {e}"))
            })?;

            let request = http
                .get(url)
                .header(reqwest::header::ACCEPT_ENCODING, "identity");
            let response = request.send().await.map_err(|e| {
                CityJsonStacError::StorageError(format!("HTTP request failed: {e}"))
            })?;

            // Reject error statuses before touching the body.
            let status = response.status();
            if !status.is_success() {
                return Err(CityJsonStacError::StorageError(format!(
                    "HTTP {} for {}",
                    status, url
                )));
            }

            response.bytes().await.map_err(|e| {
                CityJsonStacError::StorageError(format!("Failed to read response body: {e}"))
            })
        }
        "s3" | "gs" | "az" | "azure" => {
            let no_options: Vec<(String, String)> = Vec::new();
            let (store, location) = object_store::parse_url_opts(&target, no_options)
                .map_err(|e| {
                    CityJsonStacError::StorageError(format!(
                        "Failed to create object store: {e}"
                    ))
                })?;

            let payload = store.get(&location).await?.bytes().await?;
            Ok(payload)
        }
        other => Err(CityJsonStacError::StorageError(format!(
            "Unsupported URL scheme: {other}"
        ))),
    }
}
/// Downloads `url` into a freshly created temporary file whose name ends with
/// `suffix`, and returns the path handle of that file.
///
/// For `http` / `https` the response body is written chunk by chunk as it
/// arrives. Every other scheme is delegated to `download_from_url`, whose
/// result is written in one piece.
///
/// # Errors
///
/// * `CityJsonStacError::UrlError` when `url` cannot be parsed.
/// * `CityJsonStacError::StorageError` for HTTP client, request, status or
///   body failures.
/// * Any error returned by `download_from_url` for the non-HTTP path.
/// * The converted I/O error when the temporary file cannot be created or
///   written.
pub async fn download_to_temp_file(url: &str, suffix: &str) -> Result<TempPath> {
    let target = Url::parse(url).map_err(CityJsonStacError::UrlError)?;
    let mut scratch = tempfile::Builder::new().suffix(suffix).tempfile()?;

    if matches!(target.scheme(), "http" | "https") {
        let http = reqwest::Client::builder().build().map_err(|e| {
            CityJsonStacError::StorageError(format!("Failed to create HTTP client: {e}"))
        })?;

        let request = http
            .get(url)
            .header(reqwest::header::ACCEPT_ENCODING, "identity");
        let response = request.send().await.map_err(|e| {
            CityJsonStacError::StorageError(format!("HTTP request failed: {e}"))
        })?;

        let status = response.status();
        if !status.is_success() {
            return Err(CityJsonStacError::StorageError(format!(
                "HTTP {} for {}",
                status, url
            )));
        }

        // Copy the body to disk piece by piece instead of buffering all of it.
        let mut body = response.bytes_stream();
        while let Some(piece) = body.next().await {
            match piece {
                Ok(data) => scratch.write_all(&data)?,
                Err(e) => {
                    return Err(CityJsonStacError::StorageError(format!(
                        "Failed to read response body: {e}"
                    )));
                }
            }
        }
    } else {
        let payload = download_from_url(url).await?;
        scratch.write_all(&payload)?;
    }

    Ok(scratch.into_temp_path())
}
/// Determines the lowercase file extension that `url` refers to.
///
/// The URL is first split into path, query and fragment. The fragment is
/// ignored, and the scheme and authority are skipped so that a bare host such
/// as `https://example.com` is never mistaken for a file name.
///
/// Candidates are tried in this order:
///
/// 1. The extension of the last path segment, if it is one of the recognised
///    data formats (`json`, `jsonl`, `cjseq`, `gml`, `xml`, `zip`, `gz`,
///    `fcb`).
/// 2. The extension of the file name carried by a `file=`, `files=` or `f=`
///    query parameter (download-script style URLs).
/// 3. Any extension on the last path segment.
///
/// # Errors
///
/// Returns `CityJsonStacError::Other` when none of the candidates yields an
/// extension.
pub fn extract_extension_from_url(url: &str) -> Result<String> {
    // A fragment is never part of the resource name.
    let without_fragment = url.split('#').next().unwrap_or(url);

    // Separate the query before looking at path segments: a query value may
    // itself contain '/' and must not be taken for the last path segment.
    let (location, query) = match without_fragment.split_once('?') {
        Some((location, query)) => (location, Some(query)),
        None => (without_fragment, None),
    };

    // Skip "scheme://authority" so the host name cannot pose as a file name.
    let path = match location.split_once("://") {
        Some((_, after_scheme)) => after_scheme.split_once('/').map_or("", |(_, path)| path),
        None => location,
    };
    let path_part = path.rsplit('/').next().unwrap_or("");
    let path_ext = extract_ext_from_filename(path_part).map(str::to_lowercase);

    // 1. A recognised extension on the path wins outright.
    if let Some(ext) = path_ext.as_deref() {
        if matches!(
            ext,
            "json" | "jsonl" | "cjseq" | "gml" | "xml" | "zip" | "gz" | "fcb"
        ) {
            return Ok(ext.to_owned());
        }
    }

    // 2. Otherwise look for a file name passed as a query parameter.
    if let Some(query) = query {
        for param in query.split('&') {
            let value = param
                .strip_prefix("file=")
                .or_else(|| param.strip_prefix("files="))
                .or_else(|| param.strip_prefix("f="));
            if let Some(value) = value {
                // Percent-encoding hex digits are case-insensitive, so accept
                // both spellings of the encoded '/' and '.'.
                let decoded = value
                    .replace("%2F", "/")
                    .replace("%2f", "/")
                    .replace("%2E", ".")
                    .replace("%2e", ".");
                let filename = decoded.rsplit('/').next().unwrap_or(value);
                if let Some(ext) = extract_ext_from_filename(filename) {
                    return Ok(ext.to_lowercase());
                }
            }
        }
    }

    // 3. Fall back to whatever extension the path carries, recognised or not.
    path_ext.ok_or_else(|| {
        CityJsonStacError::Other(format!("No file extension found in URL: {url}",))
    })
}
/// Returns the text after the last `.` in `filename`.
///
/// Yields `None` when `filename` contains no `.` or when nothing follows the
/// last `.` (for example `"archive."`).
fn extract_ext_from_filename(filename: &str) -> Option<&str> {
    let (_stem, ext) = filename.rsplit_once('.')?;
    if ext.is_empty() {
        None
    } else {
        Some(ext)
    }
}
/// Reports whether `input` begins with one of the remote URL prefixes this
/// module recognises: `http://`, `https://`, `s3://`, `az://`, `azure://` or
/// `gs://`.
///
/// The check is a plain, case-sensitive prefix comparison; `input` is not
/// parsed or validated as a URL.
pub fn is_remote_url(input: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 6] = [
        "http://",
        "https://",
        "s3://",
        "az://",
        "azure://",
        "gs://",
    ];

    REMOTE_PREFIXES
        .iter()
        .any(|prefix| input.starts_with(*prefix))
}
/// Derives a file name for the resource that `url` points to.
///
/// The URL is split into path, query and fragment; the fragment is ignored.
/// Candidates are tried in this order:
///
/// 1. The file name carried by a `file=`, `files=` or `f=` query parameter,
///    provided it contains a `.` (download-script style URLs).
/// 2. The last segment of the path. The scheme and authority are skipped, so
///    a bare host such as `https://example.com` is not returned as a name.
/// 3. The placeholder `"remote.file"` when the path has no last segment.
pub fn url_filename(url: &str) -> String {
    // A fragment is never part of the resource name.
    let without_fragment = url.split('#').next().unwrap_or(url);

    // Separate the query before looking at path segments: a query value may
    // itself contain '/' and must not be taken for the last path segment.
    let (location, query) = match without_fragment.split_once('?') {
        Some((location, query)) => (location, Some(query)),
        None => (without_fragment, None),
    };

    if let Some(query) = query {
        for param in query.split('&') {
            let value = param
                .strip_prefix("file=")
                .or_else(|| param.strip_prefix("files="))
                .or_else(|| param.strip_prefix("f="));
            if let Some(value) = value {
                // Percent-encoding hex digits are case-insensitive, so accept
                // both spellings of the encoded '/' and '.'.
                let decoded = value
                    .replace("%2F", "/")
                    .replace("%2f", "/")
                    .replace("%2E", ".")
                    .replace("%2e", ".");
                let filename = decoded.rsplit('/').next().unwrap_or(value);
                if filename.contains('.') {
                    return filename.to_string();
                }
            }
        }
    }

    // Skip "scheme://authority" so the host name cannot pose as a file name.
    let path = match location.split_once("://") {
        Some((_, after_scheme)) => after_scheme.split_once('/').map_or("", |(_, path)| path),
        None => location,
    };

    path.rsplit('/')
        .next()
        .filter(|segment| !segment.is_empty())
        .unwrap_or("remote.file")
        .to_string()
}
#[cfg(test)]
mod tests {
    use super::*;

    // Extension detection: plain paths, mixed-case extensions, query strings
    // and download-script URLs that carry the file name as a parameter.
    #[test]
    fn test_extract_extension_from_url() {
        let resolvable = [
            ("https://example.com/file.json", "json"),
            ("https://example.com/file.city.json", "json"),
            ("https://example.com/file.jsonl?query=1", "jsonl"),
            ("https://example.com/data.cjseq", "cjseq"),
            ("s3://bucket/path/to/file.fcb", "fcb"),
            ("https://example.com/file.GML", "gml"),
            ("https://example.com/file.ZIP", "zip"),
            ("https://example.com/download?file=data.gml&id=4", "gml"),
            (
                "https://example.com/index.php?f=hooned_lod2-citygml.zip&page_id=837",
                "zip",
            ),
            (
                "https://example.com/massen.php?file=LoD2_data.xml&id=4",
                "xml",
            ),
            (
                "https://example.com/s/opendata/download?path=%2F3d&files=city_model.gml",
                "gml",
            ),
        ];
        for (url, expected) in resolvable {
            assert_eq!(
                extract_extension_from_url(url).unwrap(),
                expected,
                "url: {url}"
            );
        }

        // URLs without any extension must be rejected.
        let unresolvable = ["https://example.com/file", "https://example.com/"];
        for url in unresolvable {
            assert!(extract_extension_from_url(url).is_err(), "url: {url}");
        }
    }

    // Remote detection: every supported scheme is accepted, local paths are not.
    #[test]
    fn test_is_remote_url() {
        let remote = [
            "https://example.com/file.json",
            "http://example.com/file.json",
            "s3://bucket/path/to/file.json",
            "az://container/path/to/file.json",
            "azure://container/path/to/file.json",
            "gs://bucket/path/to/file.json",
        ];
        for input in remote {
            assert!(is_remote_url(input), "input: {input}");
        }

        let local = ["file.json", "/path/to/file.json", "./relative/path.json"];
        for input in local {
            assert!(!is_remote_url(input), "input: {input}");
        }
    }

    // File-name derivation: last path segment, query stripping, the
    // placeholder for empty paths, and file names passed as parameters.
    #[test]
    fn test_url_filename() {
        let cases = [
            ("https://example.com/data/city.json", "city.json"),
            (
                "https://example.com/data/building.city.json?v=2",
                "building.city.json",
            ),
            ("https://example.com/", "remote.file"),
            ("http://test.example.org/path/to/file.json", "file.json"),
            ("s3://my-bucket/path/to/data.cjseq", "data.cjseq"),
            (
                "https://example.com/s/opendata/download?path=%2F3d&files=city_model.gml",
                "city_model.gml",
            ),
        ];
        for (url, expected) in cases {
            assert_eq!(url_filename(url), expected, "url: {url}");
        }
    }
}