use crate::config::Config;
use anyhow::{Context, Result};
use clap::Args as ClapArgs;
use geojson::{Feature, FeatureCollection, GeoJson, Geometry, Value};
use std::path::PathBuf;
// Command-line arguments for the Overture Maps extraction command.
//
// The area of interest comes from `bbox` or `polygon`; `run` rejects the
// invocation when neither is present.
//
// Plain `//` comments are used on purpose: clap's derive macros turn `///`
// doc comments into help text, which would change the generated `--help`.
#[derive(Debug, ClapArgs)]
pub struct Args {
// Bounding box given as `MIN_LON,MIN_LAT,MAX_LON,MAX_LAT`. When both `bbox`
// and `polygon` are supplied, `run` uses `bbox`.
#[arg(long)]
pub bbox: Option<String>,
// Path to a GeoJSON file; `run` reads it and uses the bounding box of its
// coordinates.
#[arg(long)]
pub polygon: Option<PathBuf>,
// Output path. NOTE(review): `run` in this file never reads this field —
// confirm whether it is reserved for the future parquet-processing step.
#[arg(short, long)]
pub output: Option<PathBuf>,
// Release identifier interpolated into the download URL.
#[arg(long, default_value = "2024-04-16-beta.0")]
pub release: String,
// Theme name interpolated into the download URL and written to the log.
#[arg(long, default_value = "transportation")]
pub theme: String,
}
/// Parses a `MIN_LON,MIN_LAT,MAX_LON,MAX_LAT` string into its four values.
///
/// Whitespace around each comma-separated component is ignored. The values
/// are returned in the order they were written; their relative ordering and
/// geographic range are not checked here.
///
/// # Errors
///
/// Returns an error when a component is not a valid number, when a component
/// is `NaN` or infinite, or when the string does not contain exactly four
/// components.
fn parse_bbox(bbox: &str) -> Result<(f64, f64, f64, f64)> {
    let parts = bbox
        .split(',')
        .map(|s| s.trim().parse::<f64>())
        .collect::<Result<Vec<f64>, _>>()
        .context("Invalid bbox format")?;

    // The slice pattern checks the component count and destructures in one
    // step, so no indexing can go out of bounds.
    let (min_lon, min_lat, max_lon, max_lat) = match parts.as_slice() {
        &[min_lon, min_lat, max_lon, max_lat] => (min_lon, min_lat, max_lon, max_lat),
        _ => anyhow::bail!("bbox must have 4 values: MIN_LON,MIN_LAT,MAX_LON,MAX_LAT"),
    };

    // `f64::from_str` accepts spellings such as "NaN" and "inf"; those can
    // never describe a usable bounding box.
    if parts.iter().any(|value| !value.is_finite()) {
        anyhow::bail!("bbox values must be finite numbers");
    }

    Ok((min_lon, min_lat, max_lon, max_lat))
}
/// Runs the Overture Maps extraction command.
///
/// Steps:
/// 1. Load the configuration (falling back to defaults) and set up logging.
/// 2. Resolve the area of interest to a bounding box, either from `--bbox`
///    (which wins when both are given) or from the GeoJSON file named by
///    `--polygon`.
/// 3. Request the segment listing for the selected release and theme and
///    count the `.parquet` keys it contains.
///
/// `args.output` is not used by this function.
///
/// # Errors
///
/// Every path through this function currently ends in an error, because
/// parquet parsing is not implemented: the error text tells the user which
/// alternative tooling to use. Errors are also returned when neither `--bbox`
/// nor `--polygon` is given, when the bbox or polygon file cannot be
/// read/parsed, when the HTTP client cannot be built, when the request fails
/// or returns a non-success status, and when the response body cannot be read.
pub async fn run(args: Args) -> Result<()> {
    // If loading the configuration fails, continue with `Config::default()`.
    let config = Config::load().unwrap_or_default();
    config.init_logging();

    tracing::info!("Extracting Overture Maps data (theme: {})", args.theme);

    // One exhaustive match replaces the former `is_none()` check followed by
    // an `unwrap()`, so the "neither given" case cannot be missed.
    let (min_lon, min_lat, max_lon, max_lat) =
        match (args.bbox.as_deref(), args.polygon.as_deref()) {
            (Some(bbox), _) => parse_bbox(bbox)?,
            (None, Some(polygon_path)) => {
                let poly_str = std::fs::read_to_string(polygon_path)
                    .context("Failed to read polygon file")?;
                let geojson: GeoJson = poly_str
                    .parse()
                    .context("Failed to parse polygon GeoJSON")?;
                extract_bbox_from_geojson(&geojson)?
            }
            (None, None) => anyhow::bail!("Either --bbox or --polygon must be specified"),
        };

    tracing::info!(
        "Bounding box: ({:.4},{:.4}) - ({:.4},{:.4})",
        min_lon, min_lat, max_lon, max_lat
    );

    let base_url = format!(
        "https://overturemaps-us-west-2.s3.amazonaws.com/{}/{}",
        args.release, args.theme
    );
    tracing::info!("Fetching Overture data from: {}", base_url);

    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(config.timeout_secs))
        .build()?;

    // NOTE(review): S3 usually serves bucket listings from the bucket root
    // with a `prefix` query parameter; confirm that this path-style URL
    // really returns a `ListBucketResult` document.
    let segment_url = format!("{}/type=segment/", base_url);
    tracing::info!("Fetching segment listing from: {}", segment_url);

    let response = client.get(&segment_url).send().await;
    match response {
        Ok(resp) if resp.status().is_success() => {
            // A body that cannot be read is reported as such instead of being
            // treated as an empty listing.
            let body = resp
                .text()
                .await
                .context("Failed to read Overture segment listing response")?;
            let parquet_files = parse_s3_listing(&body);

            if parquet_files.is_empty() {
                // No download is attempted after this warning; the command
                // stops with guidance for the user.
                tracing::warn!("No parquet files found in listing, trying direct download");
                anyhow::bail!(
                    "Overture Maps data extraction requires parquet processing.\n\
                     \n\
                     To extract Overture data, use one of these approaches:\n\
                     1. Install the Overture CLI: pip install overturemaps\n\
                     2. Use the convert-osm command with a local .osm.pbf file\n\
                     3. Use extract-osm to download from Overpass API\n\
                     \n\
                     The Overture data is available at:\n\
                     {}",
                    base_url
                );
            }

            tracing::info!("Found {} parquet files", parquet_files.len());
            anyhow::bail!(
                "Overture Maps parquet files found but parquet parsing is not yet available.\n\
                 \n\
                 Found {} parquet files. To process them:\n\
                 1. Use the Overture CLI: overturemaps download --bbox={},{},{},{} --type=segment\n\
                 2. Convert the resulting GeoJSON with: rmpca clean input.geojson -o output.geojson\n\
                 \n\
                 Files available at: {}",
                parquet_files.len(),
                min_lon, min_lat, max_lon, max_lat,
                segment_url
            );
        }
        Ok(resp) => {
            let status = resp.status();
            tracing::warn!("Overture API returned status: {}", status);
            anyhow::bail!(
                "Overture Maps API returned status: {}\n\
                 \n\
                 The release '{}' may not be available. Try:\n\
                 - Check available releases at: https://overturemaps.org/download/\n\
                 - Use --release flag to specify a different release\n\
                 \n\
                 Alternatively, use extract-osm to download from Overpass API",
                status,
                args.release
            );
        }
        Err(e) => {
            tracing::warn!("Failed to connect to Overture Maps: {}", e);
            anyhow::bail!(
                "Failed to connect to Overture Maps: {}\n\
                 \n\
                 Check your internet connection and try again.\n\
                 Alternatively, use extract-osm to download from Overpass API.",
                e
            );
        }
    }
}
fn extract_bbox_from_geojson(geojson: &GeoJson) -> Result<(f64, f64, f64, f64)> {
match geojson {
GeoJson::FeatureCollection(fc) => {
let mut min_lon = f64::INFINITY;
let mut min_lat = f64::INFINITY;
let mut max_lon = f64::NEG_INFINITY;
let mut max_lat = f64::NEG_INFINITY;
for feature in &fc.features {
if let Some(ref geom) = feature.geometry {
extract_coords_bounds(&geom.value, &mut min_lon, &mut min_lat, &mut max_lon, &mut max_lat);
}
}
if min_lon.is_infinite() {
anyhow::bail!("No coordinates found in polygon file");
}
Ok((min_lon, min_lat, max_lon, max_lat))
}
_ => anyhow::bail!("Polygon file must be a GeoJSON FeatureCollection"),
}
}
/// Widens the running bounds so that they cover every position inside `value`.
///
/// The four accumulators are updated in place. A position contributes its
/// first element as longitude and its second as latitude; positions with
/// fewer than two elements are ignored. Geometry collections are handled by
/// recursing into each member geometry.
fn extract_coords_bounds(
    value: &Value,
    min_lon: &mut f64,
    min_lat: &mut f64,
    max_lon: &mut f64,
    max_lat: &mut f64,
) {
    /// Folds one position into the bounds, skipping positions that are too short.
    fn include(
        position: &[f64],
        min_lon: &mut f64,
        min_lat: &mut f64,
        max_lon: &mut f64,
        max_lat: &mut f64,
    ) {
        if position.len() < 2 {
            return;
        }
        let (lon, lat) = (position[0], position[1]);
        *min_lon = min_lon.min(lon);
        *min_lat = min_lat.min(lat);
        *max_lon = max_lon.max(lon);
        *max_lat = max_lat.max(lat);
    }

    match value {
        Value::Point(position) => include(position, min_lon, min_lat, max_lon, max_lat),
        Value::MultiPoint(positions) | Value::LineString(positions) => {
            for position in positions {
                include(position, min_lon, min_lat, max_lon, max_lat);
            }
        }
        Value::MultiLineString(lines) | Value::Polygon(lines) => {
            for position in lines.iter().flatten() {
                include(position, min_lon, min_lat, max_lon, max_lat);
            }
        }
        Value::MultiPolygon(polygons) => {
            for position in polygons.iter().flatten().flatten() {
                include(position, min_lon, min_lat, max_lon, max_lat);
            }
        }
        Value::GeometryCollection(members) => {
            for member in members {
                extract_coords_bounds(&member.value, min_lon, min_lat, max_lon, max_lat);
            }
        }
    }
}
/// Returns the `.parquet` object keys named in a listing document.
///
/// The input is scanned as plain text: for each piece produced by splitting
/// on `<Key>`, the text up to the first `</Key>` is taken as a key. Keys are
/// returned in document order, and only those ending in `.parquet` are kept.
fn parse_s3_listing(xml: &str) -> Vec<String> {
    xml.split("<Key>")
        .filter_map(|fragment| fragment.split_once("</Key>"))
        .map(|(key, _rest)| key)
        .filter(|key| key.ends_with(".parquet"))
        .map(str::to_owned)
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The argument struct can be built by hand and keeps the given bbox.
    #[test]
    fn test_extract_overture_args() {
        let args = Args {
            bbox: Some("-73.59,45.49,-73.55,45.52".to_string()),
            polygon: None,
            output: None,
            release: "2024-04-16-beta.0".to_string(),
            theme: "transportation".to_string(),
        };
        assert_eq!(args.bbox, Some("-73.59,45.49,-73.55,45.52".to_string()));
    }

    /// A well-formed bbox parses; too few components is an error.
    #[test]
    fn test_parse_bbox() {
        let bbox = parse_bbox("-73.59,45.49,-73.55,45.52").unwrap();
        assert_eq!(bbox, (-73.59, 45.49, -73.55, 45.52));
        assert!(parse_bbox("1,2,3").is_err());
    }

    /// Whitespace around components is ignored.
    #[test]
    fn test_parse_bbox_trims_whitespace() {
        let bbox = parse_bbox(" -73.59 , 45.49 , -73.55 , 45.52 ").unwrap();
        assert_eq!(bbox, (-73.59, 45.49, -73.55, 45.52));
    }

    /// Non-numeric, empty, and over-long inputs are rejected.
    #[test]
    fn test_parse_bbox_rejects_malformed_input() {
        assert!(parse_bbox("").is_err());
        assert!(parse_bbox("a,b,c,d").is_err());
        assert!(parse_bbox("1,2,3,4,5").is_err());
    }

    /// Only keys ending in `.parquet` are returned.
    #[test]
    fn test_parse_s3_listing() {
        let xml = r#"<ListBucketResult><Key>segment/file.parquet</Key><Key>segment/other.txt</Key></ListBucketResult>"#;
        let files = parse_s3_listing(xml);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0], "segment/file.parquet");
    }

    /// Input without any keys yields an empty list.
    #[test]
    fn test_parse_s3_listing_without_keys() {
        assert!(parse_s3_listing("").is_empty());
        assert!(parse_s3_listing("<ListBucketResult></ListBucketResult>").is_empty());
    }

    /// The bbox spans the coordinates of every feature in the collection.
    #[test]
    fn test_extract_bbox_from_feature_collection() {
        let geojson: GeoJson = r#"{
            "type": "FeatureCollection",
            "features": [
                {
                    "type": "Feature",
                    "properties": {},
                    "geometry": {
                        "type": "Polygon",
                        "coordinates": [[[-74.0, 45.0], [-73.5, 45.0], [-73.5, 45.5], [-74.0, 45.5], [-74.0, 45.0]]]
                    }
                },
                {
                    "type": "Feature",
                    "properties": {},
                    "geometry": { "type": "Point", "coordinates": [-73.0, 46.0] }
                }
            ]
        }"#
        .parse()
        .unwrap();

        let bbox = extract_bbox_from_geojson(&geojson).unwrap();
        assert_eq!(bbox, (-74.0, 45.0, -73.0, 46.0));
    }

    /// A collection without any coordinates is an error.
    #[test]
    fn test_extract_bbox_from_empty_feature_collection() {
        let geojson: GeoJson = r#"{"type": "FeatureCollection", "features": []}"#
            .parse()
            .unwrap();
        assert!(extract_bbox_from_geojson(&geojson).is_err());
    }
}