// rmpca 0.2.0
//
// Enterprise-grade unified CLI for rmp.ca operations - Rust port
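//! Extract Overture Maps road data command
//!
//! This command resolves a bounding box, either from `--bbox` or from the
//! extent of a GeoJSON polygon file, and then requests the road segment
//! listing from the Overture Maps public S3 bucket. Decoding the GeoParquet
//! files is not implemented yet, so the command currently finishes by
//! reporting how to obtain the data with external tools.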

use crate::config::Config;
use anyhow::{Context, Result};
use clap::Args as ClapArgs;
use geojson::{Feature, FeatureCollection, GeoJson, Geometry, Value};
use std::path::PathBuf;

// Command-line arguments for the Overture extraction command.
//
// NOTE(review): the `///` comments on the fields below are presumably picked
// up by the clap derive as `--help` text, so treat them as user-facing
// strings rather than ordinary documentation — confirm before rewording.
#[derive(Debug, ClapArgs)]
pub struct Args {
    // When both `bbox` and `polygon` are supplied, `run` uses `bbox` and never
    // reads the polygon file.
    /// Bounding box: MIN_LON,MIN_LAT,MAX_LON,MAX_LAT
    #[arg(long)]
    pub bbox: Option<String>,

    // Only the overall extent of the geometries in this file is used by `run`;
    // the polygon outline itself is not applied as a filter.
    /// Polygon file for Overture extraction (GeoJSON)
    #[arg(long)]
    pub polygon: Option<PathBuf>,

    // NOTE(review): nothing in the visible code reads `output`; `run` ends
    // with an error on every path before any data could be written.
    /// Output file (default: stdout)
    #[arg(short, long)]
    pub output: Option<PathBuf>,

    // Interpolated verbatim into the S3 URL built by `run`.
    /// Overture release (default: 2024-04-16-beta.0)
    #[arg(long, default_value = "2024-04-16-beta.0")]
    pub release: String,

    // Interpolated verbatim into the S3 URL built by `run`; the URL always
    // targets `type=segment` regardless of the theme chosen here.
    /// Theme to extract (default: transportation)
    #[arg(long, default_value = "transportation")]
    pub theme: String,
}

/// Parse a `MIN_LON,MIN_LAT,MAX_LON,MAX_LAT` bounding-box string.
///
/// The input is split on commas; each piece is trimmed and parsed as `f64`.
/// The values are returned in the order they were written.
///
/// # Errors
///
/// Returns an error when
/// - any piece is not a valid floating-point number,
/// - the number of values is not exactly four, or
/// - any value is NaN or infinite.
fn parse_bbox(bbox: &str) -> Result<(f64, f64, f64, f64)> {
    let parts: Vec<f64> = bbox
        .split(',')
        .map(|s| s.trim().parse::<f64>())
        .collect::<Result<Vec<f64>, _>>()
        .context("Invalid bbox format")?;

    if parts.len() != 4 {
        anyhow::bail!("bbox must have 4 values: MIN_LON,MIN_LAT,MAX_LON,MAX_LAT");
    }

    // `f64::from_str` happily accepts "NaN", "inf" and "infinity"; none of
    // those is a usable coordinate, so reject them here instead of letting
    // them leak into log lines and generated commands.
    if parts.iter().any(|v| !v.is_finite()) {
        anyhow::bail!("bbox values must be finite numbers: MIN_LON,MIN_LAT,MAX_LON,MAX_LAT");
    }

    Ok((parts[0], parts[1], parts[2], parts[3]))
}

/// Extract Overture Maps road data
///
/// This downloads road segment data from Overture Maps' public S3 bucket.
/// Overture provides GeoParquet files organized by theme and type.
/// We use the AWS S3 public endpoint to list and download relevant tiles.
pub async fn run(args: Args) -> Result<()> {
    let config = Config::load().unwrap_or_default();
    config.init_logging();

    tracing::info!("Extracting Overture Maps data (theme: {})", args.theme);

    // Validate that we have either bbox or polygon
    if args.bbox.is_none() && args.polygon.is_none() {
        anyhow::bail!("Either --bbox or --polygon must be specified");
    }

    // Parse bbox
    let (min_lon, min_lat, max_lon, max_lat) = if let Some(ref bbox) = args.bbox {
        parse_bbox(bbox)?
    } else {
        // Try to extract bbox from polygon file
        let poly_str = std::fs::read_to_string(args.polygon.as_ref().unwrap())
            .context("Failed to read polygon file")?;
        let geojson: GeoJson = poly_str.parse().context("Failed to parse polygon GeoJSON")?;
        extract_bbox_from_geojson(&geojson)?
    };

    tracing::info!(
        "Bounding box: ({:.4},{:.4}) - ({:.4},{:.4})",
        min_lon, min_lat, max_lon, max_lat
    );

    // Overture Maps data is served as GeoParquet from S3
    // We'll use the public endpoint to fetch data
    let base_url = format!(
        "https://overturemaps-us-west-2.s3.amazonaws.com/{}/{}",
        args.release, args.theme
    );

    tracing::info!("Fetching Overture data from: {}", base_url);

    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(config.timeout_secs))
        .build()?;

    // For the transportation theme, we need to fetch the segment type
    // Overture organizes data as: {release}/{theme}/type={type}/*.parquet
    // We'll try to fetch the segment type which contains road data
    let segment_url = format!("{}/type=segment/", base_url);

    tracing::info!("Fetching segment listing from: {}", segment_url);

    // List available parquet files
    let response = client.get(&segment_url).send().await;

    match response {
        Ok(resp) if resp.status().is_success() => {
            let body = resp.text().await.unwrap_or_default();

            // Parse S3 listing to find relevant parquet files
            let parquet_files = parse_s3_listing(&body);

            if parquet_files.is_empty() {
                tracing::warn!("No parquet files found in listing, trying direct download");
                // Try alternative: use the Overture Maps CLI approach
                // For now, generate a helpful error message
                anyhow::bail!(
                    "Overture Maps data extraction requires parquet processing.\n\
                     \n\
                     To extract Overture data, use one of these approaches:\n\
                     1. Install the Overture CLI: pip install overturemaps\n\
                     2. Use the convert-osm command with a local .osm.pbf file\n\
                     3. Use extract-osm to download from Overpass API\n\
                     \n\
                     The Overture data is available at:\n\
                     {}",
                    base_url
                );
            }

            tracing::info!("Found {} parquet files", parquet_files.len());

            // We can't easily parse parquet in pure Rust without additional deps
            // Provide guidance instead
            anyhow::bail!(
                "Overture Maps parquet files found but parquet parsing is not yet available.\n\
                 \n\
                 Found {} parquet files. To process them:\n\
                 1. Use the Overture CLI: overturemaps download --bbox={},{},{},{} --type=segment\n\
                 2. Convert the resulting GeoJSON with: rmpca clean input.geojson -o output.geojson\n\
                 \n\
                 Files available at: {}",
                parquet_files.len(),
                min_lon, min_lat, max_lon, max_lat,
                segment_url
            );
        }
        Ok(resp) => {
            let status = resp.status();
            tracing::warn!("Overture API returned status: {}", status);
            anyhow::bail!(
                "Overture Maps API returned status: {}\n\
                 \n\
                 The release '{}' may not be available. Try:\n\
                 - Check available releases at: https://overturemaps.org/download/\n\
                 - Use --release flag to specify a different release\n\
                 \n\
                 Alternatively, use extract-osm to download from Overpass API",
                status,
                args.release
            );
        }
        Err(e) => {
            tracing::warn!("Failed to connect to Overture Maps: {}", e);
            anyhow::bail!(
                "Failed to connect to Overture Maps: {}\n\
                 \n\
                 Check your internet connection and try again.\n\
                 Alternatively, use extract-osm to download from Overpass API.",
                e
            );
        }
    }
}

/// Extract bounding box from a GeoJSON polygon
fn extract_bbox_from_geojson(geojson: &GeoJson) -> Result<(f64, f64, f64, f64)> {
    match geojson {
        GeoJson::FeatureCollection(fc) => {
            let mut min_lon = f64::INFINITY;
            let mut min_lat = f64::INFINITY;
            let mut max_lon = f64::NEG_INFINITY;
            let mut max_lat = f64::NEG_INFINITY;

            for feature in &fc.features {
                if let Some(ref geom) = feature.geometry {
                    extract_coords_bounds(&geom.value, &mut min_lon, &mut min_lat, &mut max_lon, &mut max_lat);
                }
            }

            if min_lon.is_infinite() {
                anyhow::bail!("No coordinates found in polygon file");
            }

            Ok((min_lon, min_lat, max_lon, max_lat))
        }
        _ => anyhow::bail!("Polygon file must be a GeoJSON FeatureCollection"),
    }
}

/// Fold every position contained in `value` into the running bounds.
///
/// The four `&mut f64` arguments are updated in place: the minima only ever
/// decrease and the maxima only ever increase. Positions with fewer than two
/// components are ignored. Geometry collections are walked recursively.
fn extract_coords_bounds(
    value: &Value,
    min_lon: &mut f64,
    min_lat: &mut f64,
    max_lon: &mut f64,
    max_lat: &mut f64,
) {
    // Stretch the running bounds so they include one (lon, lat) pair.
    fn widen(
        lon: f64,
        lat: f64,
        min_lon: &mut f64,
        min_lat: &mut f64,
        max_lon: &mut f64,
        max_lat: &mut f64,
    ) {
        *min_lon = min_lon.min(lon);
        *min_lat = min_lat.min(lat);
        *max_lon = max_lon.max(lon);
        *max_lat = max_lat.max(lat);
    }

    match value {
        // A single position.
        Value::Point(position) => {
            if position.len() >= 2 {
                widen(position[0], position[1], min_lon, min_lat, max_lon, max_lat);
            }
        }
        // One level of nesting: a flat list of positions.
        Value::LineString(positions) | Value::MultiPoint(positions) => {
            for p in positions.iter().filter(|p| p.len() >= 2) {
                widen(p[0], p[1], min_lon, min_lat, max_lon, max_lat);
            }
        }
        // Two levels: a list of rings / lines.
        Value::Polygon(lines) | Value::MultiLineString(lines) => {
            for p in lines.iter().flatten().filter(|p| p.len() >= 2) {
                widen(p[0], p[1], min_lon, min_lat, max_lon, max_lat);
            }
        }
        // Three levels: a list of polygons, each a list of rings.
        Value::MultiPolygon(polygons) => {
            for p in polygons.iter().flatten().flatten().filter(|p| p.len() >= 2) {
                widen(p[0], p[1], min_lon, min_lat, max_lon, max_lat);
            }
        }
        // Nested geometries: recurse into each member.
        Value::GeometryCollection(members) => {
            for member in members {
                extract_coords_bounds(&member.value, min_lon, min_lat, max_lon, max_lat);
            }
        }
    }
}

/// Collect the `.parquet` object keys from an S3 XML listing.
///
/// This is a plain text scan, not an XML parser: the input is cut at every
/// `<Key>` marker, and for each piece the text up to its first `</Key>` is
/// taken as a key. Pieces without a closing tag are dropped. Only keys ending
/// in `.parquet` are returned, in document order.
fn parse_s3_listing(xml: &str) -> Vec<String> {
    xml.split("<Key>")
        // Keep the text before the first closing tag, if there is one.
        .filter_map(|chunk| chunk.split_once("</Key>").map(|(key, _rest)| key))
        .filter(|key| key.ends_with(".parquet"))
        .map(str::to_owned)
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// An `Args` value built by hand keeps the bbox string exactly as given.
    #[test]
    fn test_extract_overture_args() {
        let raw_bbox = "-73.59,45.49,-73.55,45.52";
        let args = Args {
            bbox: Some(raw_bbox.to_string()),
            polygon: None,
            output: None,
            release: "2024-04-16-beta.0".to_string(),
            theme: "transportation".to_string(),
        };
        assert_eq!(args.bbox.as_deref(), Some(raw_bbox));
    }

    /// Four comma-separated numbers parse in order; three are rejected.
    #[test]
    fn test_parse_bbox() {
        let parsed = parse_bbox("-73.59,45.49,-73.55,45.52");
        assert_eq!(parsed.unwrap(), (-73.59, 45.49, -73.55, 45.52));

        let too_short = parse_bbox("1,2,3");
        assert!(too_short.is_err());
    }

    /// Only keys ending in `.parquet` survive the listing scan.
    #[test]
    fn test_parse_s3_listing() {
        let listing = r#"<ListBucketResult><Key>segment/file.parquet</Key><Key>segment/other.txt</Key></ListBucketResult>"#;
        let keys = parse_s3_listing(listing);
        assert_eq!(keys.len(), 1);
        assert_eq!(keys[0], "segment/file.parquet");
    }
}