// rmpca 0.2.0
//
// Enterprise-grade unified CLI for rmp.ca operations - Rust port
//! Pipeline command: End-to-end data processing
//!
//! This command orchestrates the full pipeline: extract → clean → optimize → export

use crate::config::Config;
use crate::optimizer::RouteOptimizer;
use anyhow::{Context, Result};
use clap::Args as ClapArgs;
use geojson::GeoJson;
use std::path::PathBuf;

// Command-line arguments for the `pipeline` subcommand.
//
// The `///` comments on the fields below are picked up by clap and shown as
// the `--help` text of each flag, so they are written for end users and kept
// to a single line each. Struct-level notes are deliberately plain `//`
// comments so they do not leak into the generated help output.
#[derive(Debug, ClapArgs)]
pub struct Args {
    /// Bounding box: MIN_LON,MIN_LAT,MAX_LON,MAX_LAT (required unless --input is given)
    #[arg(long)]
    bbox: Option<String>,

    /// Polygon file for extraction (only used with --source overture)
    #[arg(long)]
    polygon: Option<PathBuf>,

    /// Data source: overture or osm (default: osm)
    #[arg(long, default_value = "osm")]
    source: String,

    /// Input file (skip extraction)
    #[arg(long)]
    input: Option<PathBuf>,

    /// Output file (default: pipeline-output.geojson)
    #[arg(short, long)]
    output: Option<PathBuf>,

    /// Export as GPX
    #[arg(long)]
    gpx: bool,

    /// Skip cleaning step
    #[arg(long)]
    no_clean: bool,

    // Each turn-penalty flag carries its own help line; a single shared doc
    // comment would only attach to the first field and leave the other two
    // undocumented in `--help`.
    /// Penalty for left turns (default: value from config)
    #[arg(long)]
    turn_left: Option<f64>,
    /// Penalty for right turns (default: value from config)
    #[arg(long)]
    turn_right: Option<f64>,
    /// Penalty for U-turns (default: value from config)
    #[arg(long)]
    turn_u: Option<f64>,

    /// Depot location (LAT,LON)
    #[arg(long)]
    depot: Option<String>,

    /// Highway class filter for extraction (only used with --source osm)
    #[arg(long)]
    highway: Option<String>,
}

/// End-to-end pipeline: extract → clean → optimize → export
pub async fn run(args: Args) -> Result<()> {
    let config = Config::load().unwrap_or_default();
    config.init_logging();

    tracing::info!("Starting pipeline");

    // ── Step 1: Extract or load data ──────────────────────────────────────
    let geojson_path = if let Some(ref input) = args.input {
        tracing::info!("Using input file: {}", input.display());
        input.clone()
    } else {
        // Need to extract data
        let bbox = args.bbox.as_ref()
            .context("Either --input or --bbox must be specified")?;

        let extract_output = std::env::temp_dir().join("rmpca_extract.geojson");

        match args.source.as_str() {
            "osm" => {
                tracing::info!("Extracting OSM data for bbox: {}", bbox);
                let extract_args = crate::commands::extract_osm::Args {
                    bbox: bbox.clone(),
                    output: Some(extract_output.clone()),
                    highway: args.highway.clone(),
                    overpass_url: "https://overpass-api.de/api/interpreter".to_string(),
                };
                crate::commands::extract_osm::run(extract_args).await
                    .context("Extraction step failed")?;
            }
            "overture" => {
                tracing::info!("Extracting Overture Maps data for bbox: {}", bbox);
                let extract_args = crate::commands::extract_overture::Args {
                    bbox: Some(bbox.clone()),
                    polygon: args.polygon.clone(),
                    output: Some(extract_output.clone()),
                    release: "2024-04-16-beta.0".to_string(),
                    theme: "transportation".to_string(),
                };
                crate::commands::extract_overture::run(extract_args).await
                    .context("Extraction step failed")?;
            }
            other => {
                anyhow::bail!("Unknown data source: {}. Use 'osm' or 'overture'.", other);
            }
        }

        extract_output
    };

    // ── Step 2: Clean ────────────────────────────────────────────────────
    let clean_path = if args.no_clean {
        geojson_path.clone()
    } else {
        let clean_output = std::env::temp_dir().join("rmpca_cleaned.geojson");
        tracing::info!("Cleaning GeoJSON: {}", geojson_path.display());

        let clean_args = crate::commands::clean::Args {
            input: geojson_path.clone(),
            output: Some(clean_output.clone()),
            min_length: 1.0,
            stats: true,
        };
        crate::commands::clean::run(clean_args).await
            .context("Cleaning step failed")?;

        clean_output
    };

    // ── Step 3: Validate ──────────────────────────────────────────────────
    tracing::info!("Validating cleaned GeoJSON: {}", clean_path.display());
    let validate_args = crate::commands::validate::Args {
        input: clean_path.clone(),
        remote: false,
        verbose: false,
    };
    if let Err(e) = crate::commands::validate::run(validate_args).await {
        tracing::warn!("Validation warnings: {}", e);
        // Don't fail the pipeline on validation warnings
    }

    // ── Step 4: Optimize ─────────────────────────────────────────────────
    tracing::info!("Optimizing route from: {}", clean_path.display());

    let geojson_str = std::fs::read_to_string(&clean_path)
        .with_context(|| format!("Failed to read {}", clean_path.display()))?;

    let feature_collection: geojson::FeatureCollection = geojson_str.parse()
        .context("Failed to parse GeoJSON")?;

    let mut optimizer = RouteOptimizer::new();
    optimizer.build_graph_from_features(&feature_collection.features)?;

    // Set turn penalties
    let turn_left = args.turn_left.unwrap_or(config.turn_left_penalty);
    let turn_right = args.turn_right.unwrap_or(config.turn_right_penalty);
    let turn_u = args.turn_u.unwrap_or(config.turn_u_penalty);
    optimizer.set_turn_penalties(turn_left, turn_right, turn_u);

    // Set depot
    if let Some(ref depot_str) = args.depot {
        let parts: Vec<f64> = depot_str
            .split(',')
            .map(|s| s.trim().parse::<f64>())
            .collect::<Result<Vec<f64>, _>>()
            .context("Invalid depot format. Use LAT,LON")?;
        if parts.len() != 2 {
            anyhow::bail!("Depot must be LAT,LON");
        }
        optimizer.set_depot(parts[0], parts[1]);
    }

    let result = optimizer.optimize()?;

    // ── Step 5: Export ────────────────────────────────────────────────────
    let output_path = args.output.clone().unwrap_or_else(|| {
        PathBuf::from("pipeline-output.geojson")
    });

    let output_text = if args.gpx {
        convert_to_gpx(&result)
    } else {
        // Output as GeoJSON FeatureCollection with the route
        let route_coords: Vec<Vec<f64>> = result.route.iter()
            .map(|p| vec![p.longitude, p.latitude])
            .collect();

        let geometry = geojson::Geometry::new(geojson::Value::LineString(route_coords));
        let mut properties = serde_json::Map::new();
        properties.insert("total_distance_km".to_string(), serde_json::Value::Number(
            serde_json::Number::from_f64(result.total_distance).unwrap_or(serde_json::Number::from(0))
        ));
        properties.insert("message".to_string(), serde_json::Value::String(result.message.clone()));
        properties.insert("point_count".to_string(), serde_json::Value::Number(
            serde_json::Number::from(result.route.len())
        ));

        let feature = geojson::Feature {
            geometry: Some(geometry),
            properties: Some(properties),
            ..Default::default()
        };

        let fc = geojson::FeatureCollection {
            features: vec![feature],
            bbox: None,
            foreign_members: None,
        };

        let geojson = GeoJson::from(fc);
        serde_json::to_string_pretty(&geojson)?
    };

    std::fs::write(&output_path, &output_text)
        .with_context(|| format!("Failed to write to {}", output_path.display()))?;

    tracing::info!(
        "Pipeline complete: {} points, {:.2} km → {}",
        result.route.len(),
        result.total_distance,
        output_path.display()
    );

    println!(
        "Pipeline complete: {} route points, {:.2} km total distance",
        result.route.len(),
        result.total_distance
    );
    println!("Output written to: {}", output_path.display());

    Ok(())
}

/// Renders the optimized route as a GPX 1.1 document.
///
/// The output contains a single track named "Pipeline Optimized Route" with
/// one track segment; every route point becomes a `<trkpt>` element on its
/// own line, in route order. The root element declares the GPX 1.1 namespace,
/// which the GPX schema requires and which strict readers check for.
///
/// An empty route yields a valid document with an empty `<trkseg>`.
fn convert_to_gpx(result: &crate::optimizer::OptimizationResult) -> String {
    // Brings `writeln!` support for `String` into scope for this function only.
    use std::fmt::Write as _;

    let mut gpx = String::from(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<gpx version="1.1" creator="rmpca-pipeline" xmlns="http://www.topografix.com/GPX/1/1">
  <trk><name>Pipeline Optimized Route</name><trkseg>
"#,
    );

    for point in &result.route {
        // Formatting into a `String` cannot fail, so the result is ignored.
        let _ = writeln!(
            gpx,
            r#"    <trkpt lat="{}" lon="{}" />"#,
            point.latitude, point.longitude
        );
    }

    gpx.push_str("  </trkseg></trk>\n</gpx>");

    gpx
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Constructs `Args` by hand (no CLI parsing involved) and checks that the
    /// values placed in the struct are the ones read back from its fields.
    #[test]
    fn test_pipeline_args() {
        // A small bounding box given as MIN_LON,MIN_LAT,MAX_LON,MAX_LAT.
        let bounding_box = String::from("-73.59,45.49,-73.55,45.52");
        let left_penalty = 1.0_f64;

        let args = Args {
            // Values the assertions below look at.
            source: String::from("osm"),
            turn_left: Some(left_penalty),
            no_clean: false,
            // Remaining fields: a bbox and everything else switched off.
            bbox: Some(bounding_box),
            gpx: false,
            polygon: None,
            input: None,
            output: None,
            turn_right: None,
            turn_u: None,
            depot: None,
            highway: None,
        };

        assert_eq!(args.source, "osm");
        assert_eq!(args.turn_left, Some(1.0));
        assert!(!args.no_clean);
    }
}