// rmpca 0.2.0
//
// Enterprise-grade unified CLI for rmp.ca operations - Rust port
//! Clean command: Clean/repair GeoJSON files
//!
//! This command removes self-loops, duplicates, and short segments
//! from GeoJSON files.

use crate::config::Config;
use anyhow::{Context, Result};
use clap::Args as ClapArgs;
use geojson::{Feature, FeatureCollection, GeoJson, Geometry, Value};
use std::collections::HashSet;
use std::path::PathBuf;

// Command-line arguments accepted by the clean command (consumed by `run`).
//
// NOTE(review): the `///` comments on the fields below are picked up by clap's
// derive macro as user-facing help text, so they are effectively runtime
// strings. Reviewer notes are therefore written as plain `//` comments.
#[derive(Debug, ClapArgs)]
pub struct Args {
    // No `#[arg(short/long)]` attribute here, so clap presumably treats this
    // as a required positional argument. `run` reads the whole file into memory.
    /// Input GeoJSON file
    pub input: PathBuf,

    // When `None`, `run` prints the cleaned GeoJSON to stdout instead.
    /// Output file (default: stdout)
    #[arg(short, long)]
    pub output: Option<PathBuf>,

    // Threshold compared against the haversine length of each LineString (and
    // of each member line of a MultiLineString); lines strictly shorter than
    // this are dropped by `run`.
    /// Minimum segment length in meters (default: 1.0)
    #[arg(long, default_value = "1.0")]
    pub min_length: f64,

    // When set, `run` prints a summary of input/output counts and of how many
    // items were removed or skipped, after the cleaned output has been emitted.
    /// Print statistics
    #[arg(long)]
    pub stats: bool,
}

/// Haversine (great-circle) distance between two points, in meters.
///
/// All four arguments are in decimal degrees, latitude before longitude.
/// The Earth is modelled as a sphere of radius 6,371,000 m.
///
/// The intermediate haversine term is clamped to `[0, 1]`: for (near-)antipodal
/// points floating-point rounding can push it marginally above 1, which would
/// otherwise make `(1 - a).sqrt()` NaN and poison every length derived from it.
/// NaN inputs still yield NaN.
fn haversine(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
    const R: f64 = 6_371_000.0;
    let dlat = (lat2 - lat1).to_radians();
    let dlon = (lon2 - lon1).to_radians();
    let a = (dlat / 2.0).sin().powi(2)
        + lat1.to_radians().cos() * lat2.to_radians().cos() * (dlon / 2.0).sin().powi(2);
    // Mathematically 0 <= a <= 1; enforce it so both square roots stay real.
    let a = a.clamp(0.0, 1.0);
    let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
    R * c
}

/// Sum of the haversine lengths, in meters, of every consecutive pair of
/// positions in a LineString.
///
/// Positions are read as `[lon, lat, ...]`. A pair in which either position
/// has fewer than two ordinates contributes nothing to the total. Fewer than
/// two positions yields `0.0`.
fn linestring_length(coords: &[Vec<f64>]) -> f64 {
    coords
        .windows(2)
        .filter_map(|pair| match (pair[0].as_slice(), pair[1].as_slice()) {
            ([from_lon, from_lat, ..], [to_lon, to_lat, ..]) => {
                Some(haversine(*from_lat, *from_lon, *to_lat, *to_lon))
            }
            _ => None,
        })
        // Explicit fold from +0.0 keeps the accumulation order and seed of a
        // plain running total.
        .fold(0.0, |running, segment| running + segment)
}

/// Report whether a LineString closes on itself, i.e. its first and last
/// positions coincide.
///
/// Only the first two ordinates of each endpoint are compared, each with an
/// absolute tolerance of `1e-9`. Lines with fewer than two positions, or whose
/// endpoints carry fewer than two ordinates, are never considered loops.
fn is_self_loop(coords: &[Vec<f64>]) -> bool {
    const TOLERANCE: f64 = 1e-9;

    let (start, end) = match coords {
        [start, .., end] => (start.as_slice(), end.as_slice()),
        _ => return false,
    };

    match (start, end) {
        ([sx, sy, ..], [ex, ey, ..]) => {
            (sx - ex).abs() < TOLERANCE && (sy - ey).abs() < TOLERANCE
        }
        _ => false,
    }
}

/// Build an order-independent key from the two endpoints of a single line.
///
/// Each endpoint is rendered as `lat,lon` with seven decimal places; the two
/// strings are sorted and joined with `|`, so a line and its reverse produce
/// the same key. Returns `None` for an empty line or when either endpoint has
/// fewer than two ordinates (instead of panicking on an out-of-bounds index).
fn endpoint_key(line: &[Vec<f64>]) -> Option<String> {
    let first = line.first()?;
    let last = line.last()?;
    if first.len() < 2 || last.len() < 2 {
        return None;
    }
    let mut parts = [
        format!("{:.7},{:.7}", first[1], first[0]),
        format!("{:.7},{:.7}", last[1], last[0]),
    ];
    parts.sort();
    Some(parts.join("|"))
}

/// Generate a deduplication key for a feature.
///
/// * `LineString`: key derived from its first and last position.
/// * `MultiLineString`: key derived from the first and last position of its
///   *first* member line only.
/// * Any other geometry type, a missing geometry, an empty line, or endpoints
///   with fewer than two ordinates: `None` (the feature cannot be keyed).
///
/// Only the endpoints take part in the key: interior vertices are ignored, so
/// two lines with different shapes but the same endpoints share a key.
fn feature_dedup_key(feature: &Feature) -> Option<String> {
    let geom = feature.geometry.as_ref()?;
    match &geom.value {
        Value::LineString(coords) => endpoint_key(coords),
        Value::MultiLineString(multi) => endpoint_key(multi.first()?),
        _ => None,
    }
}

/// Clean/repair a GeoJSON file.
///
/// Reads `args.input`, which must parse as a `FeatureCollection` or a single
/// `Feature` (a bare `Geometry` is rejected), and then, per feature:
///
/// * no geometry: dropped and counted as skipped;
/// * `LineString`: dropped if it is a self-loop, if it is shorter than
///   `args.min_length` meters, or if an earlier kept line had the same
///   endpoint key;
/// * `MultiLineString`: member lines that are self-loops or too short are
///   removed; if nothing is left the feature is dropped; if exactly one line
///   is left the geometry is collapsed to a `LineString` and deduplicated like
///   one; otherwise the reduced `MultiLineString` is kept without
///   deduplication;
/// * points and polygons (single or multi): passed through unchanged;
/// * anything else: dropped and counted as an unknown type.
///
/// The result is written as pretty-printed JSON to `args.output`, or to stdout
/// when no output path is given. With `args.stats`, a summary is printed to
/// stdout when the JSON went to a file, and to stderr when the JSON went to
/// stdout, so that piped output remains valid GeoJSON.
///
/// # Errors
///
/// Fails if the input cannot be read or parsed, if it is a bare geometry, if
/// serialization fails, or if the output file cannot be written.
pub async fn run(args: Args) -> Result<()> {
    let config = Config::load().unwrap_or_default();
    config.init_logging();

    tracing::info!("Cleaning GeoJSON: {}", args.input.display());

    // Read input
    let geojson_str = std::fs::read_to_string(&args.input)
        .with_context(|| format!("Failed to read {}", args.input.display()))?;

    let geojson: GeoJson = geojson_str.parse().context("Failed to parse GeoJSON")?;

    let fc = match geojson {
        GeoJson::FeatureCollection(fc) => fc,
        GeoJson::Feature(f) => FeatureCollection {
            features: vec![f],
            bbox: None,
            foreign_members: None,
        },
        GeoJson::Geometry(_) => {
            anyhow::bail!("Expected FeatureCollection, got bare Geometry");
        }
    };

    let input_count = fc.features.len();
    let mut stats = CleanStats::default();

    let mut cleaned: Vec<Feature> = Vec::with_capacity(input_count);
    let mut seen_keys: HashSet<String> = HashSet::new();

    for feature in fc.features {
        let geom = match feature.geometry.as_ref() {
            Some(g) => g,
            None => {
                stats.skipped_no_geometry += 1;
                continue;
            }
        };

        match &geom.value {
            Value::LineString(coords) => {
                // Skip self-loops
                if is_self_loop(coords) {
                    stats.self_loops_removed += 1;
                    continue;
                }

                // Skip short segments
                if linestring_length(coords) < args.min_length {
                    stats.short_segments_removed += 1;
                    continue;
                }

                // Deduplicate: `insert` returns false when the key was already present.
                if let Some(key) = feature_dedup_key(&feature) {
                    if !seen_keys.insert(key) {
                        stats.duplicates_removed += 1;
                        continue;
                    }
                }

                cleaned.push(feature);
            }
            Value::MultiLineString(multi) => {
                // Filter the member lines individually; the counters here
                // count lines, not features.
                let mut kept_lines: Vec<Vec<Vec<f64>>> = Vec::with_capacity(multi.len());
                for line in multi {
                    if is_self_loop(line) {
                        stats.self_loops_removed += 1;
                        continue;
                    }
                    if linestring_length(line) < args.min_length {
                        stats.short_segments_removed += 1;
                        continue;
                    }
                    kept_lines.push(line.clone());
                }

                // Every member line was removed: drop the feature entirely.
                if kept_lines.is_empty() {
                    continue;
                }

                // A single survivor is collapsed back to a plain LineString.
                let collapsed = kept_lines.len() == 1;
                let new_value = if collapsed {
                    Value::LineString(kept_lines.swap_remove(0))
                } else {
                    Value::MultiLineString(kept_lines)
                };

                // The borrows of the old geometry ended above, so the feature
                // can be moved and patched in place instead of cloned.
                let mut rebuilt = feature;
                rebuilt.geometry = Some(Geometry::new(new_value));

                // Only collapsed features take part in deduplication.
                if collapsed {
                    if let Some(key) = feature_dedup_key(&rebuilt) {
                        if !seen_keys.insert(key) {
                            stats.duplicates_removed += 1;
                            continue;
                        }
                    }
                }

                cleaned.push(rebuilt);
            }
            Value::Point(_) | Value::MultiPoint(_) => {
                // Pass through points unchanged
                cleaned.push(feature);
            }
            Value::Polygon(_) | Value::MultiPolygon(_) => {
                // Pass through polygons unchanged
                cleaned.push(feature);
            }
            _ => {
                stats.skipped_unknown_type += 1;
            }
        }
    }

    let output_count = cleaned.len();
    let result_fc = FeatureCollection {
        features: cleaned,
        bbox: None,
        foreign_members: None,
    };

    let output_geojson = GeoJson::from(result_fc);
    let json = serde_json::to_string_pretty(&output_geojson)
        .context("Failed to serialize cleaned GeoJSON")?;

    match &args.output {
        Some(path) => {
            std::fs::write(path, &json)
                .with_context(|| format!("Failed to write to {}", path.display()))?;
            tracing::info!("Cleaned GeoJSON written to {}", path.display());
        }
        None => println!("{}", json),
    }

    if args.stats {
        let report = [
            "Clean Statistics:".to_string(),
            format!("  Input features:    {}", input_count),
            format!("  Output features:   {}", output_count),
            format!("  Self-loops removed: {}", stats.self_loops_removed),
            format!("  Duplicates removed: {}", stats.duplicates_removed),
            format!("  Short segments removed: {}", stats.short_segments_removed),
            format!("  Skipped (no geometry): {}", stats.skipped_no_geometry),
            format!("  Skipped (unknown type): {}", stats.skipped_unknown_type),
        ]
        .join("\n");

        if args.output.is_some() {
            println!("{}", report);
        } else {
            // The JSON document already went to stdout; keep that stream
            // machine-readable by reporting on stderr instead.
            eprintln!("{}", report);
        }
    }

    tracing::info!(
        "Cleaned: {} → {} features (removed {} self-loops, {} duplicates, {} short segments)",
        input_count,
        output_count,
        stats.self_loops_removed,
        stats.duplicates_removed,
        stats.short_segments_removed
    );

    Ok(())
}

/// Counters collected while cleaning; every counter starts at zero.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
struct CleanStats {
    /// Lines removed because their first and last positions coincide.
    /// Counts individual lines, including members of a MultiLineString.
    self_loops_removed: usize,
    /// Features removed because an earlier kept feature had the same
    /// endpoint key.
    duplicates_removed: usize,
    /// Lines removed for being shorter than the minimum length.
    /// Counts individual lines, including members of a MultiLineString.
    short_segments_removed: usize,
    /// Features dropped because they carry no geometry.
    skipped_no_geometry: usize,
    /// Features dropped because their geometry type is not handled.
    skipped_unknown_type: usize,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap raw `[lon, lat]` positions in a property-less LineString feature.
    fn line_feature(coords: Vec<Vec<f64>>) -> Feature {
        Feature {
            geometry: Some(Geometry::new(Value::LineString(coords))),
            properties: None,
            ..Default::default()
        }
    }

    #[test]
    fn test_clean_args() {
        let args = Args {
            input: PathBuf::from("test.geojson"),
            output: None,
            min_length: 2.0,
            stats: true,
        };
        assert_eq!(args.input, PathBuf::from("test.geojson"));
        assert!(args.output.is_none());
        assert_eq!(args.min_length, 2.0);
        assert!(args.stats);
    }

    #[test]
    fn test_haversine() {
        // Montreal downtown ~1.2km
        let d = haversine(45.5017, -73.5673, 45.5088, -73.5542);
        assert!(d > 800.0 && d < 2000.0, "d={}", d);

        // Identical points are exactly zero apart.
        assert_eq!(haversine(45.5, -73.6, 45.5, -73.6), 0.0);

        // Distance is symmetric in its endpoints.
        let back = haversine(45.5088, -73.5542, 45.5017, -73.5673);
        assert!((d - back).abs() < 1e-6, "d={} back={}", d, back);
    }

    #[test]
    fn test_self_loop_detection() {
        let coords = vec![vec![-73.6, 45.5], vec![-73.61, 45.51], vec![-73.6, 45.5]];
        assert!(is_self_loop(&coords));

        let open = vec![vec![-73.6, 45.5], vec![-73.61, 45.51], vec![-73.62, 45.52]];
        assert!(!is_self_loop(&open));

        // Degenerate inputs are never loops.
        let empty: Vec<Vec<f64>> = Vec::new();
        assert!(!is_self_loop(&empty));
        let single = vec![vec![-73.6, 45.5]];
        assert!(!is_self_loop(&single));
    }

    #[test]
    fn test_linestring_length() {
        // Zero-length line
        let coords = vec![vec![-73.6, 45.5], vec![-73.6, 45.5]];
        assert_eq!(linestring_length(&coords), 0.0);

        // ~111km per degree of latitude
        let coords = vec![vec![0.0, 0.0], vec![0.0, 1.0]];
        let len = linestring_length(&coords);
        assert!(len > 110_000.0 && len < 112_000.0, "len={}", len);

        // Segments accumulate: two one-degree steps are twice as long.
        let two_steps = vec![vec![0.0, 0.0], vec![0.0, 1.0], vec![0.0, 2.0]];
        let double = linestring_length(&two_steps);
        assert!((double - 2.0 * len).abs() < 1.0, "double={} len={}", double, len);

        // Fewer than two positions has no length.
        let empty: Vec<Vec<f64>> = Vec::new();
        assert_eq!(linestring_length(&empty), 0.0);
        let single = vec![vec![0.0, 0.0]];
        assert_eq!(linestring_length(&single), 0.0);
    }

    #[test]
    fn test_feature_dedup_key() {
        let forward = line_feature(vec![vec![-73.6, 45.5], vec![-73.61, 45.51]]);
        let key1 = feature_dedup_key(&forward);
        assert!(key1.is_some());

        // The key is order-independent: the reversed line maps to the same key.
        let reversed = line_feature(vec![vec![-73.61, 45.51], vec![-73.6, 45.5]]);
        assert_eq!(key1, feature_dedup_key(&reversed));

        // Different endpoints produce a different key.
        let other = line_feature(vec![vec![-73.6, 45.5], vec![-73.62, 45.52]]);
        assert_ne!(key1, feature_dedup_key(&other));

        // Features that cannot be keyed yield `None`.
        let empty_line = line_feature(Vec::new());
        assert!(feature_dedup_key(&empty_line).is_none());

        let no_geometry = Feature {
            geometry: None,
            properties: None,
            ..Default::default()
        };
        assert!(feature_dedup_key(&no_geometry).is_none());

        let point = Feature {
            geometry: Some(Geometry::new(Value::Point(vec![-73.6, 45.5]))),
            properties: None,
            ..Default::default()
        };
        assert!(feature_dedup_key(&point).is_none());
    }
}