// v2rmp 0.4.3
//
// rmpca — Route Optimization TUI & Agent Engine
// Documentation
use anyhow::Context;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::io::Read;
use std::time::Instant;

use super::clean::{clean_geojson, CleanOptions, CleanStats};

/// Parameters for a single `run_compile` invocation.
///
/// The struct is (de)serializable with serde so a request can be passed
/// around as data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompileRequest {
    /// Path of the GeoJSON file that `run_compile` opens and parses as a
    /// `FeatureCollection`.
    pub input_geojson: String,
    /// Path the compiled `.rmp` bytes are written to.
    pub output_rmp: String,
    /// NOTE(review): not consulted by `run_compile` in this file; presumably
    /// reserved for output compression — confirm against callers.
    pub compress: bool,
    /// NOTE(review): not consulted by `run_compile` in this file; presumably
    /// a road-class filter applied elsewhere — confirm against callers.
    pub road_classes: Vec<String>,
    /// When `Some`, the parsed collection is passed through `clean_geojson`
    /// with these options before the graph is built; `None` skips cleaning.
    pub clean_options: Option<CleanOptions>,
    /// When `true`, only the largest connected component of the graph is
    /// kept. Defaults to `false` if the field is absent when deserializing.
    #[serde(default)]
    pub prune_disconnected: bool,
}

/// Summary statistics returned by a successful `run_compile` call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompileResult {
    /// Number of bytes read from the input GeoJSON file.
    pub input_size_bytes: u64,
    /// Length of the buffer written to the output file, including the magic,
    /// the counts and the trailing checksum.
    pub output_size_bytes: u64,
    /// Number of nodes written (after deduplication and optional pruning).
    pub node_count: usize,
    /// Number of edges written (after optional pruning).
    pub edge_count: usize,
    /// Wall-clock duration of the whole call, in milliseconds.
    pub elapsed_ms: u64,
}

/// Four-byte magic that opens every `.rmp` file (the `.rmp` binary format
/// header: magic + version).
///
/// `run_compile` writes it as the first bytes of the output and
/// `is_rmp_file` checks for it.
const RMP_MAGIC: &[u8; 4] = b"RMP1";

/// Compile a GeoJSON road network into the .rmp binary format.
///
/// Processing steps:
///   1. read and parse the input file as a GeoJSON `FeatureCollection`;
///   2. optionally clean it with `clean_geojson` (when `req.clean_options` is set);
///   3. turn every `LineString` / `MultiLineString` into graph edges, merging
///      coincident vertices through `get_or_create_node`;
///   4. optionally keep only the largest connected component
///      (when `req.prune_disconnected` is set);
///   5. serialize the graph and write it to `req.output_rmp`.
///
/// Binary layout:
///   \[4\]  magic "RMP1"
///   \[4\]  node count (u32 LE)
///   \[4\]  edge count (u32 LE)
///   \[N\]  node entries: lat(f64) lon(f64) = 16 bytes each
///   \[E\]  edge entries: from(u32 LE) to(u32 LE) weight_m(f64 LE) oneway(u8) = 17 bytes each
///   \[4\]  CRC32 checksum (LE)
///
/// # Errors
///
/// Returns an error when the input cannot be opened, read or parsed, when
/// cleaning fails, when the graph has more nodes or edges than the `u32`
/// fields of the format can describe, or when the output cannot be written.
pub fn run_compile(req: &CompileRequest) -> anyhow::Result<CompileResult> {
    let start = Instant::now();

    // 1. Read the GeoJSON file
    let mut input_data = Vec::new();
    {
        let mut file = std::fs::File::open(&req.input_geojson)
            .with_context(|| format!("Failed to open input GeoJSON: {}", req.input_geojson))?;
        file.read_to_end(&mut input_data)
            .with_context(|| format!("Failed to read input GeoJSON: {}", req.input_geojson))?;
    }
    let input_size_bytes = input_data.len() as u64;

    // 2. Parse into FeatureCollection
    let mut geojson: geojson::FeatureCollection = serde_json::from_slice(&input_data)
        .context("Failed to parse GeoJSON FeatureCollection")?;
    // The raw bytes are no longer needed once parsed; release them early.
    drop(input_data);

    // 2.5. Clean GeoJSON if options provided. The statistics are currently
    //      not reported anywhere, hence the underscore-prefixed binding.
    let _clean_stats: Option<CleanStats> = match req.clean_options.as_ref() {
        Some(clean_opts) => {
            let (cleaned_fc, stats, _warnings) =
                clean_geojson(&geojson, clean_opts).context("Failed to clean GeoJSON")?;
            geojson = cleaned_fc;
            Some(stats)
        }
        None => None,
    };

    // 3. Deduplicate nodes by snapping coordinates to 1e6 precision
    //    and 4. Build the edge list
    let mut node_map: HashMap<u64, u32> = HashMap::new();
    let mut nodes: Vec<(f64, f64)> = Vec::new(); // (lat, lon) in original precision
    let mut edges: Vec<(u32, u32, f64, u8)> = Vec::new(); // (from, to, weight_m, oneway)

    for feature in &geojson.features {
        let geometry = match feature.geometry.as_ref() {
            Some(g) => g,
            None => continue,
        };

        // Determine oneway from properties (missing property => two-way).
        let oneway = feature
            .properties
            .as_ref()
            .and_then(|props| props.get("oneway"))
            .map_or(0, oneway_flag);

        let line_strings: Vec<&Vec<Vec<f64>>> = match &geometry.value {
            geojson::Value::LineString(coords) => vec![coords],
            geojson::Value::MultiLineString(multi) => multi.iter().collect(),
            _ => continue,
        };

        for coords in line_strings {
            if coords.len() < 2 {
                continue;
            }

            // GeoJSON positions are [lon, lat, ...]; positions with fewer
            // than two components are dropped.
            let coord_points: Vec<(f64, f64)> = coords
                .iter()
                .filter(|p| p.len() >= 2)
                .map(|p| (p[1], p[0])) // (lat, lon)
                .collect();

            if coord_points.len() < 2 {
                continue;
            }

            // Node ids and the header counts are u32. Refuse to continue
            // before an id could wrap around and silently corrupt the graph.
            anyhow::ensure!(
                nodes.len().saturating_add(coord_points.len()) <= u32::MAX as usize,
                "Too many nodes for the .rmp format: node ids are limited to {}",
                u32::MAX
            );

            // Walk the polyline, emitting one edge per consecutive pair and
            // reusing the previous end node as the next start node.
            let (first_lat, first_lon) = coord_points[0];
            let mut prev_id = get_or_create_node(&mut node_map, &mut nodes, first_lat, first_lon);
            let (mut prev_lat, mut prev_lon) = (first_lat, first_lon);

            for &(lat, lon) in &coord_points[1..] {
                let to_node = get_or_create_node(&mut node_map, &mut nodes, lat, lon);
                let weight_m = super::haversine_m(prev_lat, prev_lon, lat, lon);
                edges.push((prev_id, to_node, weight_m, oneway));

                prev_id = to_node;
                prev_lat = lat;
                prev_lon = lon;
            }
        }
    }

    // 4.5 Prune disconnected subgraphs
    if req.prune_disconnected && !nodes.is_empty() {
        prune_to_largest_component(&mut nodes, &mut edges);
    }

    let node_count = nodes.len();
    let edge_count = edges.len();

    // 5. Write the binary .rmp format
    let buf = encode_rmp(&nodes, &edges)?;
    std::fs::write(&req.output_rmp, &buf)
        .with_context(|| format!("Failed to write output file: {}", req.output_rmp))?;

    let output_size_bytes = buf.len() as u64;
    let elapsed_ms = start.elapsed().as_millis() as u64;

    Ok(CompileResult {
        input_size_bytes,
        output_size_bytes,
        node_count,
        edge_count,
        elapsed_ms,
    })
}

/// Interpret a GeoJSON `oneway` property value as the 0/1 flag stored per edge.
///
/// The strings `"yes"`, `"1"` and `"true"`, the boolean `true` and the
/// integer `1` all mean one-way; every other value means two-way.
fn oneway_flag(value: &serde_json::Value) -> u8 {
    let is_oneway = match value {
        serde_json::Value::String(s) => matches!(s.as_str(), "yes" | "1" | "true"),
        serde_json::Value::Bool(flag) => *flag,
        serde_json::Value::Number(n) => n.as_i64() == Some(1),
        _ => false,
    };
    u8::from(is_oneway)
}

/// Keep only the largest connected component of the graph, renumbering nodes.
///
/// Connectivity ignores the one-way flag (edges are treated as undirected).
/// When several components share the largest size, the one discovered first
/// (lowest starting node id) is kept. Both vectors are left untouched when
/// the graph has a single component.
fn prune_to_largest_component(
    nodes: &mut Vec<(f64, f64)>,
    edges: &mut Vec<(u32, u32, f64, u8)>,
) {
    // Undirected adjacency list.
    let mut adj: Vec<Vec<u32>> = vec![Vec::new(); nodes.len()];
    for &(from, to, _, _) in edges.iter() {
        adj[from as usize].push(to);
        adj[to as usize].push(from);
    }

    // Iterative depth-first search to label every component.
    let mut visited = vec![false; nodes.len()];
    let mut components: Vec<Vec<u32>> = Vec::new();
    for seed in 0..nodes.len() {
        if visited[seed] {
            continue;
        }
        visited[seed] = true;

        let mut component = Vec::new();
        let mut stack = vec![seed as u32];
        while let Some(node) = stack.pop() {
            component.push(node);
            for &neighbor in &adj[node as usize] {
                if !visited[neighbor as usize] {
                    visited[neighbor as usize] = true;
                    stack.push(neighbor);
                }
            }
        }
        components.push(component);
    }

    if components.len() <= 1 {
        tracing::info!("Graph is fully connected, no subgraphs to prune.");
        return;
    }

    // Stable sort: among equally large components the earliest one stays first.
    components.sort_by_key(|c| std::cmp::Reverse(c.len()));
    let largest_component = &components[0];
    let pruned_nodes_count = nodes.len() - largest_component.len();
    tracing::info!("Pruning disconnected subgraphs: kept largest component ({} nodes), pruned {} disconnected nodes in {} smaller subgraphs", largest_component.len(), pruned_nodes_count, components.len() - 1);

    // Renumber surviving nodes in the order the search visited them.
    let mut old_to_new: Vec<Option<u32>> = vec![None; nodes.len()];
    let mut new_nodes = Vec::with_capacity(largest_component.len());
    for &old_id in largest_component {
        old_to_new[old_id as usize] = Some(new_nodes.len() as u32);
        new_nodes.push(nodes[old_id as usize]);
    }

    // Keep only edges whose two endpoints survived, with remapped ids.
    let new_edges: Vec<(u32, u32, f64, u8)> = edges
        .iter()
        .filter_map(|&(from, to, weight, oneway)| {
            match (old_to_new[from as usize], old_to_new[to as usize]) {
                (Some(new_from), Some(new_to)) => Some((new_from, new_to, weight, oneway)),
                _ => None,
            }
        })
        .collect();
    let pruned_edges_count = edges.len() - new_edges.len();

    tracing::info!("Pruned {} disconnected edges", pruned_edges_count);
    *nodes = new_nodes;
    *edges = new_edges;
}

/// Serialize the graph into the .rmp byte layout, including the trailing CRC32.
///
/// Fails instead of truncating when either count does not fit the `u32`
/// header fields.
fn encode_rmp(nodes: &[(f64, f64)], edges: &[(u32, u32, f64, u8)]) -> anyhow::Result<Vec<u8>> {
    let node_count = u32::try_from(nodes.len())
        .with_context(|| format!("Too many nodes for the .rmp format: {}", nodes.len()))?;
    let edge_count = u32::try_from(edges.len())
        .with_context(|| format!("Too many edges for the .rmp format: {}", edges.len()))?;

    // magic + two counts + node entries + edge entries + checksum
    let mut buf: Vec<u8> = Vec::with_capacity(12 + nodes.len() * 16 + edges.len() * 17 + 4);

    // Magic
    buf.extend_from_slice(RMP_MAGIC);

    // Node count, then edge count
    buf.extend_from_slice(&node_count.to_le_bytes());
    buf.extend_from_slice(&edge_count.to_le_bytes());

    // Node entries: lat(f64 LE) lon(f64 LE) = 16 bytes each
    for &(lat, lon) in nodes {
        buf.extend_from_slice(&lat.to_le_bytes());
        buf.extend_from_slice(&lon.to_le_bytes());
    }

    // Edge entries: from(u32 LE) to(u32 LE) weight_m(f64 LE) oneway(u8) = 17 bytes each
    for &(from, to, weight_m, oneway) in edges {
        buf.extend_from_slice(&from.to_le_bytes());
        buf.extend_from_slice(&to.to_le_bytes());
        buf.extend_from_slice(&weight_m.to_le_bytes());
        buf.push(oneway);
    }

    // CRC32 checksum (LE) over everything written so far
    let crc = crc32fast::hash(&buf);
    buf.extend_from_slice(&crc.to_le_bytes());

    Ok(buf)
}

/// Report whether `data` begins with the four-byte .rmp magic.
///
/// Only the leading magic is compared; the counts, entries and checksum are
/// not inspected. Inputs shorter than the magic yield `false`.
#[allow(dead_code)]
pub fn is_rmp_file(data: &[u8]) -> bool {
    data.starts_with(RMP_MAGIC)
}

/// Get or create a node ID for the given (lat, lon) coordinates.
///
/// Both coordinates are snapped to the nearest multiple of 1e-6 to form the
/// deduplication key, so points that fall on the same grid point share one
/// node. The coordinates stored in `nodes` are those of the first point seen
/// for a key, in original precision.
///
/// Returns the existing id when the key is already known; otherwise appends
/// `(lat, lon)` to `nodes` and returns its index as the new id.
///
/// # Panics
///
/// Panics if a new node would need an id that does not fit in `u32`, rather
/// than silently wrapping around and aliasing an existing node.
fn get_or_create_node(
    node_map: &mut HashMap<u64, u32>,
    nodes: &mut Vec<(f64, f64)>,
    lat: f64,
    lon: f64,
) -> u32 {
    // Round to the nearest grid point. Plain truncation would split points
    // that straddle a grid line by a hair (0.9999996 vs 1.0000004) and would
    // make the cell around zero twice as wide as every other cell.
    let lat_i = (lat * 1e6).round() as i32;
    let lon_i = (lon * 1e6).round() as i32;

    // Pack both snapped values into one key: latitude bits in the high half,
    // longitude bits in the low half (`as u32` keeps the two's-complement bits).
    let key = (u64::from(lat_i as u32) << 32) | u64::from(lon_i as u32);

    *node_map.entry(key).or_insert_with(|| {
        let id = u32::try_from(nodes.len())
            .expect("node count exceeds the u32 id space of the .rmp format");
        nodes.push((lat, lon));
        id
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Compiling from a path that does not exist must fail, and the error
    /// must carry the "open input" context message.
    #[test]
    fn test_run_compile_error_on_missing_input() {
        let request = CompileRequest {
            input_geojson: String::from("non_existent_file.geojson"),
            output_rmp: String::from("output.rmp"),
            compress: false,
            road_classes: Vec::new(),
            clean_options: None,
            prune_disconnected: false,
        };

        let failure = run_compile(&request).err();
        assert!(failure.is_some());

        let rendered = failure.map(|e| e.to_string()).unwrap_or_default();
        assert!(rendered.contains("Failed to open input GeoJSON"));
    }
}