tp_lib_core/io/
csv.rs

1//! CSV parsing and writing
2
3use crate::errors::ProjectionError;
4use crate::models::{GnssPosition, ProjectedPosition};
5use chrono::{DateTime, FixedOffset};
6use polars::prelude::*;
7use std::collections::HashMap;
8
9/// Parse GNSS positions from CSV file
10pub fn parse_gnss_csv(
11    path: &str,
12    crs: &str,
13    lat_col: &str,
14    lon_col: &str,
15    time_col: &str,
16) -> Result<Vec<GnssPosition>, ProjectionError> {
17    // Read CSV file using polars
18    let df = CsvReadOptions::default()
19        .with_has_header(true)
20        .try_into_reader_with_file_path(Some(path.into()))
21        .map_err(|e| {
22            ProjectionError::IoError(std::io::Error::new(
23                std::io::ErrorKind::InvalidData,
24                format!("Failed to read CSV: {}", e),
25            ))
26        })?
27        .finish()
28        .map_err(|e| {
29            ProjectionError::IoError(std::io::Error::new(
30                std::io::ErrorKind::InvalidData,
31                format!("Failed to parse CSV: {}", e),
32            ))
33        })?;
34
35    // Validate required columns exist
36    let schema = df.schema();
37    if !schema.contains(lat_col) {
38        return Err(ProjectionError::InvalidCoordinate(format!(
39            "Latitude column '{}' not found in CSV",
40            lat_col
41        )));
42    }
43    if !schema.contains(lon_col) {
44        return Err(ProjectionError::InvalidCoordinate(format!(
45            "Longitude column '{}' not found in CSV",
46            lon_col
47        )));
48    }
49    if !schema.contains(time_col) {
50        return Err(ProjectionError::InvalidTimestamp(format!(
51            "Timestamp column '{}' not found in CSV",
52            time_col
53        )));
54    }
55
56    // Get all column names for metadata preservation
57    let all_columns: Vec<String> = schema.iter_names().map(|s| s.to_string()).collect();
58
59    // Extract required columns
60    let lat_series = df.column(lat_col).map_err(|e| {
61        ProjectionError::InvalidCoordinate(format!("Failed to get latitude: {}", e))
62    })?;
63    let lon_series = df.column(lon_col).map_err(|e| {
64        ProjectionError::InvalidCoordinate(format!("Failed to get longitude: {}", e))
65    })?;
66    let time_series = df.column(time_col).map_err(|e| {
67        ProjectionError::InvalidTimestamp(format!("Failed to get timestamp: {}", e))
68    })?;
69
70    // Convert to f64 arrays
71    let lat_array = lat_series.f64().map_err(|e| {
72        ProjectionError::InvalidCoordinate(format!("Latitude must be numeric: {}", e))
73    })?;
74    let lon_array = lon_series.f64().map_err(|e| {
75        ProjectionError::InvalidCoordinate(format!("Longitude must be numeric: {}", e))
76    })?;
77    let time_array = time_series.str().map_err(|e| {
78        ProjectionError::InvalidTimestamp(format!("Timestamp must be string: {}", e))
79    })?;
80
81    // Build GNSS positions
82    let mut positions = Vec::new();
83    let row_count = df.height();
84
85    for i in 0..row_count {
86        // Get coordinates
87        let latitude = lat_array.get(i).ok_or_else(|| {
88            ProjectionError::InvalidCoordinate(format!("Missing latitude at row {}", i))
89        })?;
90        let longitude = lon_array.get(i).ok_or_else(|| {
91            ProjectionError::InvalidCoordinate(format!("Missing longitude at row {}", i))
92        })?;
93
94        // Get and parse timestamp
95        let time_str = time_array.get(i).ok_or_else(|| {
96            ProjectionError::InvalidTimestamp(format!("Missing timestamp at row {}", i))
97        })?;
98
99        let timestamp = DateTime::<FixedOffset>::parse_from_rfc3339(time_str)
100            .map_err(|e| ProjectionError::InvalidTimestamp(
101                format!("Invalid timestamp '{}' at row {}: {} (expected RFC3339 format with timezone, e.g., 2025-12-09T14:30:00+01:00)", 
102                    time_str, i, e)
103            ))?;
104
105        // Validate timezone is present
106        if timestamp.timezone().local_minus_utc() == 0
107            && !time_str.contains('+')
108            && !time_str.ends_with('Z')
109        {
110            return Err(ProjectionError::InvalidTimestamp(format!(
111                "Timestamp at row {} missing explicit timezone offset",
112                i
113            )));
114        }
115
116        // Build metadata from other columns
117        let mut metadata = HashMap::new();
118        for col_name in &all_columns {
119            if col_name != lat_col && col_name != lon_col && col_name != time_col {
120                if let Ok(series) = df.column(col_name) {
121                    if let Ok(str_series) = series.cast(&DataType::String) {
122                        if let Ok(str_chunked) = str_series.str() {
123                            if let Some(value) = str_chunked.get(i) {
124                                metadata.insert(col_name.clone(), value.to_string());
125                            }
126                        }
127                    }
128                }
129            }
130        }
131
132        // Create GNSS position
133        let mut position = GnssPosition::new(latitude, longitude, timestamp, crs.to_string())?;
134        position.metadata = metadata;
135        positions.push(position);
136    }
137
138    Ok(positions)
139}
140
141/// Write projected positions to CSV
142pub fn write_csv(
143    positions: &[ProjectedPosition],
144    writer: &mut impl std::io::Write,
145) -> Result<(), ProjectionError> {
146    use csv::Writer;
147
148    let mut csv_writer = Writer::from_writer(writer);
149
150    // Write header
151    csv_writer.write_record(&[
152        "original_lat",
153        "original_lon",
154        "original_time",
155        "projected_lat",
156        "projected_lon",
157        "netelement_id",
158        "measure_meters",
159        "projection_distance_meters",
160        "crs",
161    ])?;
162
163    // Write data rows
164    for pos in positions {
165        csv_writer.write_record(&[
166            pos.original.latitude.to_string(),
167            pos.original.longitude.to_string(),
168            pos.original.timestamp.to_rfc3339(),
169            pos.projected_coords.y().to_string(),
170            pos.projected_coords.x().to_string(),
171            pos.netelement_id.clone(),
172            pos.measure_meters.to_string(),
173            pos.projection_distance_meters.to_string(),
174            pos.crs.clone(),
175        ])?;
176    }
177
178    csv_writer.flush()?;
179    Ok(())
180}