// spatial_narrative/io/csv_format.rs

//! CSV format import/export.
2
3use super::format::Format;
4use crate::core::{
5    EventBuilder, Location, Narrative, NarrativeBuilder, SourceRef, SourceType, Timestamp,
6};
7use crate::{Error, Result};
8use csv::StringRecord;
9use std::io::{Read, Write};
10
/// CSV format handler.
///
/// This format handler can import and export narratives in CSV format.
/// The CSV must have latitude, longitude, and timestamp columns at minimum.
///
/// # Example
///
/// ```rust
/// use spatial_narrative::io::{CsvFormat, Format};
///
/// let format = CsvFormat::default();
///
/// let csv_data = "lat,lon,timestamp,text\n\
///                 40.7128,-74.006,2024-01-15T14:30:00Z,Something happened\n\
///                 34.0522,-118.2437,2024-01-16T10:00:00Z,Another event";
///
/// let narrative = format.import_str(csv_data).unwrap();
/// assert_eq!(narrative.events().len(), 2);
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsvFormat {
    /// Column mapping and dialect settings shared by import and export.
    pub options: CsvOptions,
}

/// Configuration options for CSV import/export.
///
/// The three required columns are always read on import and always written
/// on export. Each optional column is used only when its option is `Some`.
/// On import an optional column that is configured but absent from the header
/// row is simply skipped.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsvOptions {
    /// Column name for latitude (defaults to "lat").
    pub lat_column: String,

    /// Column name for longitude (defaults to "lon").
    pub lon_column: String,

    /// Column name for timestamp (defaults to "timestamp").
    pub timestamp_column: String,

    /// Column name for elevation (optional; defaults to `Some("elevation")`).
    pub elevation_column: Option<String>,

    /// Column name for text/description (optional; defaults to `Some("text")`).
    pub text_column: Option<String>,

    /// Column name for tags (optional; defaults to `Some("tags")`).
    ///
    /// A single cell holds all tags of an event, separated by commas.
    pub tags_column: Option<String>,

    /// Column name for source name (optional; defaults to `Some("source")`).
    pub source_name_column: Option<String>,

    /// Column name for source type (optional; defaults to `Some("source_type")`).
    pub source_type_column: Option<String>,

    /// Whether to include headers in exported CSV (defaults to `true`).
    pub include_headers: bool,

    /// CSV delimiter byte (defaults to `b','`).
    pub delimiter: u8,
}
69
70impl Default for CsvOptions {
71    fn default() -> Self {
72        Self {
73            lat_column: "lat".to_string(),
74            lon_column: "lon".to_string(),
75            timestamp_column: "timestamp".to_string(),
76            elevation_column: Some("elevation".to_string()),
77            text_column: Some("text".to_string()),
78            tags_column: Some("tags".to_string()),
79            source_name_column: Some("source".to_string()),
80            source_type_column: Some("source_type".to_string()),
81            include_headers: true,
82            delimiter: b',',
83        }
84    }
85}
86
87impl Default for CsvFormat {
88    fn default() -> Self {
89        Self::new()
90    }
91}
92
93impl CsvFormat {
94    /// Create a new CSV format handler with default options.
95    pub fn new() -> Self {
96        Self {
97            options: CsvOptions::default(),
98        }
99    }
100
101    /// Create a new CSV format handler with custom options.
102    pub fn with_options(options: CsvOptions) -> Self {
103        Self { options }
104    }
105
106    /// Helper to find column index by name
107    fn find_column(&self, headers: &StringRecord, name: &str) -> Option<usize> {
108        headers.iter().position(|h| h.eq_ignore_ascii_case(name))
109    }
110
111    /// Helper to get optional string value from record
112    fn get_optional(&self, record: &StringRecord, index: Option<usize>) -> Option<String> {
113        index.and_then(|i| record.get(i).filter(|s| !s.is_empty()).map(String::from))
114    }
115}
116
117impl Format for CsvFormat {
118    fn import<R: Read>(&self, reader: R) -> Result<Narrative> {
119        let mut csv_reader = csv::ReaderBuilder::new()
120            .delimiter(self.options.delimiter)
121            .from_reader(reader);
122
123        let headers = csv_reader.headers()?.clone();
124
125        // Find required columns
126        let lat_idx = self
127            .find_column(&headers, &self.options.lat_column)
128            .ok_or_else(|| {
129                Error::InvalidFormat(format!(
130                    "missing required column: {}",
131                    self.options.lat_column
132                ))
133            })?;
134
135        let lon_idx = self
136            .find_column(&headers, &self.options.lon_column)
137            .ok_or_else(|| {
138                Error::InvalidFormat(format!(
139                    "missing required column: {}",
140                    self.options.lon_column
141                ))
142            })?;
143
144        let ts_idx = self
145            .find_column(&headers, &self.options.timestamp_column)
146            .ok_or_else(|| {
147                Error::InvalidFormat(format!(
148                    "missing required column: {}",
149                    self.options.timestamp_column
150                ))
151            })?;
152
153        // Find optional columns
154        let elev_idx = self
155            .options
156            .elevation_column
157            .as_ref()
158            .and_then(|col| self.find_column(&headers, col));
159
160        let text_idx = self
161            .options
162            .text_column
163            .as_ref()
164            .and_then(|col| self.find_column(&headers, col));
165
166        let tags_idx = self
167            .options
168            .tags_column
169            .as_ref()
170            .and_then(|col| self.find_column(&headers, col));
171
172        let source_name_idx = self
173            .options
174            .source_name_column
175            .as_ref()
176            .and_then(|col| self.find_column(&headers, col));
177
178        let source_type_idx = self
179            .options
180            .source_type_column
181            .as_ref()
182            .and_then(|col| self.find_column(&headers, col));
183
184        let mut builder = NarrativeBuilder::new();
185
186        // Process each record
187        for (row_num, result) in csv_reader.records().enumerate() {
188            let record = result?;
189
190            // Parse required fields
191            let lat: f64 = record
192                .get(lat_idx)
193                .ok_or_else(|| Error::InvalidFormat(format!("missing lat at row {}", row_num)))?
194                .parse()
195                .map_err(|_| Error::InvalidFormat(format!("invalid lat at row {}", row_num)))?;
196
197            let lon: f64 = record
198                .get(lon_idx)
199                .ok_or_else(|| Error::InvalidFormat(format!("missing lon at row {}", row_num)))?
200                .parse()
201                .map_err(|_| Error::InvalidFormat(format!("invalid lon at row {}", row_num)))?;
202
203            let ts_str = record.get(ts_idx).ok_or_else(|| {
204                Error::InvalidFormat(format!("missing timestamp at row {}", row_num))
205            })?;
206
207            let timestamp = Timestamp::parse(ts_str).map_err(|_| {
208                Error::InvalidFormat(format!("invalid timestamp at row {}", row_num))
209            })?;
210
211            // Build location
212            let mut location = Location::new(lat, lon);
213            if let Some(elev_str) = self.get_optional(&record, elev_idx) {
214                if let Ok(elev) = elev_str.parse::<f64>() {
215                    location.elevation = Some(elev);
216                }
217            }
218
219            // Build event
220            let mut event_builder = EventBuilder::new().location(location).timestamp(timestamp);
221
222            // Add optional fields
223            if let Some(text) = self.get_optional(&record, text_idx) {
224                event_builder = event_builder.text(text);
225            }
226
227            if let Some(tags_str) = self.get_optional(&record, tags_idx) {
228                for tag in tags_str.split(',') {
229                    let trimmed = tag.trim();
230                    if !trimmed.is_empty() {
231                        event_builder = event_builder.tag(trimmed);
232                    }
233                }
234            }
235
236            if let Some(source_name) = self.get_optional(&record, source_name_idx) {
237                let source_type = self
238                    .get_optional(&record, source_type_idx)
239                    .and_then(|s| match s.to_lowercase().as_str() {
240                        "article" => Some(SourceType::Article),
241                        "report" => Some(SourceType::Report),
242                        "witness" => Some(SourceType::Witness),
243                        "sensor" => Some(SourceType::Sensor),
244                        _ => None,
245                    })
246                    .unwrap_or(SourceType::Article);
247
248                let mut source = SourceRef::new(source_type);
249                source.title = Some(source_name);
250                event_builder = event_builder.source(source);
251            }
252
253            let event = event_builder.build();
254            builder = builder.event(event);
255        }
256
257        Ok(builder.build())
258    }
259
260    fn export<W: Write>(&self, narrative: &Narrative, writer: W) -> Result<()> {
261        let mut csv_writer = csv::WriterBuilder::new()
262            .delimiter(self.options.delimiter)
263            .from_writer(writer);
264
265        // Write headers if enabled
266        if self.options.include_headers {
267            let mut headers = vec![
268                self.options.lat_column.as_str(),
269                self.options.lon_column.as_str(),
270                self.options.timestamp_column.as_str(),
271            ];
272
273            if let Some(ref col) = self.options.elevation_column {
274                headers.push(col);
275            }
276            if let Some(ref col) = self.options.text_column {
277                headers.push(col);
278            }
279            if let Some(ref col) = self.options.tags_column {
280                headers.push(col);
281            }
282            if let Some(ref col) = self.options.source_name_column {
283                headers.push(col);
284            }
285            if let Some(ref col) = self.options.source_type_column {
286                headers.push(col);
287            }
288
289            csv_writer.write_record(&headers)?;
290        }
291
292        // Write events
293        for event in narrative.events() {
294            let loc = &event.location;
295            let mut record = vec![
296                loc.lat.to_string(),
297                loc.lon.to_string(),
298                event.timestamp.to_rfc3339(),
299            ];
300
301            if self.options.elevation_column.is_some() {
302                record.push(loc.elevation.map(|e| e.to_string()).unwrap_or_default());
303            }
304
305            if self.options.text_column.is_some() {
306                record.push(event.text.clone());
307            }
308
309            if self.options.tags_column.is_some() {
310                record.push(event.tags.join(", "));
311            }
312
313            if self.options.source_name_column.is_some() {
314                record.push(
315                    event
316                        .sources
317                        .first()
318                        .and_then(|s| s.title.clone())
319                        .unwrap_or_default(),
320                );
321            }
322
323            if self.options.source_type_column.is_some() {
324                let type_str = event
325                    .sources
326                    .first()
327                    .map(|s| s.source_type.to_string())
328                    .unwrap_or_default();
329                record.push(type_str.to_string());
330            }
331
332            csv_writer.write_record(&record)?;
333        }
334
335        csv_writer.flush()?;
336        Ok(())
337    }
338}
339
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::Event;

    /// Only the three mandatory columns are present.
    #[test]
    fn test_csv_import_basic() {
        let input = "lat,lon,timestamp\n\
                     40.7128,-74.006,2024-01-15T14:30:00Z\n\
                     34.0522,-118.2437,2024-01-16T10:00:00Z";

        let narrative = CsvFormat::new().import_str(input).unwrap();
        let events = narrative.events();

        assert_eq!(events.len(), 2);
        assert_eq!(events[0].location.lat, 40.7128);
        assert_eq!(events[1].location.lat, 34.0522);
    }

    /// The optional text column is picked up under its default name.
    #[test]
    fn test_csv_import_with_text() {
        let input = "lat,lon,timestamp,text\n\
                     40.7128,-74.006,2024-01-15T14:30:00Z,Event in NYC\n\
                     34.0522,-118.2437,2024-01-16T10:00:00Z,Event in LA";

        let narrative = CsvFormat::new().import_str(input).unwrap();
        let events = narrative.events();

        assert_eq!(events[0].text, "Event in NYC");
        assert_eq!(events[1].text, "Event in LA");
    }

    /// Exporting and re-importing keeps the event text and both tags.
    #[test]
    fn test_csv_roundtrip() {
        let original = Narrative::builder()
            .event(
                Event::builder()
                    .location(Location::new(40.7128, -74.006))
                    .timestamp(Timestamp::parse("2024-01-15T14:30:00Z").unwrap())
                    .text("Test event")
                    .tag("tag1")
                    .tag("tag2")
                    .build(),
            )
            .build();

        let handler = CsvFormat::new();
        let csv_text = handler.export_str(&original).unwrap();
        let restored = handler.import_str(&csv_text).unwrap();
        let events = restored.events();

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].text, "Test event");
        assert_eq!(events[0].tags.len(), 2);
    }

    /// Headers that do not match the configured column names are rejected.
    #[test]
    fn test_csv_missing_required_column() {
        let input = "latitude,longitude\n40.7128,-74.006";

        let outcome = CsvFormat::new().import_str(input);

        assert!(outcome.is_err());
    }

    /// A tab delimiter set through the options is honoured on import.
    #[test]
    fn test_csv_custom_delimiter() {
        let input = "lat\tlon\ttimestamp\n40.7128\t-74.006\t2024-01-15T14:30:00Z";

        let handler = CsvFormat::with_options(CsvOptions {
            delimiter: b'\t',
            ..Default::default()
        });
        let narrative = handler.import_str(input).unwrap();

        assert_eq!(narrative.events().len(), 1);
    }
}