1use super::format::Format;
4use crate::core::{
5 EventBuilder, Location, Narrative, NarrativeBuilder, SourceRef, SourceType, Timestamp,
6};
7use crate::{Error, Result};
8use csv::StringRecord;
9use std::io::{Read, Write};
10
11#[derive(Debug, Clone)]
31pub struct CsvFormat {
32 pub options: CsvOptions,
34}
35
/// Column names and dialect settings for [`CsvFormat`].
///
/// Header names are matched without regard to ASCII case on import and are
/// written verbatim on export.
#[derive(Clone, Debug)]
pub struct CsvOptions {
    /// Header of the latitude column. Required on import.
    pub lat_column: String,

    /// Header of the longitude column. Required on import.
    pub lon_column: String,

    /// Header of the timestamp column. Required on import.
    pub timestamp_column: String,

    /// Header of the elevation column; `None` leaves elevation out entirely.
    pub elevation_column: Option<String>,

    /// Header of the event text column; `None` leaves text out entirely.
    pub text_column: Option<String>,

    /// Header of the tags column; `None` leaves tags out entirely.
    pub tags_column: Option<String>,

    /// Header of the source title column; `None` leaves sources out entirely.
    pub source_name_column: Option<String>,

    /// Header of the source type column; `None` leaves the type out entirely.
    pub source_type_column: Option<String>,

    /// Whether export writes a header row. Import does not consult this flag.
    pub include_headers: bool,

    /// Single-byte field delimiter handed to the CSV reader and writer.
    pub delimiter: u8,
}
69
70impl Default for CsvOptions {
71 fn default() -> Self {
72 Self {
73 lat_column: "lat".to_string(),
74 lon_column: "lon".to_string(),
75 timestamp_column: "timestamp".to_string(),
76 elevation_column: Some("elevation".to_string()),
77 text_column: Some("text".to_string()),
78 tags_column: Some("tags".to_string()),
79 source_name_column: Some("source".to_string()),
80 source_type_column: Some("source_type".to_string()),
81 include_headers: true,
82 delimiter: b',',
83 }
84 }
85}
86
87impl Default for CsvFormat {
88 fn default() -> Self {
89 Self::new()
90 }
91}
92
93impl CsvFormat {
94 pub fn new() -> Self {
96 Self {
97 options: CsvOptions::default(),
98 }
99 }
100
101 pub fn with_options(options: CsvOptions) -> Self {
103 Self { options }
104 }
105
106 fn find_column(&self, headers: &StringRecord, name: &str) -> Option<usize> {
108 headers.iter().position(|h| h.eq_ignore_ascii_case(name))
109 }
110
111 fn get_optional(&self, record: &StringRecord, index: Option<usize>) -> Option<String> {
113 index.and_then(|i| record.get(i).filter(|s| !s.is_empty()).map(String::from))
114 }
115}
116
117impl Format for CsvFormat {
118 fn import<R: Read>(&self, reader: R) -> Result<Narrative> {
119 let mut csv_reader = csv::ReaderBuilder::new()
120 .delimiter(self.options.delimiter)
121 .from_reader(reader);
122
123 let headers = csv_reader.headers()?.clone();
124
125 let lat_idx = self
127 .find_column(&headers, &self.options.lat_column)
128 .ok_or_else(|| {
129 Error::InvalidFormat(format!(
130 "missing required column: {}",
131 self.options.lat_column
132 ))
133 })?;
134
135 let lon_idx = self
136 .find_column(&headers, &self.options.lon_column)
137 .ok_or_else(|| {
138 Error::InvalidFormat(format!(
139 "missing required column: {}",
140 self.options.lon_column
141 ))
142 })?;
143
144 let ts_idx = self
145 .find_column(&headers, &self.options.timestamp_column)
146 .ok_or_else(|| {
147 Error::InvalidFormat(format!(
148 "missing required column: {}",
149 self.options.timestamp_column
150 ))
151 })?;
152
153 let elev_idx = self
155 .options
156 .elevation_column
157 .as_ref()
158 .and_then(|col| self.find_column(&headers, col));
159
160 let text_idx = self
161 .options
162 .text_column
163 .as_ref()
164 .and_then(|col| self.find_column(&headers, col));
165
166 let tags_idx = self
167 .options
168 .tags_column
169 .as_ref()
170 .and_then(|col| self.find_column(&headers, col));
171
172 let source_name_idx = self
173 .options
174 .source_name_column
175 .as_ref()
176 .and_then(|col| self.find_column(&headers, col));
177
178 let source_type_idx = self
179 .options
180 .source_type_column
181 .as_ref()
182 .and_then(|col| self.find_column(&headers, col));
183
184 let mut builder = NarrativeBuilder::new();
185
186 for (row_num, result) in csv_reader.records().enumerate() {
188 let record = result?;
189
190 let lat: f64 = record
192 .get(lat_idx)
193 .ok_or_else(|| Error::InvalidFormat(format!("missing lat at row {}", row_num)))?
194 .parse()
195 .map_err(|_| Error::InvalidFormat(format!("invalid lat at row {}", row_num)))?;
196
197 let lon: f64 = record
198 .get(lon_idx)
199 .ok_or_else(|| Error::InvalidFormat(format!("missing lon at row {}", row_num)))?
200 .parse()
201 .map_err(|_| Error::InvalidFormat(format!("invalid lon at row {}", row_num)))?;
202
203 let ts_str = record.get(ts_idx).ok_or_else(|| {
204 Error::InvalidFormat(format!("missing timestamp at row {}", row_num))
205 })?;
206
207 let timestamp = Timestamp::parse(ts_str).map_err(|_| {
208 Error::InvalidFormat(format!("invalid timestamp at row {}", row_num))
209 })?;
210
211 let mut location = Location::new(lat, lon);
213 if let Some(elev_str) = self.get_optional(&record, elev_idx) {
214 if let Ok(elev) = elev_str.parse::<f64>() {
215 location.elevation = Some(elev);
216 }
217 }
218
219 let mut event_builder = EventBuilder::new().location(location).timestamp(timestamp);
221
222 if let Some(text) = self.get_optional(&record, text_idx) {
224 event_builder = event_builder.text(text);
225 }
226
227 if let Some(tags_str) = self.get_optional(&record, tags_idx) {
228 for tag in tags_str.split(',') {
229 let trimmed = tag.trim();
230 if !trimmed.is_empty() {
231 event_builder = event_builder.tag(trimmed);
232 }
233 }
234 }
235
236 if let Some(source_name) = self.get_optional(&record, source_name_idx) {
237 let source_type = self
238 .get_optional(&record, source_type_idx)
239 .and_then(|s| match s.to_lowercase().as_str() {
240 "article" => Some(SourceType::Article),
241 "report" => Some(SourceType::Report),
242 "witness" => Some(SourceType::Witness),
243 "sensor" => Some(SourceType::Sensor),
244 _ => None,
245 })
246 .unwrap_or(SourceType::Article);
247
248 let mut source = SourceRef::new(source_type);
249 source.title = Some(source_name);
250 event_builder = event_builder.source(source);
251 }
252
253 let event = event_builder.build();
254 builder = builder.event(event);
255 }
256
257 Ok(builder.build())
258 }
259
260 fn export<W: Write>(&self, narrative: &Narrative, writer: W) -> Result<()> {
261 let mut csv_writer = csv::WriterBuilder::new()
262 .delimiter(self.options.delimiter)
263 .from_writer(writer);
264
265 if self.options.include_headers {
267 let mut headers = vec![
268 self.options.lat_column.as_str(),
269 self.options.lon_column.as_str(),
270 self.options.timestamp_column.as_str(),
271 ];
272
273 if let Some(ref col) = self.options.elevation_column {
274 headers.push(col);
275 }
276 if let Some(ref col) = self.options.text_column {
277 headers.push(col);
278 }
279 if let Some(ref col) = self.options.tags_column {
280 headers.push(col);
281 }
282 if let Some(ref col) = self.options.source_name_column {
283 headers.push(col);
284 }
285 if let Some(ref col) = self.options.source_type_column {
286 headers.push(col);
287 }
288
289 csv_writer.write_record(&headers)?;
290 }
291
292 for event in narrative.events() {
294 let loc = &event.location;
295 let mut record = vec![
296 loc.lat.to_string(),
297 loc.lon.to_string(),
298 event.timestamp.to_rfc3339(),
299 ];
300
301 if self.options.elevation_column.is_some() {
302 record.push(loc.elevation.map(|e| e.to_string()).unwrap_or_default());
303 }
304
305 if self.options.text_column.is_some() {
306 record.push(event.text.clone());
307 }
308
309 if self.options.tags_column.is_some() {
310 record.push(event.tags.join(", "));
311 }
312
313 if self.options.source_name_column.is_some() {
314 record.push(
315 event
316 .sources
317 .first()
318 .and_then(|s| s.title.clone())
319 .unwrap_or_default(),
320 );
321 }
322
323 if self.options.source_type_column.is_some() {
324 let type_str = event
325 .sources
326 .first()
327 .map(|s| s.source_type.to_string())
328 .unwrap_or_default();
329 record.push(type_str.to_string());
330 }
331
332 csv_writer.write_record(&record)?;
333 }
334
335 csv_writer.flush()?;
336 Ok(())
337 }
338}
339
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::Event;

    /// Rows carrying only the three required columns import as events.
    #[test]
    fn test_csv_import_basic() {
        let input = "lat,lon,timestamp\n\
                     40.7128,-74.006,2024-01-15T14:30:00Z\n\
                     34.0522,-118.2437,2024-01-16T10:00:00Z";

        let narrative = CsvFormat::new().import_str(input).unwrap();
        let events = narrative.events();

        assert_eq!(events.len(), 2);
        assert_eq!(events[0].location.lat, 40.7128);
        assert_eq!(events[1].location.lat, 34.0522);
    }

    /// The optional text column is carried onto each event.
    #[test]
    fn test_csv_import_with_text() {
        let input = "lat,lon,timestamp,text\n\
                     40.7128,-74.006,2024-01-15T14:30:00Z,Event in NYC\n\
                     34.0522,-118.2437,2024-01-16T10:00:00Z,Event in LA";

        let narrative = CsvFormat::new().import_str(input).unwrap();
        let events = narrative.events();

        assert_eq!(events[0].text, "Event in NYC");
        assert_eq!(events[1].text, "Event in LA");
    }

    /// Exporting and re-importing keeps the event count, text and tag count.
    #[test]
    fn test_csv_roundtrip() {
        let original = Narrative::builder()
            .event(
                Event::builder()
                    .location(Location::new(40.7128, -74.006))
                    .timestamp(Timestamp::parse("2024-01-15T14:30:00Z").unwrap())
                    .text("Test event")
                    .tag("tag1")
                    .tag("tag2")
                    .build(),
            )
            .build();

        let format = CsvFormat::new();
        let exported = format.export_str(&original).unwrap();
        let imported = format.import_str(&exported).unwrap();
        let events = imported.events();

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].text, "Test event");
        assert_eq!(events[0].tags.len(), 2);
    }

    /// Headers that do not match the configured required names are rejected.
    #[test]
    fn test_csv_missing_required_column() {
        let input = "latitude,longitude\n40.7128,-74.006";

        let outcome = CsvFormat::new().import_str(input);

        assert!(outcome.is_err());
    }

    /// A tab delimiter configured through the options is honoured on import.
    #[test]
    fn test_csv_custom_delimiter() {
        let input = "lat\tlon\ttimestamp\n40.7128\t-74.006\t2024-01-15T14:30:00Z";

        let format = CsvFormat::with_options(CsvOptions {
            delimiter: b'\t',
            ..Default::default()
        });
        let narrative = format.import_str(input).unwrap();

        assert_eq!(narrative.events().len(), 1);
    }
}