Skip to main content

dkit_core/format/
csv.rs

1use std::io::{Read, Write};
2
3use indexmap::IndexMap;
4
5use crate::format::{FormatOptions, FormatReader, FormatWriter};
6use crate::value::Value;
7
8/// CSV 문자열 값을 적절한 Value 타입으로 변환
9/// 숫자 패턴이면 Integer/Float로, 그 외는 String으로 변환
10fn infer_value(s: &str) -> Value {
11    if s.is_empty() {
12        return Value::Null;
13    }
14    if let Ok(i) = s.parse::<i64>() {
15        return Value::Integer(i);
16    }
17    if let Ok(f) = s.parse::<f64>() {
18        if f.is_finite() {
19            return Value::Float(f);
20        }
21    }
22    Value::String(s.to_string())
23}
24
25/// CSV 포맷 Reader
26#[derive(Default)]
27pub struct CsvReader {
28    options: FormatOptions,
29}
30
31impl CsvReader {
32    pub fn new(options: FormatOptions) -> Self {
33        Self { options }
34    }
35}
36
37impl CsvReader {
38    fn build_reader<R: Read>(&self, rdr: R) -> csv::Reader<R> {
39        let delimiter = self.options.delimiter.unwrap_or(',') as u8;
40        csv::ReaderBuilder::new()
41            .has_headers(!self.options.no_header)
42            .delimiter(delimiter)
43            .from_reader(rdr)
44    }
45
46    fn records_to_value<R: Read>(&self, mut rdr: csv::Reader<R>) -> anyhow::Result<Value> {
47        let headers: Vec<String> = if self.options.no_header {
48            // 헤더 없는 모드: 첫 레코드를 읽어서 컬럼 수 파악
49            // csv crate가 has_headers(false)이면 헤더를 자동으로 건너뛰지 않음
50            Vec::new()
51        } else {
52            rdr.headers()
53                .map_err(|e| crate::error::DkitError::ParseError {
54                    format: "CSV".to_string(),
55                    source: Box::new(e),
56                })?
57                .iter()
58                .map(|h| h.to_string())
59                .collect()
60        };
61
62        let mut rows = Vec::new();
63
64        for result in rdr.records() {
65            let record = result.map_err(|e| crate::error::DkitError::ParseError {
66                format: "CSV".to_string(),
67                source: Box::new(e),
68            })?;
69
70            let col_names: Vec<String> = if self.options.no_header {
71                (0..record.len()).map(|i| format!("col{i}")).collect()
72            } else {
73                headers.clone()
74            };
75
76            let mut obj = IndexMap::new();
77            for (i, field) in record.iter().enumerate() {
78                let key = col_names
79                    .get(i)
80                    .cloned()
81                    .unwrap_or_else(|| format!("col{i}"));
82                obj.insert(key, infer_value(field));
83            }
84            rows.push(Value::Object(obj));
85        }
86
87        Ok(Value::Array(rows))
88    }
89}
90
91impl FormatReader for CsvReader {
92    fn read(&self, input: &str) -> anyhow::Result<Value> {
93        let rdr = self.build_reader(input.as_bytes());
94        self.records_to_value(rdr)
95    }
96
97    fn read_from_reader(&self, reader: impl Read) -> anyhow::Result<Value> {
98        let rdr = self.build_reader(reader);
99        self.records_to_value(rdr)
100    }
101}
102
103/// CSV 포맷 Writer
104#[derive(Default)]
105pub struct CsvWriter {
106    options: FormatOptions,
107}
108
109impl CsvWriter {
110    pub fn new(options: FormatOptions) -> Self {
111        Self { options }
112    }
113}
114
115impl CsvWriter {
116    /// 모든 오브젝트에서 고유 키를 순서 보존하며 수집
117    fn collect_headers(rows: &[Value]) -> Vec<String> {
118        let mut headers = IndexMap::new();
119        for row in rows {
120            if let Value::Object(obj) = row {
121                for key in obj.keys() {
122                    headers.entry(key.clone()).or_insert(());
123                }
124            }
125        }
126        headers.into_keys().collect()
127    }
128
129    /// Value를 CSV 셀 문자열로 변환
130    fn value_to_field(v: &Value) -> String {
131        match v {
132            Value::Null => String::new(),
133            Value::Bool(b) => b.to_string(),
134            Value::Integer(n) => n.to_string(),
135            Value::Float(f) => f.to_string(),
136            Value::String(s) => s.clone(),
137            Value::Array(a) => {
138                // 배열은 JSON 형태로 직렬화
139                let parts: Vec<String> = a.iter().map(|v| format!("{v}")).collect();
140                format!("[{}]", parts.join(", "))
141            }
142            Value::Object(o) => {
143                // 오브젝트는 JSON 형태로 직렬화
144                let parts: Vec<String> = o.iter().map(|(k, v)| format!("\"{k}\": {v}")).collect();
145                format!("{{{}}}", parts.join(", "))
146            }
147        }
148    }
149
150    fn write_csv_to<W: Write>(&self, value: &Value, writer: W) -> anyhow::Result<()> {
151        let rows = match value {
152            Value::Array(arr) => arr,
153            _ => {
154                return Err(crate::error::DkitError::WriteError {
155                    format: "CSV".to_string(),
156                    source: "CSV output requires an Array of Objects".into(),
157                }
158                .into());
159            }
160        };
161
162        let delimiter = self.options.delimiter.unwrap_or(',') as u8;
163        let mut wtr = csv::WriterBuilder::new()
164            .delimiter(delimiter)
165            .from_writer(writer);
166
167        let headers = Self::collect_headers(rows);
168
169        // 헤더 쓰기
170        if !self.options.no_header {
171            wtr.write_record(&headers)
172                .map_err(|e| crate::error::DkitError::WriteError {
173                    format: "CSV".to_string(),
174                    source: Box::new(e),
175                })?;
176        }
177
178        // 데이터 행 쓰기
179        for row in rows {
180            let fields: Vec<String> = if let Value::Object(obj) = row {
181                headers
182                    .iter()
183                    .map(|h| obj.get(h).map(Self::value_to_field).unwrap_or_default())
184                    .collect()
185            } else {
186                // 오브젝트가 아닌 경우 단일 필드로 처리
187                vec![Self::value_to_field(row)]
188            };
189
190            wtr.write_record(&fields)
191                .map_err(|e| crate::error::DkitError::WriteError {
192                    format: "CSV".to_string(),
193                    source: Box::new(e),
194                })?;
195        }
196
197        wtr.flush()
198            .map_err(|e| crate::error::DkitError::WriteError {
199                format: "CSV".to_string(),
200                source: Box::new(e),
201            })?;
202
203        Ok(())
204    }
205}
206
207impl FormatWriter for CsvWriter {
208    fn write(&self, value: &Value) -> anyhow::Result<String> {
209        let mut buf = Vec::new();
210        self.write_csv_to(value, &mut buf)?;
211        String::from_utf8(buf).map_err(|e| {
212            crate::error::DkitError::WriteError {
213                format: "CSV".to_string(),
214                source: Box::new(e),
215            }
216            .into()
217        })
218    }
219
220    fn write_to_writer(&self, value: &Value, writer: impl Write) -> anyhow::Result<()> {
221        self.write_csv_to(value, writer)
222    }
223}
224
#[cfg(test)]
mod tests {
    use super::*;

    // --- Test helpers ---

    /// Shorthand for a `Value::String`.
    fn text(s: &str) -> Value {
        Value::String(s.to_string())
    }

    /// Builds a `Value::Object` from key/value pairs, keeping their order.
    fn object(pairs: Vec<(&str, Value)>) -> Value {
        let mut map = IndexMap::new();
        for (key, value) in pairs {
            map.insert(key.to_string(), value);
        }
        Value::Object(map)
    }

    /// Splits trimmed writer output into its `\n`-separated lines.
    fn lines_of(output: &str) -> Vec<&str> {
        output.trim().split('\n').collect()
    }

    // --- infer_value ---

    #[test]
    fn test_infer_integer() {
        for (raw, expected) in [("42", 42), ("-7", -7), ("0", 0)] {
            assert_eq!(infer_value(raw), Value::Integer(expected));
        }
    }

    #[test]
    fn test_infer_float() {
        for (raw, expected) in [("3.14", 3.14), ("-0.5", -0.5)] {
            assert_eq!(infer_value(raw), Value::Float(expected));
        }
    }

    #[test]
    fn test_infer_string() {
        for raw in ["hello", "true"] {
            assert_eq!(infer_value(raw), text(raw));
        }
    }

    #[test]
    fn test_infer_empty() {
        assert_eq!(infer_value(""), Value::Null);
    }

    // --- CsvReader basics ---

    #[test]
    fn test_read_simple_csv() {
        let parsed = CsvReader::default()
            .read("name,age,city\nAlice,30,Seoul\nBob,25,Busan\n")
            .unwrap();
        let rows = parsed.as_array().unwrap();
        assert_eq!(rows.len(), 2);

        let expected = [("Alice", 30, "Seoul"), ("Bob", 25, "Busan")];
        for (row, (name, age, city)) in rows.iter().zip(expected) {
            let fields = row.as_object().unwrap();
            assert_eq!(fields.get("name"), Some(&text(name)));
            assert_eq!(fields.get("age"), Some(&Value::Integer(age)));
            assert_eq!(fields.get("city"), Some(&text(city)));
        }
    }

    #[test]
    fn test_read_no_header() {
        let options = FormatOptions {
            no_header: true,
            ..Default::default()
        };
        let parsed = CsvReader::new(options)
            .read("Alice,30,Seoul\nBob,25,Busan\n")
            .unwrap();
        let rows = parsed.as_array().unwrap();
        assert_eq!(rows.len(), 2);

        let first = rows[0].as_object().unwrap();
        assert_eq!(first.get("col0"), Some(&text("Alice")));
        assert_eq!(first.get("col1"), Some(&Value::Integer(30)));
        assert_eq!(first.get("col2"), Some(&text("Seoul")));
    }

    #[test]
    fn test_read_custom_delimiter() {
        let options = FormatOptions {
            delimiter: Some('\t'),
            ..Default::default()
        };
        let parsed = CsvReader::new(options)
            .read("name\tage\nAlice\t30\n")
            .unwrap();
        let rows = parsed.as_array().unwrap();
        assert_eq!(rows.len(), 1);

        let only = rows[0].as_object().unwrap();
        assert_eq!(only.get("name"), Some(&text("Alice")));
        assert_eq!(only.get("age"), Some(&Value::Integer(30)));
    }

    #[test]
    fn test_read_quoted_fields() {
        let parsed = CsvReader::default()
            .read("name,description\nAlice,\"Hello, World!\"\nBob,\"He said \"\"hi\"\"\"\n")
            .unwrap();
        let rows = parsed.as_array().unwrap();

        let first = rows[0].as_object().unwrap();
        assert_eq!(first.get("description"), Some(&text("Hello, World!")));

        let second = rows[1].as_object().unwrap();
        assert_eq!(second.get("description"), Some(&text("He said \"hi\"")));
    }

    #[test]
    fn test_read_unicode() {
        let parsed = CsvReader::default()
            .read("이름,도시\n김철수,서울\n이영희,부산\n")
            .unwrap();
        let rows = parsed.as_array().unwrap();
        assert_eq!(rows.len(), 2);

        let first = rows[0].as_object().unwrap();
        assert_eq!(first.get("이름"), Some(&text("김철수")));
        assert_eq!(first.get("도시"), Some(&text("서울")));
    }

    #[test]
    fn test_read_emoji() {
        let parsed = CsvReader::default()
            .read("name,emoji\nAlice,🎉\nBob,🚀\n")
            .unwrap();
        let rows = parsed.as_array().unwrap();

        let first = rows[0].as_object().unwrap();
        assert_eq!(first.get("emoji"), Some(&text("🎉")));
    }

    #[test]
    fn test_read_empty_csv() {
        let parsed = CsvReader::default().read("name,age\n").unwrap();
        assert!(parsed.as_array().unwrap().is_empty());
    }

    #[test]
    fn test_read_empty_fields() {
        let parsed = CsvReader::default()
            .read("name,age,city\nAlice,,Seoul\n")
            .unwrap();
        let rows = parsed.as_array().unwrap();

        let only = rows[0].as_object().unwrap();
        assert_eq!(only.get("age"), Some(&Value::Null));
    }

    #[test]
    fn test_read_float_values() {
        let parsed = CsvReader::default()
            .read("name,score\nAlice,98.5\nBob,87.3\n")
            .unwrap();
        let rows = parsed.as_array().unwrap();

        let first = rows[0].as_object().unwrap();
        assert_eq!(first.get("score"), Some(&Value::Float(98.5)));
    }

    #[test]
    fn test_read_from_reader() {
        let bytes: &[u8] = b"name,age\nAlice,30\n";
        let parsed = CsvReader::default().read_from_reader(bytes).unwrap();
        assert_eq!(parsed.as_array().unwrap().len(), 1);
    }

    // --- CsvWriter ---

    #[test]
    fn test_write_simple() {
        let value = Value::Array(vec![
            object(vec![("name", text("Alice")), ("age", Value::Integer(30))]),
            object(vec![("name", text("Bob")), ("age", Value::Integer(25))]),
        ]);

        let output = CsvWriter::default().write(&value).unwrap();
        let lines = lines_of(&output);
        assert_eq!(lines[0], "name,age");
        assert_eq!(lines[1], "Alice,30");
        assert_eq!(lines[2], "Bob,25");
    }

    #[test]
    fn test_write_no_header() {
        let options = FormatOptions {
            no_header: true,
            ..Default::default()
        };
        let value = Value::Array(vec![object(vec![
            ("name", text("Alice")),
            ("age", Value::Integer(30)),
        ])]);

        let output = CsvWriter::new(options).write(&value).unwrap();
        let lines = lines_of(&output);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0], "Alice,30");
    }

    #[test]
    fn test_write_custom_delimiter() {
        let options = FormatOptions {
            delimiter: Some('\t'),
            ..Default::default()
        };
        let value = Value::Array(vec![object(vec![
            ("name", text("Alice")),
            ("age", Value::Integer(30)),
        ])]);

        let output = CsvWriter::new(options).write(&value).unwrap();
        assert!(output.contains("name\tage"));
        assert!(output.contains("Alice\t30"));
    }

    #[test]
    fn test_write_quoted_fields() {
        let value = Value::Array(vec![object(vec![("desc", text("Hello, World!"))])]);

        let output = CsvWriter::default().write(&value).unwrap();
        assert!(output.contains("\"Hello, World!\""));
    }

    #[test]
    fn test_write_null_values() {
        let value = Value::Array(vec![object(vec![
            ("name", text("Alice")),
            ("age", Value::Null),
        ])]);

        let output = CsvWriter::default().write(&value).unwrap();
        assert_eq!(lines_of(&output)[1], "Alice,");
    }

    #[test]
    fn test_write_unicode() {
        let value = Value::Array(vec![object(vec![
            ("이름", text("김철수")),
            ("도시", text("서울")),
        ])]);

        let output = CsvWriter::default().write(&value).unwrap();
        assert!(output.contains("이름"));
        assert!(output.contains("김철수"));
    }

    #[test]
    fn test_write_non_array_error() {
        let outcome = CsvWriter::default().write(&Value::Object(IndexMap::new()));
        assert!(outcome.is_err());
    }

    #[test]
    fn test_write_to_writer() {
        let value = Value::Array(vec![object(vec![("x", Value::Integer(1))])]);

        let mut sink = Vec::new();
        CsvWriter::default()
            .write_to_writer(&value, &mut sink)
            .unwrap();
        let output = String::from_utf8(sink).unwrap();
        assert!(output.contains("x\n1\n") || output.contains("x\r\n1\r\n"));
    }

    #[test]
    fn test_write_missing_keys() {
        let value = Value::Array(vec![
            object(vec![("a", Value::Integer(1)), ("b", Value::Integer(2))]),
            // The second row has no "b" key.
            object(vec![("a", Value::Integer(3))]),
        ]);

        let output = CsvWriter::default().write(&value).unwrap();
        let lines = lines_of(&output);
        assert_eq!(lines[0], "a,b");
        assert_eq!(lines[1], "1,2");
        assert_eq!(lines[2], "3,");
    }

    // --- Round trips ---

    #[test]
    fn test_roundtrip() {
        let reader = CsvReader::default();
        let writer = CsvWriter::default();

        let original = reader
            .read("name,age,score\nAlice,30,98.5\nBob,25,87.3\n")
            .unwrap();
        let rendered = writer.write(&original).unwrap();
        let reparsed = reader.read(&rendered).unwrap();

        assert_eq!(original, reparsed);
    }

    #[test]
    fn test_empty_array_write() {
        let output = CsvWriter::default().write(&Value::Array(vec![])).unwrap();

        // Whatever the writer emits for an empty array must read back as
        // zero data rows.
        let reparsed = CsvReader::default().read(&output).unwrap();
        assert_eq!(reparsed.as_array().unwrap().len(), 0);
    }

    #[test]
    fn test_write_bool_values() {
        let value = Value::Array(vec![object(vec![("flag", Value::Bool(true))])]);

        let output = CsvWriter::default().write(&value).unwrap();
        assert_eq!(lines_of(&output)[1], "true");
    }
}