// dkit_core/format/jsonl.rs

1use std::io::{BufRead, Read, Write};
2
3use crate::format::{FormatReader, FormatWriter};
4use crate::value::Value;
5
6use super::json::{from_json_value, to_json_value};
7
8/// JSONL (JSON Lines) 포맷 Reader
9///
10/// 한 줄에 하나의 JSON 객체를 읽어 배열(Value::Array)로 변환한다.
11/// 빈 줄은 무시하고, 파싱 실패 시 줄 번호를 포함한 에러를 반환한다.
12pub struct JsonlReader;
13
14impl JsonlReader {
15    fn parse_lines(&self, input: &str) -> anyhow::Result<Value> {
16        let mut items = Vec::new();
17        for (line_num, line) in input.lines().enumerate() {
18            let trimmed = line.trim();
19            if trimmed.is_empty() {
20                continue;
21            }
22            let json_val: serde_json::Value =
23                serde_json::from_str(trimmed).map_err(|e| crate::error::DkitError::ParseError {
24                    format: "JSONL".to_string(),
25                    source: Box::new(std::io::Error::new(
26                        std::io::ErrorKind::InvalidData,
27                        format!("line {}: {e}", line_num + 1),
28                    )),
29                })?;
30            items.push(from_json_value(json_val));
31        }
32        Ok(Value::Array(items))
33    }
34}
35
36impl FormatReader for JsonlReader {
37    fn read(&self, input: &str) -> anyhow::Result<Value> {
38        self.parse_lines(input)
39    }
40
41    fn read_from_reader(&self, reader: impl Read) -> anyhow::Result<Value> {
42        let buf_reader = std::io::BufReader::new(reader);
43        let mut items = Vec::new();
44        for (line_num, line_result) in buf_reader.lines().enumerate() {
45            let line = line_result.map_err(|e| crate::error::DkitError::ParseError {
46                format: "JSONL".to_string(),
47                source: Box::new(e),
48            })?;
49            let trimmed = line.trim().to_string();
50            if trimmed.is_empty() {
51                continue;
52            }
53            let json_val: serde_json::Value = serde_json::from_str(&trimmed).map_err(|e| {
54                crate::error::DkitError::ParseError {
55                    format: "JSONL".to_string(),
56                    source: Box::new(std::io::Error::new(
57                        std::io::ErrorKind::InvalidData,
58                        format!("line {}: {e}", line_num + 1),
59                    )),
60                }
61            })?;
62            items.push(from_json_value(json_val));
63        }
64        Ok(Value::Array(items))
65    }
66}
67
68/// JSONL (JSON Lines) 포맷 Writer
69///
70/// Value::Array의 각 원소를 한 줄씩 JSON으로 직렬화한다.
71/// Array가 아닌 값은 단일 줄로 출력한다.
72pub struct JsonlWriter;
73
74impl FormatWriter for JsonlWriter {
75    fn write(&self, value: &Value) -> anyhow::Result<String> {
76        let mut output = String::new();
77        match value {
78            Value::Array(items) => {
79                for item in items {
80                    let json_val = to_json_value(item);
81                    let line = serde_json::to_string(&json_val).map_err(|e| {
82                        crate::error::DkitError::WriteError {
83                            format: "JSONL".to_string(),
84                            source: Box::new(e),
85                        }
86                    })?;
87                    output.push_str(&line);
88                    output.push('\n');
89                }
90            }
91            other => {
92                let json_val = to_json_value(other);
93                let line = serde_json::to_string(&json_val).map_err(|e| {
94                    crate::error::DkitError::WriteError {
95                        format: "JSONL".to_string(),
96                        source: Box::new(e),
97                    }
98                })?;
99                output.push_str(&line);
100                output.push('\n');
101            }
102        }
103        Ok(output)
104    }
105
106    fn write_to_writer(&self, value: &Value, mut writer: impl Write) -> anyhow::Result<()> {
107        match value {
108            Value::Array(items) => {
109                for item in items {
110                    let json_val = to_json_value(item);
111                    serde_json::to_writer(&mut writer, &json_val).map_err(|e| {
112                        crate::error::DkitError::WriteError {
113                            format: "JSONL".to_string(),
114                            source: Box::new(e),
115                        }
116                    })?;
117                    writer
118                        .write_all(b"\n")
119                        .map_err(|e| crate::error::DkitError::WriteError {
120                            format: "JSONL".to_string(),
121                            source: Box::new(e),
122                        })?;
123                }
124            }
125            other => {
126                let json_val = to_json_value(other);
127                serde_json::to_writer(&mut writer, &json_val).map_err(|e| {
128                    crate::error::DkitError::WriteError {
129                        format: "JSONL".to_string(),
130                        source: Box::new(e),
131                    }
132                })?;
133                writer
134                    .write_all(b"\n")
135                    .map_err(|e| crate::error::DkitError::WriteError {
136                        format: "JSONL".to_string(),
137                        source: Box::new(e),
138                    })?;
139            }
140        }
141        Ok(())
142    }
143}
144
#[cfg(test)]
mod tests {
    //! Unit tests for the JSONL reader and writer: string and streaming read
    //! paths, string and streaming write paths, and write/read round trips.

    use super::*;
    use indexmap::IndexMap;

    // --- JsonlReader tests ---

    #[test]
    fn test_read_basic() {
        let reader = JsonlReader;
        let input = r#"{"name":"Alice","age":30}
{"name":"Bob","age":25}"#;
        let result = reader.read(input).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 2);
        assert_eq!(
            arr[0].as_object().unwrap().get("name"),
            Some(&Value::String("Alice".to_string()))
        );
        assert_eq!(
            arr[1].as_object().unwrap().get("age"),
            Some(&Value::Integer(25))
        );
    }

    #[test]
    fn test_read_skip_empty_lines() {
        let reader = JsonlReader;
        let input = r#"{"a":1}

{"b":2}

"#;
        let result = reader.read(input).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 2);
    }

    #[test]
    fn test_read_single_line() {
        let reader = JsonlReader;
        let input = r#"{"key":"value"}"#;
        let result = reader.read(input).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 1);
    }

    #[test]
    fn test_read_empty_input() {
        let reader = JsonlReader;
        let result = reader.read("").unwrap();
        let arr = result.as_array().unwrap();
        assert!(arr.is_empty());
    }

    #[test]
    fn test_read_only_empty_lines() {
        let reader = JsonlReader;
        let result = reader.read("\n\n\n").unwrap();
        let arr = result.as_array().unwrap();
        assert!(arr.is_empty());
    }

    #[test]
    fn test_read_various_json_types() {
        let reader = JsonlReader;
        let input = "42\n\"hello\"\ntrue\nnull\n[1,2,3]";
        let result = reader.read(input).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 5);
        assert_eq!(arr[0], Value::Integer(42));
        assert_eq!(arr[1], Value::String("hello".to_string()));
        assert_eq!(arr[2], Value::Bool(true));
        assert_eq!(arr[3], Value::Null);
        assert_eq!(arr[4].as_array().unwrap().len(), 3);
    }

    #[test]
    fn test_read_malformed_line_error_with_line_number() {
        let reader = JsonlReader;
        let input = r#"{"a":1}
{invalid json}
{"b":2}"#;
        let err = reader.read(input).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("JSONL"));
        assert!(msg.contains("line 2"));
    }

    #[test]
    fn test_read_error_line_number_counts_blank_lines() {
        // Skipped blank lines must still advance the reported line number.
        let reader = JsonlReader;
        let input = "{\"a\":1}\n\n{invalid json}\n";
        let err = reader.read(input).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("JSONL"));
        assert!(msg.contains("line 3"));
    }

    #[test]
    fn test_read_crlf_line_endings() {
        let reader = JsonlReader;
        let input = "{\"a\":1}\r\n{\"b\":2}\r\n";
        let result = reader.read(input).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 2);
    }

    #[test]
    fn test_read_from_reader() {
        let reader = JsonlReader;
        let input = b"{\"x\":1}\n{\"x\":2}\n";
        let result = reader.read_from_reader(&input[..]).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 2);
    }

    #[test]
    fn test_read_from_reader_skips_blank_and_crlf_lines() {
        let reader = JsonlReader;
        let input = b"{\"x\":1}\r\n\r\n   \n{\"x\":2}\r\n";
        let result = reader.read_from_reader(&input[..]).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 2);
        assert_eq!(
            arr[1].as_object().unwrap().get("x"),
            Some(&Value::Integer(2))
        );
    }

    #[test]
    fn test_read_from_reader_malformed_line_error_with_line_number() {
        let reader = JsonlReader;
        let input = b"{\"a\":1}\n{invalid json}\n{\"b\":2}\n";
        let err = reader.read_from_reader(&input[..]).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("JSONL"));
        assert!(msg.contains("line 2"));
    }

    #[test]
    fn test_read_from_reader_matches_read() {
        // The streaming and in-memory paths must agree on the same document.
        let reader = JsonlReader;
        let input = "{\"a\":1}\n\n[1,2]\n\"s\"\n";
        let from_str = reader.read(input).unwrap();
        let from_reader = reader.read_from_reader(input.as_bytes()).unwrap();
        assert_eq!(from_str, from_reader);
    }

    #[test]
    fn test_read_whitespace_trimmed() {
        let reader = JsonlReader;
        let input = "  {\"a\":1}  \n  {\"b\":2}  ";
        let result = reader.read(input).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 2);
    }

    #[test]
    fn test_read_unicode() {
        let reader = JsonlReader;
        let input = r#"{"emoji":"🎉","korean":"한글"}"#;
        let result = reader.read(input).unwrap();
        let arr = result.as_array().unwrap();
        let obj = arr[0].as_object().unwrap();
        assert_eq!(obj.get("emoji"), Some(&Value::String("🎉".to_string())));
        assert_eq!(obj.get("korean"), Some(&Value::String("한글".to_string())));
    }

    // --- JsonlWriter tests ---

    #[test]
    fn test_write_array() {
        let writer = JsonlWriter;
        let value = Value::Array(vec![
            Value::Object({
                let mut m = IndexMap::new();
                m.insert("name".to_string(), Value::String("Alice".to_string()));
                m.insert("age".to_string(), Value::Integer(30));
                m
            }),
            Value::Object({
                let mut m = IndexMap::new();
                m.insert("name".to_string(), Value::String("Bob".to_string()));
                m.insert("age".to_string(), Value::Integer(25));
                m
            }),
        ]);
        let output = writer.write(&value).unwrap();
        let lines: Vec<&str> = output.trim_end().split('\n').collect();
        assert_eq!(lines.len(), 2);
        // Each line should be valid JSON containing the expected fields
        let parsed0: serde_json::Value = serde_json::from_str(lines[0]).unwrap();
        assert_eq!(parsed0["name"], "Alice");
        assert_eq!(parsed0["age"], 30);
        let parsed1: serde_json::Value = serde_json::from_str(lines[1]).unwrap();
        assert_eq!(parsed1["name"], "Bob");
        assert_eq!(parsed1["age"], 25);
    }

    #[test]
    fn test_write_empty_array() {
        let writer = JsonlWriter;
        let output = writer.write(&Value::Array(vec![])).unwrap();
        assert_eq!(output, "");
    }

    #[test]
    fn test_write_non_array() {
        let writer = JsonlWriter;
        let output = writer.write(&Value::Integer(42)).unwrap();
        assert_eq!(output, "42\n");
    }

    #[test]
    fn test_write_to_writer() {
        let writer = JsonlWriter;
        let value = Value::Array(vec![Value::Integer(1), Value::Integer(2)]);
        let mut buf = Vec::new();
        writer.write_to_writer(&value, &mut buf).unwrap();
        assert_eq!(String::from_utf8(buf).unwrap(), "1\n2\n");
    }

    #[test]
    fn test_write_to_writer_non_array() {
        let writer = JsonlWriter;
        let mut buf = Vec::new();
        writer
            .write_to_writer(&Value::Integer(42), &mut buf)
            .unwrap();
        assert_eq!(String::from_utf8(buf).unwrap(), "42\n");
    }

    #[test]
    fn test_write_to_writer_empty_array() {
        let writer = JsonlWriter;
        let mut buf = Vec::new();
        writer
            .write_to_writer(&Value::Array(vec![]), &mut buf)
            .unwrap();
        assert!(buf.is_empty());
    }

    #[test]
    fn test_write_to_writer_matches_write() {
        // The streaming and in-memory paths must produce identical bytes.
        let writer = JsonlWriter;
        let value = Value::Array(vec![
            Value::Integer(1),
            Value::String("two".to_string()),
            Value::Array(vec![Value::Bool(true), Value::Null]),
        ]);
        let mut buf = Vec::new();
        writer.write_to_writer(&value, &mut buf).unwrap();
        assert_eq!(String::from_utf8(buf).unwrap(), writer.write(&value).unwrap());
    }

    // --- Round-trip tests ---

    #[test]
    fn test_roundtrip() {
        let original = Value::Array(vec![
            Value::Object({
                let mut m = IndexMap::new();
                m.insert("id".to_string(), Value::Integer(1));
                m.insert("name".to_string(), Value::String("test".to_string()));
                m.insert("active".to_string(), Value::Bool(true));
                m
            }),
            Value::Object({
                let mut m = IndexMap::new();
                m.insert("id".to_string(), Value::Integer(2));
                m.insert("name".to_string(), Value::String("other".to_string()));
                m.insert("active".to_string(), Value::Bool(false));
                m
            }),
        ]);

        let writer = JsonlWriter;
        let written = writer.write(&original).unwrap();

        let reader = JsonlReader;
        let parsed = reader.read(&written).unwrap();

        assert_eq!(original, parsed);
    }

    #[test]
    fn test_roundtrip_nested() {
        let original = Value::Array(vec![Value::Object({
            let mut m = IndexMap::new();
            m.insert(
                "data".to_string(),
                Value::Array(vec![Value::Integer(1), Value::Integer(2)]),
            );
            m.insert(
                "nested".to_string(),
                Value::Object({
                    let mut inner = IndexMap::new();
                    inner.insert("key".to_string(), Value::String("val".to_string()));
                    inner
                }),
            );
            m
        })]);

        let writer = JsonlWriter;
        let written = writer.write(&original).unwrap();
        let reader = JsonlReader;
        let parsed = reader.read(&written).unwrap();
        assert_eq!(original, parsed);
    }

    // --- Large-input tests ---

    #[test]
    fn test_large_input() {
        let lines: Vec<String> = (0..1000)
            .map(|i| format!(r#"{{"id":{i},"value":"item_{i}"}}"#))
            .collect();
        let input = lines.join("\n");

        let reader = JsonlReader;
        let result = reader.read(&input).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 1000);
        assert_eq!(
            arr[999].as_object().unwrap().get("id"),
            Some(&Value::Integer(999))
        );
    }
}