1use std::io::{BufRead, Read, Write};
2
3use crate::format::{FormatReader, FormatWriter};
4use crate::value::Value;
5
6use super::json::{from_json_value, to_json_value};
7
8pub struct JsonlReader;
13
14impl JsonlReader {
15 fn parse_lines(&self, input: &str) -> anyhow::Result<Value> {
16 let mut items = Vec::new();
17 for (line_num, line) in input.lines().enumerate() {
18 let trimmed = line.trim();
19 if trimmed.is_empty() {
20 continue;
21 }
22 let json_val: serde_json::Value =
23 serde_json::from_str(trimmed).map_err(|e| crate::error::DkitError::ParseError {
24 format: "JSONL".to_string(),
25 source: Box::new(std::io::Error::new(
26 std::io::ErrorKind::InvalidData,
27 format!("line {}: {e}", line_num + 1),
28 )),
29 })?;
30 items.push(from_json_value(json_val));
31 }
32 Ok(Value::Array(items))
33 }
34}
35
36impl FormatReader for JsonlReader {
37 fn read(&self, input: &str) -> anyhow::Result<Value> {
38 self.parse_lines(input)
39 }
40
41 fn read_from_reader(&self, reader: impl Read) -> anyhow::Result<Value> {
42 let buf_reader = std::io::BufReader::new(reader);
43 let mut items = Vec::new();
44 for (line_num, line_result) in buf_reader.lines().enumerate() {
45 let line = line_result.map_err(|e| crate::error::DkitError::ParseError {
46 format: "JSONL".to_string(),
47 source: Box::new(e),
48 })?;
49 let trimmed = line.trim().to_string();
50 if trimmed.is_empty() {
51 continue;
52 }
53 let json_val: serde_json::Value = serde_json::from_str(&trimmed).map_err(|e| {
54 crate::error::DkitError::ParseError {
55 format: "JSONL".to_string(),
56 source: Box::new(std::io::Error::new(
57 std::io::ErrorKind::InvalidData,
58 format!("line {}: {e}", line_num + 1),
59 )),
60 }
61 })?;
62 items.push(from_json_value(json_val));
63 }
64 Ok(Value::Array(items))
65 }
66}
67
68pub struct JsonlWriter;
73
74impl FormatWriter for JsonlWriter {
75 fn write(&self, value: &Value) -> anyhow::Result<String> {
76 let mut output = String::new();
77 match value {
78 Value::Array(items) => {
79 for item in items {
80 let json_val = to_json_value(item);
81 let line = serde_json::to_string(&json_val).map_err(|e| {
82 crate::error::DkitError::WriteError {
83 format: "JSONL".to_string(),
84 source: Box::new(e),
85 }
86 })?;
87 output.push_str(&line);
88 output.push('\n');
89 }
90 }
91 other => {
92 let json_val = to_json_value(other);
93 let line = serde_json::to_string(&json_val).map_err(|e| {
94 crate::error::DkitError::WriteError {
95 format: "JSONL".to_string(),
96 source: Box::new(e),
97 }
98 })?;
99 output.push_str(&line);
100 output.push('\n');
101 }
102 }
103 Ok(output)
104 }
105
106 fn write_to_writer(&self, value: &Value, mut writer: impl Write) -> anyhow::Result<()> {
107 match value {
108 Value::Array(items) => {
109 for item in items {
110 let json_val = to_json_value(item);
111 serde_json::to_writer(&mut writer, &json_val).map_err(|e| {
112 crate::error::DkitError::WriteError {
113 format: "JSONL".to_string(),
114 source: Box::new(e),
115 }
116 })?;
117 writer
118 .write_all(b"\n")
119 .map_err(|e| crate::error::DkitError::WriteError {
120 format: "JSONL".to_string(),
121 source: Box::new(e),
122 })?;
123 }
124 }
125 other => {
126 let json_val = to_json_value(other);
127 serde_json::to_writer(&mut writer, &json_val).map_err(|e| {
128 crate::error::DkitError::WriteError {
129 format: "JSONL".to_string(),
130 source: Box::new(e),
131 }
132 })?;
133 writer
134 .write_all(b"\n")
135 .map_err(|e| crate::error::DkitError::WriteError {
136 format: "JSONL".to_string(),
137 source: Box::new(e),
138 })?;
139 }
140 }
141 Ok(())
142 }
143}
144
#[cfg(test)]
mod tests {
    use super::*;
    use indexmap::IndexMap;

    /// Builds a `Value::Object` from `(key, value)` pairs, keeping their order.
    fn object(fields: Vec<(&str, Value)>) -> Value {
        let map: IndexMap<String, Value> = fields
            .into_iter()
            .map(|(key, value)| (key.to_string(), value))
            .collect();
        Value::Object(map)
    }

    /// Shorthand for a `Value::String` built from a string slice.
    fn text(s: &str) -> Value {
        Value::String(s.to_string())
    }

    /// Unwraps the elements of a `Value::Array`, panicking on any other variant.
    fn into_items(value: Value) -> Vec<Value> {
        match value {
            Value::Array(items) => items,
            other => panic!("expected an array, got {other:?}"),
        }
    }

    /// Reads `input` through `JsonlReader::read` and returns the parsed records.
    fn read_items(input: &str) -> Vec<Value> {
        into_items(JsonlReader.read(input).unwrap())
    }

    /// Writes `original` as JSONL, reads it back and checks nothing changed.
    fn assert_roundtrip(original: Value) {
        let written = JsonlWriter.write(&original).unwrap();
        let parsed = JsonlReader.read(&written).unwrap();
        assert_eq!(original, parsed);
    }

    // --- Reader ---

    #[test]
    fn test_read_basic() {
        let items = read_items("{\"name\":\"Alice\",\"age\":30}\n{\"name\":\"Bob\",\"age\":25}");
        assert_eq!(items.len(), 2);

        let first = items[0].as_object().unwrap();
        assert_eq!(first.get("name"), Some(&text("Alice")));

        let second = items[1].as_object().unwrap();
        assert_eq!(second.get("age"), Some(&Value::Integer(25)));
    }

    #[test]
    fn test_read_skip_empty_lines() {
        let items = read_items("{\"a\":1}\n\n{\"b\":2}\n\n");
        assert_eq!(items.len(), 2);
    }

    #[test]
    fn test_read_single_line() {
        let items = read_items("{\"key\":\"value\"}");
        assert_eq!(items.len(), 1);
    }

    #[test]
    fn test_read_empty_input() {
        assert!(read_items("").is_empty());
    }

    #[test]
    fn test_read_only_empty_lines() {
        assert!(read_items("\n\n\n").is_empty());
    }

    #[test]
    fn test_read_various_json_types() {
        let items = read_items("42\n\"hello\"\ntrue\nnull\n[1,2,3]");
        assert_eq!(items.len(), 5);
        assert_eq!(items[0], Value::Integer(42));
        assert_eq!(items[1], text("hello"));
        assert_eq!(items[2], Value::Bool(true));
        assert_eq!(items[3], Value::Null);
        assert_eq!(items[4].as_array().unwrap().len(), 3);
    }

    #[test]
    fn test_read_malformed_line_error_with_line_number() {
        let input = "{\"a\":1}\n{invalid json}\n{\"b\":2}";
        let message = JsonlReader.read(input).unwrap_err().to_string();
        assert!(message.contains("JSONL"));
        assert!(message.contains("line 2"));
    }

    #[test]
    fn test_read_from_reader() {
        let bytes = "{\"x\":1}\n{\"x\":2}\n".as_bytes();
        let items = into_items(JsonlReader.read_from_reader(bytes).unwrap());
        assert_eq!(items.len(), 2);
    }

    #[test]
    fn test_read_whitespace_trimmed() {
        let items = read_items("  {\"a\":1}  \n  {\"b\":2}  ");
        assert_eq!(items.len(), 2);
    }

    #[test]
    fn test_read_unicode() {
        let items = read_items("{\"emoji\":\"🎉\",\"korean\":\"한글\"}");
        let fields = items[0].as_object().unwrap();
        assert_eq!(fields.get("emoji"), Some(&text("🎉")));
        assert_eq!(fields.get("korean"), Some(&text("한글")));
    }

    // --- Writer ---

    #[test]
    fn test_write_array() {
        let value = Value::Array(vec![
            object(vec![("name", text("Alice")), ("age", Value::Integer(30))]),
            object(vec![("name", text("Bob")), ("age", Value::Integer(25))]),
        ]);

        let output = JsonlWriter.write(&value).unwrap();
        let lines: Vec<&str> = output.lines().collect();
        assert_eq!(lines.len(), 2);

        let first: serde_json::Value = serde_json::from_str(lines[0]).unwrap();
        assert_eq!(first["name"], "Alice");
        assert_eq!(first["age"], 30);

        let second: serde_json::Value = serde_json::from_str(lines[1]).unwrap();
        assert_eq!(second["name"], "Bob");
        assert_eq!(second["age"], 25);
    }

    #[test]
    fn test_write_empty_array() {
        let output = JsonlWriter.write(&Value::Array(Vec::new())).unwrap();
        assert_eq!(output, "");
    }

    #[test]
    fn test_write_non_array() {
        let output = JsonlWriter.write(&Value::Integer(42)).unwrap();
        assert_eq!(output, "42\n");
    }

    #[test]
    fn test_write_to_writer() {
        let value = Value::Array(vec![Value::Integer(1), Value::Integer(2)]);
        let mut sink: Vec<u8> = Vec::new();
        JsonlWriter.write_to_writer(&value, &mut sink).unwrap();
        assert_eq!(String::from_utf8(sink).unwrap(), "1\n2\n");
    }

    // --- Round trips ---

    #[test]
    fn test_roundtrip() {
        assert_roundtrip(Value::Array(vec![
            object(vec![
                ("id", Value::Integer(1)),
                ("name", text("test")),
                ("active", Value::Bool(true)),
            ]),
            object(vec![
                ("id", Value::Integer(2)),
                ("name", text("other")),
                ("active", Value::Bool(false)),
            ]),
        ]));
    }

    #[test]
    fn test_roundtrip_nested() {
        assert_roundtrip(Value::Array(vec![object(vec![
            (
                "data",
                Value::Array(vec![Value::Integer(1), Value::Integer(2)]),
            ),
            ("nested", object(vec![("key", text("val"))])),
        ])]));
    }

    // --- Volume ---

    #[test]
    fn test_large_input() {
        let input = (0..1000)
            .map(|i| format!(r#"{{"id":{i},"value":"item_{i}"}}"#))
            .collect::<Vec<String>>()
            .join("\n");

        let items = read_items(&input);
        assert_eq!(items.len(), 1000);

        let last = items[999].as_object().unwrap();
        assert_eq!(last.get("id"), Some(&Value::Integer(999)));
    }
}