1use std::io::{Read, Write};
7
8use serde_json::Value;
9
10use crate::column::Column;
11use crate::dataframe::DataFrame;
12use crate::error::DataFrameError;
13use crate::scalar::Scalar;
14
15impl DataFrame {
16 pub fn to_json(&self) -> Result<String, DataFrameError> {
18 let rows = self.to_json_rows();
19 let val = Value::Array(rows);
20 serde_json::to_string_pretty(&val).map_err(DataFrameError::from)
21 }
22
23 pub fn to_json_writer<W: Write>(&self, writer: W) -> Result<(), DataFrameError> {
25 let rows = self.to_json_rows();
26 let val = Value::Array(rows);
27 serde_json::to_writer_pretty(writer, &val).map_err(DataFrameError::from)
28 }
29
30 pub fn to_json_file(&self, path: &std::path::Path) -> Result<(), DataFrameError> {
32 let file = std::fs::File::create(path)?;
33 let writer = std::io::BufWriter::new(file);
34 self.to_json_writer(writer)
35 }
36
37 pub fn from_json(json: &str) -> Result<Self, DataFrameError> {
39 let val: Value = serde_json::from_str(json)?;
40 Self::from_json_value(&val)
41 }
42
43 pub fn from_json_reader<R: Read>(reader: R) -> Result<Self, DataFrameError> {
45 let val: Value = serde_json::from_reader(reader)?;
46 Self::from_json_value(&val)
47 }
48
49 fn to_json_rows(&self) -> Vec<Value> {
51 let mut rows = Vec::with_capacity(self.height());
52 let names = self.column_names();
53
54 for i in 0..self.height() {
55 let mut map = serde_json::Map::new();
56 for (col_idx, name) in names.iter().enumerate() {
57 let val = self.columns().get(col_idx).and_then(|c| c.get(i));
58 map.insert((*name).to_string(), scalar_to_json(val));
59 }
60 rows.push(Value::Object(map));
61 }
62 rows
63 }
64
65 fn from_json_value(val: &Value) -> Result<Self, DataFrameError> {
67 let arr = match val {
68 Value::Array(a) => a,
69 Value::Null
70 | Value::Bool(_)
71 | Value::Number(_)
72 | Value::String(_)
73 | Value::Object(_) => {
74 return Err(DataFrameError::Other(
75 "expected JSON array of objects".to_string(),
76 ));
77 }
78 };
79
80 if arr.is_empty() {
81 return Ok(Self::empty());
82 }
83
84 #[allow(
86 clippy::indexing_slicing,
87 reason = "arr is non-empty (is_empty() guard above); index 0 is always valid"
88 )]
89 let first = match &arr[0] {
90 Value::Object(m) => m,
91 Value::Null
92 | Value::Bool(_)
93 | Value::Number(_)
94 | Value::String(_)
95 | Value::Array(_) => {
96 return Err(DataFrameError::Other(
97 "expected JSON object as array element".to_string(),
98 ));
99 }
100 };
101
102 let col_names: Vec<String> = first.keys().cloned().collect();
103 let n_rows = arr.len();
104
105 let mut raw_cols: Vec<Vec<Option<&Value>>> = col_names
107 .iter()
108 .map(|_| Vec::with_capacity(n_rows))
109 .collect();
110
111 for row_val in arr {
112 let obj = match row_val {
113 Value::Object(m) => m,
114 Value::Null
115 | Value::Bool(_)
116 | Value::Number(_)
117 | Value::String(_)
118 | Value::Array(_) => {
119 return Err(DataFrameError::Other(
120 "expected JSON object as array element".to_string(),
121 ));
122 }
123 };
124 for (col_idx, name) in col_names.iter().enumerate() {
125 #[allow(
127 clippy::indexing_slicing,
128 reason = "col_idx iterates 0..col_names.len() which equals raw_cols.len(); index is always in bounds"
129 )]
130 raw_cols[col_idx].push(obj.get(name));
131 }
132 }
133
134 let columns: Vec<Column> = col_names
136 .into_iter()
137 .zip(raw_cols)
138 .map(|(name, vals)| infer_column(&name, &vals))
139 .collect();
140
141 DataFrame::new(columns)
142 }
143}
144
145fn scalar_to_json(val: Option<Scalar>) -> Value {
147 match val {
148 None | Some(Scalar::Null) => Value::Null,
149 Some(Scalar::Bool(b)) => Value::Bool(b),
150 Some(Scalar::Int64(n)) => Value::Number(n.into()),
151 Some(Scalar::UInt64(n)) => Value::Number(n.into()),
152 Some(Scalar::Float64(f)) => {
153 serde_json::Number::from_f64(f).map_or(Value::Null, Value::Number)
154 }
155 Some(Scalar::String(s)) => Value::String(s),
156 }
157}
158
159fn infer_column(name: &str, vals: &[Option<&Value>]) -> Column {
161 let first_non_null = vals.iter().find_map(|v| match v {
163 Some(Value::Null) | None => None,
164 Some(inner) => Some(*inner),
165 });
166
167 match first_non_null {
168 Some(Value::Bool(_)) => {
169 let data: Vec<Option<bool>> = vals
170 .iter()
171 .map(|v| match v {
172 Some(Value::Bool(b)) => Some(*b),
173 Some(
174 Value::Null
175 | Value::Number(_)
176 | Value::String(_)
177 | Value::Array(_)
178 | Value::Object(_),
179 )
180 | None => None,
181 })
182 .collect();
183 Column::new_bool(name, data)
184 }
185 Some(Value::Number(first_num)) => {
186 let has_float = vals.iter().any(
188 |v| matches!(v, Some(Value::Number(num)) if num.is_f64() && num.as_i64().is_none()),
189 );
190 if has_float {
191 let data: Vec<Option<f64>> = vals
192 .iter()
193 .map(|v| match v {
194 Some(Value::Number(num)) => num.as_f64(),
195 Some(
196 Value::Null
197 | Value::Bool(_)
198 | Value::String(_)
199 | Value::Array(_)
200 | Value::Object(_),
201 )
202 | None => None,
203 })
204 .collect();
205 Column::new_f64(name, data)
206 } else if first_num.is_u64() && first_num.as_i64().is_none() {
207 let data: Vec<Option<u64>> = vals
209 .iter()
210 .map(|v| match v {
211 Some(Value::Number(num)) => num.as_u64(),
212 Some(
213 Value::Null
214 | Value::Bool(_)
215 | Value::String(_)
216 | Value::Array(_)
217 | Value::Object(_),
218 )
219 | None => None,
220 })
221 .collect();
222 Column::new_u64(name, data)
223 } else {
224 let data: Vec<Option<i64>> = vals
225 .iter()
226 .map(|v| match v {
227 Some(Value::Number(num)) => num.as_i64(),
228 Some(
229 Value::Null
230 | Value::Bool(_)
231 | Value::String(_)
232 | Value::Array(_)
233 | Value::Object(_),
234 )
235 | None => None,
236 })
237 .collect();
238 Column::new_i64(name, data)
239 }
240 }
241 Some(Value::String(_) | Value::Array(_) | Value::Object(_) | Value::Null) | None => {
243 let data: Vec<Option<String>> = vals
244 .iter()
245 .map(|v| match v {
246 Some(Value::String(s)) => Some(s.clone()),
247 Some(Value::Null) | None => None,
248 Some(other) => Some(other.to_string()),
249 })
250 .collect();
251 Column::new_string(name, data)
252 }
253 }
254}
255
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `json` into a frame; a parse failure here is a bug in the test.
    fn parse(json: &str) -> DataFrame {
        DataFrame::from_json(json).unwrap_or_else(|_| unreachable!())
    }

    /// A frame survives a trip through `to_json` / `from_json` with its shape intact.
    #[test]
    fn roundtrip_json_string() {
        let names = Column::from_strs("name", &["alice", "bob"]);
        let ages = Column::from_i64s("age", vec![30, 25]);
        let original = DataFrame::new(vec![names, ages]).unwrap_or_else(|_| unreachable!());

        let encoded = original.to_json().unwrap_or_else(|_| unreachable!());
        let decoded = parse(&encoded);

        assert_eq!(decoded.height(), 2);
        assert_eq!(decoded.width(), 2);
    }

    /// Integer, string and boolean columns can coexist in one document.
    #[test]
    fn from_json_mixed_types() {
        let frame = parse(
            r#"[
            {"x": 1, "y": "hello", "z": true},
            {"x": 2, "y": "world", "z": false}
        ]"#,
        );

        assert_eq!(frame.height(), 2);
        assert_eq!(frame.width(), 3);
    }

    /// JSON nulls become null scalars without changing the inferred column type.
    #[test]
    fn from_json_with_nulls() {
        let frame = parse(
            r#"[
            {"x": 1, "y": "a"},
            {"x": null, "y": "b"},
            {"x": 3, "y": null}
        ]"#,
        );
        assert_eq!(frame.height(), 3);

        let x_col = frame.column("x").unwrap_or_else(|_| unreachable!());
        assert_eq!(x_col.get(0), Some(Scalar::Int64(1)));
        assert_eq!(x_col.get(1), Some(Scalar::Null));
        assert_eq!(x_col.get(2), Some(Scalar::Int64(3)));
    }

    /// An empty JSON array produces an empty frame.
    #[test]
    fn from_json_empty_array() {
        assert!(parse("[]").is_empty());
    }

    /// Malformed JSON and non-array documents are rejected.
    #[test]
    fn from_json_invalid() {
        let malformed = DataFrame::from_json("not json");
        assert!(malformed.is_err());

        let not_an_array = DataFrame::from_json("42");
        assert!(not_an_array.is_err());
    }

    /// Fractional numbers are read as a float column.
    #[test]
    fn from_json_floats() {
        let frame = parse(r#"[{"val": 1.5}, {"val": 2.5}]"#);
        let val_col = frame.column("val").unwrap_or_else(|_| unreachable!());
        assert_eq!(val_col.get(0), Some(Scalar::Float64(1.5)));
    }

    /// Writing to a file and reading it back through a reader keeps the row count.
    #[test]
    fn roundtrip_json_file() {
        let tmp = tempfile::tempdir().unwrap_or_else(|_| unreachable!());
        let target = tmp.path().join("test.json");

        let drugs = Column::from_strs("drug", &["asp", "met"]);
        let counts = Column::from_i64s("n", vec![100, 200]);
        let original = DataFrame::new(vec![drugs, counts]).unwrap_or_else(|_| unreachable!());

        original
            .to_json_file(&target)
            .unwrap_or_else(|_| unreachable!());

        let handle = std::fs::File::open(&target).unwrap_or_else(|_| unreachable!());
        let decoded = DataFrame::from_json_reader(std::io::BufReader::new(handle))
            .unwrap_or_else(|_| unreachable!());

        assert_eq!(decoded.height(), 2);
    }
}