1use std::collections::BTreeMap;
10use std::path::Path;
11
12use zenith_core::DataContext;
13
/// Error produced when a `--data` input file cannot be loaded.
///
/// The error carries a single, already-formatted, human-readable message;
/// `Display` prints that message verbatim.
#[derive(Debug)]
pub struct DataInputError {
    /// Human-readable description of what went wrong.
    pub message: String,
}

impl DataInputError {
    /// Builds an error from anything convertible into a `String`.
    fn new(msg: impl Into<String>) -> Self {
        Self {
            message: msg.into(),
        }
    }
}

impl std::fmt::Display for DataInputError {
    /// Writes the stored message without any extra decoration.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

// Implementing `Error` lets callers box this type as `Box<dyn Error>` and
// propagate it with `?` into generic error types. There is no underlying
// cause to expose, so the default `source()` (returning `None`) is correct.
impl std::error::Error for DataInputError {}
36
37pub fn load_data_context(path: &Path) -> Result<DataContext, DataInputError> {
54 let ext = path
55 .extension()
56 .and_then(|e| e.to_str())
57 .unwrap_or("")
58 .to_ascii_lowercase();
59
60 match ext.as_str() {
61 "json" => load_from_json(path),
62 "csv" => load_from_csv(path),
63 other => Err(DataInputError::new(format!(
64 "--data: unsupported file extension '.{other}'; expected .json or .csv"
65 ))),
66 }
67}
68
69fn load_from_json(path: &Path) -> Result<DataContext, DataInputError> {
72 let bytes = std::fs::read(path).map_err(|e| {
73 DataInputError::new(format!("--data: cannot read '{}': {}", path.display(), e))
74 })?;
75 let text = std::str::from_utf8(&bytes).map_err(|e| {
76 DataInputError::new(format!(
77 "--data: '{}' is not valid UTF-8: {}",
78 path.display(),
79 e
80 ))
81 })?;
82 let value: serde_json::Value = serde_json::from_str(text).map_err(|e| {
83 DataInputError::new(format!(
84 "--data: '{}' is not valid JSON: {}",
85 path.display(),
86 e
87 ))
88 })?;
89
90 let obj = match value {
92 serde_json::Value::Object(map) => map,
93 serde_json::Value::Array(arr) => {
94 let first = arr.into_iter().next().ok_or_else(|| {
95 DataInputError::new(format!(
96 "--data: '{}' is an empty JSON array; expected a non-empty array or object",
97 path.display()
98 ))
99 })?;
100 match first {
101 serde_json::Value::Object(map) => map,
102 other => {
103 return Err(DataInputError::new(format!(
104 "--data: first element of '{}' is {} not an object",
105 path.display(),
106 json_kind_name(&other)
107 )));
108 }
109 }
110 }
111 other => {
112 return Err(DataInputError::new(format!(
113 "--data: '{}' contains {} not a JSON object or array",
114 path.display(),
115 json_kind_name(&other)
116 )));
117 }
118 };
119
120 let mut fields: BTreeMap<String, String> = BTreeMap::new();
121 let mut arrays: BTreeMap<String, Vec<String>> = BTreeMap::new();
122 flatten_object(&obj, String::new(), &mut fields, &mut arrays);
123 Ok(DataContext { fields, arrays })
124}
125
126fn flatten_object(
134 obj: &serde_json::Map<String, serde_json::Value>,
135 prefix: String,
136 out_fields: &mut BTreeMap<String, String>,
137 out_arrays: &mut BTreeMap<String, Vec<String>>,
138) {
139 for (key, val) in obj {
140 let path = if prefix.is_empty() {
141 key.clone()
142 } else {
143 format!("{prefix}.{key}")
144 };
145 match val {
146 serde_json::Value::Object(inner) => {
147 flatten_object(inner, path, out_fields, out_arrays);
148 }
149 serde_json::Value::Array(arr) => {
150 let strings: Vec<String> = arr
152 .iter()
153 .filter_map(|e| match e {
154 serde_json::Value::Number(n) => Some(n.to_string()),
155 serde_json::Value::String(s) => Some(s.clone()),
156 serde_json::Value::Bool(b) => Some(b.to_string()),
157 serde_json::Value::Null => Some(String::new()),
158 _ => None,
159 })
160 .collect();
161 if !strings.is_empty() {
162 out_arrays.insert(path, strings);
163 }
164 }
165 serde_json::Value::String(s) => {
166 out_fields.insert(path, s.clone());
167 }
168 serde_json::Value::Number(n) => {
169 out_fields.insert(path, n.to_string());
170 }
171 serde_json::Value::Bool(b) => {
172 out_fields.insert(path, b.to_string());
173 }
174 serde_json::Value::Null => {
175 out_fields.insert(path, String::new());
176 }
177 }
178 }
179}
180
181fn json_kind_name(v: &serde_json::Value) -> &'static str {
183 match v {
184 serde_json::Value::Null => "null",
185 serde_json::Value::Bool(_) => "a boolean",
186 serde_json::Value::Number(_) => "a number",
187 serde_json::Value::String(_) => "a string",
188 serde_json::Value::Array(_) => "an array",
189 serde_json::Value::Object(_) => "an object",
190 }
191}
192
193fn load_from_csv(path: &Path) -> Result<DataContext, DataInputError> {
196 let bytes = std::fs::read(path).map_err(|e| {
197 DataInputError::new(format!("--data: cannot read '{}': {}", path.display(), e))
198 })?;
199 let text = std::str::from_utf8(&bytes).map_err(|e| {
200 DataInputError::new(format!(
201 "--data: '{}' is not valid UTF-8: {}",
202 path.display(),
203 e
204 ))
205 })?;
206
207 let mut reader = csv::ReaderBuilder::new()
210 .flexible(true)
211 .from_reader(text.as_bytes());
212 let headers = reader
213 .headers()
214 .map_err(|e| {
215 DataInputError::new(format!(
216 "--data: CSV header error in '{}': {}",
217 path.display(),
218 e
219 ))
220 })?
221 .clone();
222
223 let mut all_records: Vec<csv::StringRecord> = Vec::new();
225 for result in reader.records() {
226 let record = result.map_err(|e| {
227 DataInputError::new(format!(
228 "--data: CSV parse error in '{}': {}",
229 path.display(),
230 e
231 ))
232 })?;
233 all_records.push(record);
234 }
235
236 if all_records.is_empty() {
238 return Err(DataInputError::new(format!(
239 "--data: '{}' has a header but no data rows",
240 path.display()
241 )));
242 }
243
244 let fields: BTreeMap<String, String> = all_records
247 .first()
248 .map(|first_record| {
249 headers
250 .iter()
251 .zip(first_record.iter())
252 .map(|(h, v)| (h.to_owned(), v.to_owned()))
253 .collect()
254 })
255 .unwrap_or_default();
256
257 let mut arrays: BTreeMap<String, Vec<String>> = BTreeMap::new();
261 for (col_idx, header) in headers.iter().enumerate() {
262 let column: Vec<String> = all_records
263 .iter()
264 .map(|rec| rec.get(col_idx).unwrap_or("").to_owned())
265 .collect();
266 arrays.insert(header.to_owned(), column);
267 }
268
269 Ok(DataContext { fields, arrays })
270}
271
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Borrows an optional `&[String]` as `Vec<&str>` so array lookups can be
    /// compared directly against vectors of string literals.
    fn as_strs(arr: Option<&[String]>) -> Option<Vec<&str>> {
        arr.map(|a| a.iter().map(String::as_str).collect())
    }

    /// Writes `content` to a file named `data{suffix}` inside a fresh temp
    /// directory. The `TempDir` is returned alongside the path so the caller
    /// can keep it alive for as long as the file is needed.
    fn write_temp(suffix: &str, content: &[u8]) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join(format!("data{suffix}"));
        std::fs::File::create(&path)
            .unwrap()
            .write_all(content)
            .unwrap();
        (dir, path)
    }

    // ── JSON: scalar fields ────────────────────────────────────────────────

    #[test]
    fn json_flat_object_fields() {
        let (_dir, path) = write_temp(".json", br#"{"name": "Alice", "age": 30, "active": true}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("name"), Some("Alice"));
        assert_eq!(ctx.get("age"), Some("30"));
        assert_eq!(ctx.get("active"), Some("true"));
    }

    #[test]
    fn json_null_becomes_empty_string() {
        let (_dir, path) = write_temp(".json", br#"{"x": null}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("x"), Some(""));
    }

    #[test]
    fn json_nested_object_flattens() {
        let (_dir, path) = write_temp(
            ".json",
            br#"{"revenue": {"total": 42, "tax": 3.5}, "label": "Q1"}"#,
        );
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("revenue.total"), Some("42"));
        assert_eq!(ctx.get("revenue.tax"), Some("3.5"));
        assert_eq!(ctx.get("label"), Some("Q1"));
        // The parent object itself is not addressable as a scalar.
        assert_eq!(ctx.get("revenue"), None);
    }

    /// An array value is never exposed as a scalar field; it is reachable
    /// through the array lookup instead.
    #[test]
    fn json_array_is_not_exposed_as_scalar_field() {
        let (_dir, path) = write_temp(".json", br#"{"tags": [1, 2, 3], "val": "ok"}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("val"), Some("ok"));
        assert_eq!(ctx.get("tags"), None);
        assert_eq!(as_strs(ctx.get_array("tags")), Some(vec!["1", "2", "3"]));
    }

    // ── JSON: top-level shape ──────────────────────────────────────────────

    #[test]
    fn json_array_first_element_used() {
        let (_dir, path) = write_temp(
            ".json",
            br##"[{"color": "#ff0000"}, {"color": "#00ff00"}]"##,
        );
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("color"), Some("#ff0000"));
    }

    #[test]
    fn json_empty_array_is_error() {
        let (_dir, path) = write_temp(".json", b"[]");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("empty JSON array"),
            "expected 'empty JSON array' in error; got: {}",
            err.message
        );
    }

    #[test]
    fn json_array_non_object_first_element_is_error() {
        let (_dir, path) = write_temp(".json", b"[42]");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("not an object"),
            "expected 'not an object' in error; got: {}",
            err.message
        );
    }

    #[test]
    fn json_top_level_scalar_is_error() {
        let (_dir, path) = write_temp(".json", b"\"hello\"");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("not a JSON object or array"),
            "expected 'not a JSON object or array' in error; got: {}",
            err.message
        );
    }

    #[test]
    fn json_syntax_error_is_error() {
        let (_dir, path) = write_temp(".json", b"{not json");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("not valid JSON"),
            "expected 'not valid JSON' in error; got: {}",
            err.message
        );
    }

    #[test]
    fn json_invalid_utf8_is_error() {
        let (_dir, path) = write_temp(".json", b"\xff\xfe{}");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("not valid UTF-8"),
            "expected 'not valid UTF-8' in error; got: {}",
            err.message
        );
    }

    // ── CSV: scalar fields ─────────────────────────────────────────────────

    #[test]
    fn csv_header_and_first_row() {
        let (_dir, path) = write_temp(".csv", b"name,city\nAlice,Wonderland\nBob,Nowhere");
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("name"), Some("Alice"));
        assert_eq!(ctx.get("city"), Some("Wonderland"));
    }

    #[test]
    fn csv_no_data_rows_is_error() {
        let (_dir, path) = write_temp(".csv", b"name,city\n");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("no data rows"),
            "expected 'no data rows' in error; got: {}",
            err.message
        );
    }

    // ── Extension dispatch and file access ─────────────────────────────────

    #[test]
    fn unknown_extension_is_error() {
        let (_dir, path) = write_temp(".toml", b"key = \"val\"");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("unsupported file extension"),
            "expected 'unsupported file extension' in error; got: {}",
            err.message
        );
    }

    #[test]
    fn extension_match_is_case_insensitive() {
        let (_dir, path) = write_temp(".JSON", br#"{"k": "v"}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("k"), Some("v"));
    }

    #[test]
    fn missing_file_is_error() {
        let dir = tempfile::TempDir::new().unwrap();
        let err = load_data_context(&dir.path().join("absent.json")).unwrap_err();
        assert!(
            err.message.contains("cannot read"),
            "expected 'cannot read' in error; got: {}",
            err.message
        );
    }

    // ── Ordering ───────────────────────────────────────────────────────────

    #[test]
    fn json_fields_are_sorted() {
        let (_dir, path) = write_temp(".json", br#"{"z": "last", "a": "first", "m": "middle"}"#);
        let ctx = load_data_context(&path).unwrap();
        let keys: Vec<&str> = ctx.fields.keys().map(String::as_str).collect();
        assert_eq!(keys, vec!["a", "m", "z"]);
    }

    // ── JSON: arrays ───────────────────────────────────────────────────────

    #[test]
    fn json_array_value_populates_arrays() {
        let (_dir, path) = write_temp(".json", br#"{"sales": [12, 18, 15]}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(
            as_strs(ctx.get_array("sales")),
            Some(vec!["12", "18", "15"]),
            "numeric JSON array must populate arrays map"
        );
        assert_eq!(ctx.get("sales"), None);
    }

    #[test]
    fn json_array_with_mixed_scalars() {
        let (_dir, path) = write_temp(".json", br#"{"vals": [1, "two", true, null]}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(
            as_strs(ctx.get_array("vals")),
            Some(vec!["1", "two", "true", ""]),
        );
    }

    #[test]
    fn json_empty_array_is_not_inserted() {
        let (_dir, path) = write_temp(".json", br#"{"empty": [], "x": "y"}"#);
        let ctx = load_data_context(&path).unwrap();
        assert!(
            ctx.get_array("empty").is_none(),
            "empty array must not be inserted"
        );
        assert_eq!(ctx.get("x"), Some("y"));
    }

    #[test]
    fn json_scalar_and_array_coexist() {
        let (_dir, path) = write_temp(".json", br#"{"name": "Alice", "scores": [10, 20, 30]}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("name"), Some("Alice"));
        assert_eq!(
            as_strs(ctx.get_array("scores")),
            Some(vec!["10", "20", "30"])
        );
    }

    // ── CSV: arrays ────────────────────────────────────────────────────────

    #[test]
    fn csv_all_rows_populate_arrays() {
        let (_dir, path) = write_temp(".csv", b"month,revenue\nJan,100\nFeb,200\nMar,150");
        let ctx = load_data_context(&path).unwrap();
        // Scalar lookups still resolve to the first data row.
        assert_eq!(ctx.get("month"), Some("Jan"));
        assert_eq!(ctx.get("revenue"), Some("100"));
        assert_eq!(
            as_strs(ctx.get_array("month")),
            Some(vec!["Jan", "Feb", "Mar"]),
        );
        assert_eq!(
            as_strs(ctx.get_array("revenue")),
            Some(vec!["100", "200", "150"]),
        );
    }

    #[test]
    fn csv_short_row_pads_with_empty_string() {
        let (_dir, path) = write_temp(".csv", b"month,revenue\nJan,100\nFeb");
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(
            as_strs(ctx.get_array("revenue")),
            Some(vec!["100", ""]),
            "short CSV row must pad missing cells with empty string"
        );
    }
}