Skip to main content

zenith_cli/commands/render/
data_input.rs

1//! Data-context loading from JSON or CSV files for `zenith render --data`.
2//!
3//! [`load_data_context`] reads a JSON object/array or CSV file and returns a
4//! [`DataContext`] populated with flat string scalar fields AND named array
5//! columns. JSON nested objects are flattened to dot-paths (`revenue.total`);
6//! JSON arrays become named columns in `arrays`. CSV takes the first data row
7//! for scalar `fields` and ALL rows as per-column `arrays`.
8
9use std::collections::BTreeMap;
10use std::path::Path;
11
12use zenith_core::DataContext;
13
14// ── Error type ─────────────────────────────────────────────────────────────
15
/// Error produced while loading a data context file.
///
/// Carries a single pre-formatted, human-readable message. The type
/// implements both [`std::fmt::Display`] (prints the message verbatim) and
/// [`std::error::Error`], so it can be boxed as `Box<dyn Error>` or propagated
/// with `?` into error types that accept any standard error.
#[derive(Debug)]
pub struct DataInputError {
    /// Human-readable description of the failure.
    pub message: String,
}

impl DataInputError {
    /// Build an error from anything convertible into a `String`.
    fn new(msg: impl Into<String>) -> Self {
        Self {
            message: msg.into(),
        }
    }
}

impl std::fmt::Display for DataInputError {
    /// Write the stored message unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

// No underlying cause is retained (lower-level errors are folded into
// `message`), so the default `source()` returning `None` is the right answer.
impl std::error::Error for DataInputError {}
36
37// ── Public entry point ─────────────────────────────────────────────────────
38
39/// Load a [`DataContext`] from `path`.
40///
41/// The file format is inferred from the extension:
42/// - `.json` — a JSON **object** (used directly) or a JSON **array** (first
43///   element must be an object; empty array or non-object first element →
44///   error). Nested objects are flattened to dot-path keys
45///   (`{"a":{"b":1}}` → `"a.b" => "1"`). Scalar values: strings are used
46///   as-is; numbers and booleans are converted via `to_string`; `null` →
47///   empty string. Arrays nested *inside* a data object are skipped.
48/// - `.csv` — header row gives field names; the **first data row** supplies
49///   values. No data rows → error.
50/// - Any other extension → error.
51///
52/// Returns `Err(DataInputError)` on any I/O, parse, or shape failure.
53pub fn load_data_context(path: &Path) -> Result<DataContext, DataInputError> {
54    let ext = path
55        .extension()
56        .and_then(|e| e.to_str())
57        .unwrap_or("")
58        .to_ascii_lowercase();
59
60    match ext.as_str() {
61        "json" => load_from_json(path),
62        "csv" => load_from_csv(path),
63        other => Err(DataInputError::new(format!(
64            "--data: unsupported file extension '.{other}'; expected .json or .csv"
65        ))),
66    }
67}
68
69// ── JSON loader ────────────────────────────────────────────────────────────
70
71fn load_from_json(path: &Path) -> Result<DataContext, DataInputError> {
72    let bytes = std::fs::read(path).map_err(|e| {
73        DataInputError::new(format!("--data: cannot read '{}': {}", path.display(), e))
74    })?;
75    let text = std::str::from_utf8(&bytes).map_err(|e| {
76        DataInputError::new(format!(
77            "--data: '{}' is not valid UTF-8: {}",
78            path.display(),
79            e
80        ))
81    })?;
82    let value: serde_json::Value = serde_json::from_str(text).map_err(|e| {
83        DataInputError::new(format!(
84            "--data: '{}' is not valid JSON: {}",
85            path.display(),
86            e
87        ))
88    })?;
89
90    // Accept a top-level object or a top-level array (use first element).
91    let obj = match value {
92        serde_json::Value::Object(map) => map,
93        serde_json::Value::Array(arr) => {
94            let first = arr.into_iter().next().ok_or_else(|| {
95                DataInputError::new(format!(
96                    "--data: '{}' is an empty JSON array; expected a non-empty array or object",
97                    path.display()
98                ))
99            })?;
100            match first {
101                serde_json::Value::Object(map) => map,
102                other => {
103                    return Err(DataInputError::new(format!(
104                        "--data: first element of '{}' is {} not an object",
105                        path.display(),
106                        json_kind_name(&other)
107                    )));
108                }
109            }
110        }
111        other => {
112            return Err(DataInputError::new(format!(
113                "--data: '{}' contains {} not a JSON object or array",
114                path.display(),
115                json_kind_name(&other)
116            )));
117        }
118    };
119
120    let mut fields: BTreeMap<String, String> = BTreeMap::new();
121    let mut arrays: BTreeMap<String, Vec<String>> = BTreeMap::new();
122    flatten_object(&obj, String::new(), &mut fields, &mut arrays);
123    Ok(DataContext { fields, arrays })
124}
125
126/// Recursively flatten a JSON object into dot-path scalar keys and array columns.
127///
128/// Scalar values (string, number, bool, null) are written into `out_fields`
129/// under their dot-path key. Array values whose elements are all scalars are
130/// collected into `out_arrays` under the same dot-path key; nested-object or
131/// nested-array elements within an array are silently skipped (the rest of the
132/// array still populates the column).
133fn flatten_object(
134    obj: &serde_json::Map<String, serde_json::Value>,
135    prefix: String,
136    out_fields: &mut BTreeMap<String, String>,
137    out_arrays: &mut BTreeMap<String, Vec<String>>,
138) {
139    for (key, val) in obj {
140        let path = if prefix.is_empty() {
141            key.clone()
142        } else {
143            format!("{prefix}.{key}")
144        };
145        match val {
146            serde_json::Value::Object(inner) => {
147                flatten_object(inner, path, out_fields, out_arrays);
148            }
149            serde_json::Value::Array(arr) => {
150                // Collect scalar elements in order; skip nested objects/arrays.
151                let strings: Vec<String> = arr
152                    .iter()
153                    .filter_map(|e| match e {
154                        serde_json::Value::Number(n) => Some(n.to_string()),
155                        serde_json::Value::String(s) => Some(s.clone()),
156                        serde_json::Value::Bool(b) => Some(b.to_string()),
157                        serde_json::Value::Null => Some(String::new()),
158                        _ => None,
159                    })
160                    .collect();
161                if !strings.is_empty() {
162                    out_arrays.insert(path, strings);
163                }
164            }
165            serde_json::Value::String(s) => {
166                out_fields.insert(path, s.clone());
167            }
168            serde_json::Value::Number(n) => {
169                out_fields.insert(path, n.to_string());
170            }
171            serde_json::Value::Bool(b) => {
172                out_fields.insert(path, b.to_string());
173            }
174            serde_json::Value::Null => {
175                out_fields.insert(path, String::new());
176            }
177        }
178    }
179}
180
181/// Return a short human-readable type name for error messages.
182fn json_kind_name(v: &serde_json::Value) -> &'static str {
183    match v {
184        serde_json::Value::Null => "null",
185        serde_json::Value::Bool(_) => "a boolean",
186        serde_json::Value::Number(_) => "a number",
187        serde_json::Value::String(_) => "a string",
188        serde_json::Value::Array(_) => "an array",
189        serde_json::Value::Object(_) => "an object",
190    }
191}
192
193// ── CSV loader ─────────────────────────────────────────────────────────────
194
195fn load_from_csv(path: &Path) -> Result<DataContext, DataInputError> {
196    let bytes = std::fs::read(path).map_err(|e| {
197        DataInputError::new(format!("--data: cannot read '{}': {}", path.display(), e))
198    })?;
199    let text = std::str::from_utf8(&bytes).map_err(|e| {
200        DataInputError::new(format!(
201            "--data: '{}' is not valid UTF-8: {}",
202            path.display(),
203            e
204        ))
205    })?;
206
207    // Flexible: tolerate rows with fewer/more fields than the header; short
208    // rows are padded per-column below so a series stays category-aligned.
209    let mut reader = csv::ReaderBuilder::new()
210        .flexible(true)
211        .from_reader(text.as_bytes());
212    let headers = reader
213        .headers()
214        .map_err(|e| {
215            DataInputError::new(format!(
216                "--data: CSV header error in '{}': {}",
217                path.display(),
218                e
219            ))
220        })?
221        .clone();
222
223    // Collect ALL data rows so we can build per-column arrays.
224    let mut all_records: Vec<csv::StringRecord> = Vec::new();
225    for result in reader.records() {
226        let record = result.map_err(|e| {
227            DataInputError::new(format!(
228                "--data: CSV parse error in '{}': {}",
229                path.display(),
230                e
231            ))
232        })?;
233        all_records.push(record);
234    }
235
236    // Require at least one data row (preserves the existing documented contract).
237    if all_records.is_empty() {
238        return Err(DataInputError::new(format!(
239            "--data: '{}' has a header but no data rows",
240            path.display()
241        )));
242    }
243
244    // `fields`: first data row only (scalar KPI use — existing behaviour unchanged).
245    // all_records is non-empty: the is_empty() guard above returned early on empty.
246    let fields: BTreeMap<String, String> = all_records
247        .first()
248        .map(|first_record| {
249            headers
250                .iter()
251                .zip(first_record.iter())
252                .map(|(h, v)| (h.to_owned(), v.to_owned()))
253                .collect()
254        })
255        .unwrap_or_default();
256
257    // `arrays`: per-column slices across ALL rows, keyed by header name.
258    // Short rows have missing cells filled with an empty string to keep
259    // per-series length consistent with the category count.
260    let mut arrays: BTreeMap<String, Vec<String>> = BTreeMap::new();
261    for (col_idx, header) in headers.iter().enumerate() {
262        let column: Vec<String> = all_records
263            .iter()
264            .map(|rec| rec.get(col_idx).unwrap_or("").to_owned())
265            .collect();
266        arrays.insert(header.to_owned(), column);
267    }
268
269    Ok(DataContext { fields, arrays })
270}
271
272// ── Unit tests ─────────────────────────────────────────────────────────────
273
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Collect an array accessor's `&[String]` into borrowed `&str`s for ergonomic
    /// comparison against `&str` literals in assertions.
    fn as_strs(arr: Option<&[String]>) -> Option<Vec<&str>> {
        arr.map(|a| a.iter().map(String::as_str).collect())
    }

    /// Write `content` to a file named `data{suffix}` inside a fresh temporary
    /// directory and return both the directory handle and the file path.
    ///
    /// The `TempDir` is returned so the caller can keep it bound (`_dir`) for
    /// as long as the file is needed.
    fn write_temp(suffix: &str, content: &[u8]) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join(format!("data{suffix}"));
        std::fs::File::create(&path)
            .unwrap()
            .write_all(content)
            .unwrap();
        (dir, path)
    }

    // ── JSON: flat object ─────────────────────────────────────────────────

    #[test]
    fn json_flat_object_fields() {
        let (_dir, path) = write_temp(".json", br#"{"name": "Alice", "age": 30, "active": true}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("name"), Some("Alice"));
        assert_eq!(ctx.get("age"), Some("30"));
        assert_eq!(ctx.get("active"), Some("true"));
    }

    #[test]
    fn json_null_becomes_empty_string() {
        let (_dir, path) = write_temp(".json", br#"{"x": null}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("x"), Some(""));
    }

    // ── JSON: nested object flattens to dot-paths ─────────────────────────

    #[test]
    fn json_nested_object_flattens() {
        let (_dir, path) = write_temp(
            ".json",
            br#"{"revenue": {"total": 42, "tax": 3.5}, "label": "Q1"}"#,
        );
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("revenue.total"), Some("42"));
        assert_eq!(ctx.get("revenue.tax"), Some("3.5"));
        assert_eq!(ctx.get("label"), Some("Q1"));
        // Parent key should NOT be inserted.
        assert_eq!(ctx.get("revenue"), None);
    }

    // ── JSON: array inside object goes to `arrays`, not `fields` ──────────

    #[test]
    fn json_array_is_kept_out_of_scalar_fields() {
        let (_dir, path) = write_temp(".json", br#"{"tags": [1, 2, 3], "val": "ok"}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("val"), Some("ok"));
        // An array never shows up as a scalar field ...
        assert_eq!(ctx.get("tags"), None);
        // ... it is routed to the array columns instead.
        assert_eq!(
            as_strs(ctx.get_array("tags")),
            Some(vec!["1", "2", "3"]),
            "array value must land in the arrays map"
        );
    }

    // ── JSON: top-level array — first element used ────────────────────────

    #[test]
    fn json_array_first_element_used() {
        let (_dir, path) = write_temp(
            ".json",
            br##"[{"color": "#ff0000"}, {"color": "#00ff00"}]"##,
        );
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("color"), Some("#ff0000"));
    }

    #[test]
    fn json_empty_array_is_error() {
        let (_dir, path) = write_temp(".json", b"[]");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("empty JSON array"),
            "expected 'empty JSON array' in error; got: {}",
            err.message
        );
    }

    #[test]
    fn json_array_non_object_first_element_is_error() {
        let (_dir, path) = write_temp(".json", b"[42]");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("not an object"),
            "expected 'not an object' in error; got: {}",
            err.message
        );
    }

    #[test]
    fn json_top_level_scalar_is_error() {
        let (_dir, path) = write_temp(".json", b"\"hello\"");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("not a JSON object or array"),
            "expected 'not a JSON object or array' in error; got: {}",
            err.message
        );
    }

    // ── CSV ───────────────────────────────────────────────────────────────

    #[test]
    fn csv_header_and_first_row() {
        let (_dir, path) = write_temp(".csv", b"name,city\nAlice,Wonderland\nBob,Nowhere");
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("name"), Some("Alice"));
        assert_eq!(ctx.get("city"), Some("Wonderland"));
    }

    #[test]
    fn csv_no_data_rows_is_error() {
        let (_dir, path) = write_temp(".csv", b"name,city\n");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("no data rows"),
            "expected 'no data rows' in error; got: {}",
            err.message
        );
    }

    // ── Extension handling ────────────────────────────────────────────────

    #[test]
    fn unknown_extension_is_error() {
        let (_dir, path) = write_temp(".toml", b"key = \"val\"");
        let err = load_data_context(&path).unwrap_err();
        assert!(
            err.message.contains("unsupported file extension"),
            "expected 'unsupported file extension' in error; got: {}",
            err.message
        );
    }

    #[test]
    fn extension_match_is_case_insensitive() {
        // The extension is lower-cased before dispatch, so `.JSON` is JSON.
        let (_dir, path) = write_temp(".JSON", br#"{"k": "v"}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("k"), Some("v"));
    }

    // ── BTreeMap determinism ──────────────────────────────────────────────

    #[test]
    fn json_fields_are_sorted() {
        let (_dir, path) = write_temp(".json", br#"{"z": "last", "a": "first", "m": "middle"}"#);
        let ctx = load_data_context(&path).unwrap();
        let keys: Vec<&str> = ctx.fields.keys().map(String::as_str).collect();
        assert_eq!(keys, vec!["a", "m", "z"]);
    }

    // ── JSON: array values populate arrays map ────────────────────────────

    #[test]
    fn json_array_value_populates_arrays() {
        let (_dir, path) = write_temp(".json", br#"{"sales": [12, 18, 15]}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(
            as_strs(ctx.get_array("sales")),
            Some(vec!["12", "18", "15"]),
            "numeric JSON array must populate arrays map"
        );
        // The key must NOT appear in scalar fields.
        assert_eq!(ctx.get("sales"), None);
    }

    #[test]
    fn json_array_with_mixed_scalars() {
        let (_dir, path) = write_temp(".json", br#"{"vals": [1, "two", true, null]}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(
            as_strs(ctx.get_array("vals")),
            Some(vec!["1", "two", "true", ""]),
        );
    }

    #[test]
    fn json_array_drops_non_scalar_elements() {
        // Objects and arrays nested inside an array are dropped; the scalar
        // elements around them still form the column, in order.
        let (_dir, path) = write_temp(".json", br#"{"vals": [1, {"a": 2}, [3], 4]}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(as_strs(ctx.get_array("vals")), Some(vec!["1", "4"]));
    }

    #[test]
    fn json_array_in_nested_object_uses_dot_path() {
        let (_dir, path) = write_temp(".json", br#"{"series": {"q1": [1, 2]}}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(
            as_strs(ctx.arrays.get("series.q1").map(Vec::as_slice)),
            Some(vec!["1", "2"]),
            "array under a nested object must be keyed by its dot-path"
        );
    }

    #[test]
    fn json_empty_array_is_not_inserted() {
        let (_dir, path) = write_temp(".json", br#"{"empty": [], "x": "y"}"#);
        let ctx = load_data_context(&path).unwrap();
        assert!(
            ctx.get_array("empty").is_none(),
            "empty array must not be inserted"
        );
        assert_eq!(ctx.get("x"), Some("y"));
    }

    #[test]
    fn json_scalar_and_array_coexist() {
        let (_dir, path) = write_temp(".json", br#"{"name": "Alice", "scores": [10, 20, 30]}"#);
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(ctx.get("name"), Some("Alice"));
        assert_eq!(
            as_strs(ctx.get_array("scores")),
            Some(vec!["10", "20", "30"])
        );
    }

    // ── CSV: all rows populate arrays map ────────────────────────────────

    #[test]
    fn csv_all_rows_populate_arrays() {
        let (_dir, path) = write_temp(".csv", b"month,revenue\nJan,100\nFeb,200\nMar,150");
        let ctx = load_data_context(&path).unwrap();
        // Scalar fields: first row only.
        assert_eq!(ctx.get("month"), Some("Jan"));
        assert_eq!(ctx.get("revenue"), Some("100"));
        // Array columns: all rows.
        assert_eq!(
            as_strs(ctx.get_array("month")),
            Some(vec!["Jan", "Feb", "Mar"]),
        );
        assert_eq!(
            as_strs(ctx.get_array("revenue")),
            Some(vec!["100", "200", "150"]),
        );
    }

    #[test]
    fn csv_short_row_pads_with_empty_string() {
        // Second row is missing the revenue cell.
        let (_dir, path) = write_temp(".csv", b"month,revenue\nJan,100\nFeb");
        let ctx = load_data_context(&path).unwrap();
        assert_eq!(
            as_strs(ctx.get_array("revenue")),
            Some(vec!["100", ""]),
            "short CSV row must pad missing cells with empty string"
        );
    }
}