Skip to main content

apcore_toolkit/formatting/
tabular.rs

// Byte-equivalent tabular data formatters: CSV and JSONL.
//
// Cross-SDK byte-identity contract: every SDK (Python / TypeScript / Rust)
// emits identical bytes for the same input. Consumers (apcore-cli, apcore-mcp,
// apcore-a2a, downstream CLIs) MUST delegate to these formatters rather than
// reimplementing.
//
// See apcore-toolkit/docs/features/formatting.md § Tabular Formats.
9
10use serde_json::{Map, Value};
11
/// U+FEFF (byte-order mark). `format_csv` pushes it as the very first
/// character of its output when the caller asks for a BOM; it encodes to
/// three bytes in UTF-8.
const BOM: char = '\u{FEFF}';
13
14/// Render rows as RFC 4180 CSV.
15///
16/// Header columns are the union of keys across all rows, preserved in
17/// insertion order from first occurrence. Rows missing a key emit an empty
18/// cell. Non-scalar values are serialized as canonical JSON inside the cell.
19/// Cells containing `,`, `"`, `\n`, or `\r` are quote-wrapped with embedded
20/// `"` doubled. Line terminator is CRLF.
21pub fn format_csv(rows: &[Map<String, Value>], bom: bool) -> String {
22    if rows.is_empty() {
23        return String::new();
24    }
25
26    let mut keys: Vec<String> = Vec::new();
27    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
28    for row in rows {
29        for k in row.keys() {
30            if seen.insert(k.clone()) {
31                keys.push(k.clone());
32            }
33        }
34    }
35
36    let mut lines: Vec<String> = Vec::with_capacity(rows.len() + 1);
37    lines.push(csv_join(
38        &keys.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
39    ));
40    for row in rows {
41        let cells: Vec<String> = keys.iter().map(|k| csv_cell(row.get(k))).collect();
42        lines.push(csv_join(
43            &cells.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
44        ));
45    }
46
47    let mut body = lines.join("\r\n");
48    body.push_str("\r\n");
49    if bom {
50        let mut out = String::with_capacity(body.len() + 3);
51        out.push(BOM);
52        out.push_str(&body);
53        out
54    } else {
55        body
56    }
57}
58
59/// Render rows as JSON Lines. Each row is canonical compact JSON; LF
60/// terminator; no trailing blank line.
61pub fn format_jsonl(rows: &[Map<String, Value>]) -> String {
62    if rows.is_empty() {
63        return String::new();
64    }
65    let mut out = String::new();
66    for row in rows {
67        out.push_str(&canonical_json(&Value::Object(row.clone())));
68        out.push('\n');
69    }
70    out
71}
72
73fn csv_join(cells: &[&str]) -> String {
74    cells
75        .iter()
76        .map(|c| csv_escape(c))
77        .collect::<Vec<_>>()
78        .join(",")
79}
80
/// Quote a single CSV field when required.
///
/// A field that contains a comma, a double quote, a line feed or a carriage
/// return is wrapped in double quotes and every embedded `"` is written as
/// `""`. Any other field is returned unchanged.
fn csv_escape(value: &str) -> String {
    let needs_quoting = value
        .chars()
        .any(|ch| matches!(ch, ',' | '"' | '\n' | '\r'));
    if !needs_quoting {
        return value.to_owned();
    }

    // +2 covers the surrounding quotes; doubled quotes may grow it further.
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars() {
        if ch == '"' {
            quoted.push('"');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
89
90fn csv_cell(value: Option<&Value>) -> String {
91    match value {
92        None | Some(Value::Null) => String::new(),
93        Some(Value::Bool(true)) => "true".to_string(),
94        Some(Value::Bool(false)) => "false".to_string(),
95        Some(Value::String(s)) => s.clone(),
96        Some(Value::Number(n)) => canonical_number(n),
97        Some(v @ (Value::Array(_) | Value::Object(_))) => canonical_json(v),
98    }
99}
100
101/// Canonical compact JSON aligned with JS `JSON.stringify`: no whitespace
102/// between tokens, insertion-order preserved (via `serde_json` `preserve_order`
103/// feature), unicode preserved, whole-number floats render as plain integers.
104fn canonical_json(value: &Value) -> String {
105    let canonicalized = canonicalize_value(value);
106    serde_json::to_string(&canonicalized).unwrap_or_default()
107}
108
109fn canonicalize_value(value: &Value) -> Value {
110    match value {
111        Value::Number(n) => {
112            if let Some(f) = n.as_f64() {
113                if f.is_finite() {
114                    if f == f.trunc() && f.abs() < (i64::MAX as f64) {
115                        // Whole-number float → int, matching JS canonical form.
116                        Value::Number(serde_json::Number::from(f as i64))
117                    } else {
118                        // Preserve original number representation for fractional values.
119                        Value::Number(n.clone())
120                    }
121                } else {
122                    Value::Null
123                }
124            } else {
125                Value::Number(n.clone())
126            }
127        }
128        Value::Array(arr) => Value::Array(arr.iter().map(canonicalize_value).collect()),
129        Value::Object(map) => {
130            let mut out = Map::new();
131            for (k, v) in map {
132                out.insert(k.clone(), canonicalize_value(v));
133            }
134            Value::Object(out)
135        }
136        _ => value.clone(),
137    }
138}
139
140fn canonical_number(n: &serde_json::Number) -> String {
141    if let Some(i) = n.as_i64() {
142        return i.to_string();
143    }
144    if let Some(u) = n.as_u64() {
145        return u.to_string();
146    }
147    if let Some(f) = n.as_f64() {
148        if !f.is_finite() {
149            return String::new();
150        }
151        if f == f.trunc() && f.abs() < (i64::MAX as f64) {
152            return (f as i64).to_string();
153        }
154        return f.to_string();
155    }
156    n.to_string()
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Unwrap a `json!` object literal into the `Map` the formatters take.
    fn row(v: Value) -> Map<String, Value> {
        if let Value::Object(fields) = v {
            fields
        } else {
            panic!("row helper expects an object")
        }
    }

    // ---- CSV ----

    /// No rows means no output at all, not even a header.
    #[test]
    fn empty_csv() {
        let rendered = format_csv(&[], false);
        assert_eq!(rendered, "");
    }

    /// Header plus one data line, both CRLF-terminated.
    #[test]
    fn single_row_csv() {
        let input = [row(json!({"a": 1, "b": 2}))];
        let rendered = format_csv(&input, false);
        assert_eq!(rendered, "a,b\r\n1,2\r\n");
    }

    /// Columns are the union of keys; missing cells are empty.
    #[test]
    fn heterogeneous_keys_csv() {
        let first = row(json!({"a": 1}));
        let second = row(json!({"a": 2, "b": 3}));
        let rendered = format_csv(&[first, second], false);
        assert_eq!(rendered, "a,b\r\n1,\r\n2,3\r\n");
    }

    /// Nested objects are embedded as JSON text and then CSV-quoted.
    #[test]
    fn nested_object_csv() {
        let input = [row(json!({"schema": {"type": "object"}}))];
        let rendered = format_csv(&input, false);
        assert!(rendered.contains("\"{\"\"type\"\":\"\"object\"\"}\""));
        assert!(!rendered.contains("'"));
    }

    /// Commas and double quotes trigger quoting; quotes are doubled.
    #[test]
    fn rfc4180_escaping() {
        let with_comma = format_csv(&[row(json!({"a": "x,y"}))], false);
        assert_eq!(with_comma, "a\r\n\"x,y\"\r\n");

        let with_quotes = format_csv(&[row(json!({"a": "she said \"hi\""}))], false);
        assert_eq!(with_quotes, "a\r\n\"she said \"\"hi\"\"\"\r\n");
    }

    /// Null, booleans, integers, whole floats and fractional floats.
    #[test]
    fn scalar_types_csv() {
        let input = [row(json!({
            "n": null,
            "b": true,
            "f": false,
            "i": 42,
            "fw": 1.0,
            "ff": 1.5,
        }))];
        let rendered = format_csv(&input, false);
        assert_eq!(rendered, "n,b,f,i,fw,ff\r\n,true,false,42,1,1.5\r\n");
    }

    /// The BOM is present exactly when requested.
    #[test]
    fn bom_option() {
        let input = [row(json!({"a": 1}))];
        let with_bom = format_csv(&input, true);
        let without_bom = format_csv(&input, false);
        assert!(with_bom.starts_with('\u{FEFF}'));
        assert!(!without_bom.starts_with('\u{FEFF}'));
    }

    // ---- JSONL ----

    /// No rows means no output.
    #[test]
    fn empty_jsonl() {
        let rendered = format_jsonl(&[]);
        assert_eq!(rendered, "");
    }

    /// One compact object per line, each terminated by a single LF.
    #[test]
    fn jsonl_lf_no_trailing_blank() {
        let input = [row(json!({"a": 1})), row(json!({"b": 2}))];
        let rendered = format_jsonl(&input);
        assert_eq!(rendered, "{\"a\":1}\n{\"b\":2}\n");
    }

    /// Whole-number floats collapse to integers; fractions are kept.
    #[test]
    fn jsonl_canonical_float() {
        let input = [row(json!({"fw": 1.0, "ff": 1.5}))];
        let rendered = format_jsonl(&input);
        assert_eq!(rendered, "{\"fw\":1,\"ff\":1.5}\n");
    }
}