apcore_toolkit/formatting/
tabular.rs1use serde_json::{Map, Value};
11
/// Unicode byte-order mark (U+FEFF) that `format_csv` writes ahead of its
/// output when called with `bom == true`.
const BOM: char = '\u{FEFF}';
13
14pub fn format_csv(rows: &[Map<String, Value>], bom: bool) -> String {
22 if rows.is_empty() {
23 return String::new();
24 }
25
26 let mut keys: Vec<String> = Vec::new();
27 let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
28 for row in rows {
29 for k in row.keys() {
30 if seen.insert(k.clone()) {
31 keys.push(k.clone());
32 }
33 }
34 }
35
36 let mut lines: Vec<String> = Vec::with_capacity(rows.len() + 1);
37 lines.push(csv_join(
38 &keys.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
39 ));
40 for row in rows {
41 let cells: Vec<String> = keys.iter().map(|k| csv_cell(row.get(k))).collect();
42 lines.push(csv_join(
43 &cells.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
44 ));
45 }
46
47 let mut body = lines.join("\r\n");
48 body.push_str("\r\n");
49 if bom {
50 let mut out = String::with_capacity(body.len() + 3);
51 out.push(BOM);
52 out.push_str(&body);
53 out
54 } else {
55 body
56 }
57}
58
59pub fn format_jsonl(rows: &[Map<String, Value>]) -> String {
62 if rows.is_empty() {
63 return String::new();
64 }
65 let mut out = String::new();
66 for row in rows {
67 out.push_str(&canonical_json(&Value::Object(row.clone())));
68 out.push('\n');
69 }
70 out
71}
72
73fn csv_join(cells: &[&str]) -> String {
74 cells
75 .iter()
76 .map(|c| csv_escape(c))
77 .collect::<Vec<_>>()
78 .join(",")
79}
80
/// Quotes a single CSV field when required.
///
/// A value containing a comma, a double quote, a line feed, or a carriage
/// return is wrapped in double quotes, with every embedded double quote
/// doubled. Any other value is returned unchanged.
fn csv_escape(value: &str) -> String {
    let needs_quoting = value
        .chars()
        .any(|ch| matches!(ch, ',' | '"' | '\n' | '\r'));
    if !needs_quoting {
        return value.to_string();
    }

    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars() {
        if ch == '"' {
            // An embedded quote is written as two quotes.
            quoted.push('"');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
89
90fn csv_cell(value: Option<&Value>) -> String {
91 match value {
92 None | Some(Value::Null) => String::new(),
93 Some(Value::Bool(true)) => "true".to_string(),
94 Some(Value::Bool(false)) => "false".to_string(),
95 Some(Value::String(s)) => s.clone(),
96 Some(Value::Number(n)) => canonical_number(n),
97 Some(v @ (Value::Array(_) | Value::Object(_))) => canonical_json(v),
98 }
99}
100
101fn canonical_json(value: &Value) -> String {
105 let canonicalized = canonicalize_value(value);
106 serde_json::to_string(&canonicalized).unwrap_or_default()
107}
108
109fn canonicalize_value(value: &Value) -> Value {
110 match value {
111 Value::Number(n) => {
112 if let Some(f) = n.as_f64() {
113 if f.is_finite() {
114 if f == f.trunc() && f.abs() < (i64::MAX as f64) {
115 Value::Number(serde_json::Number::from(f as i64))
117 } else {
118 Value::Number(n.clone())
120 }
121 } else {
122 Value::Null
123 }
124 } else {
125 Value::Number(n.clone())
126 }
127 }
128 Value::Array(arr) => Value::Array(arr.iter().map(canonicalize_value).collect()),
129 Value::Object(map) => {
130 let mut out = Map::new();
131 for (k, v) in map {
132 out.insert(k.clone(), canonicalize_value(v));
133 }
134 Value::Object(out)
135 }
136 _ => value.clone(),
137 }
138}
139
140fn canonical_number(n: &serde_json::Number) -> String {
141 if let Some(i) = n.as_i64() {
142 return i.to_string();
143 }
144 if let Some(u) = n.as_u64() {
145 return u.to_string();
146 }
147 if let Some(f) = n.as_f64() {
148 if !f.is_finite() {
149 return String::new();
150 }
151 if f == f.trunc() && f.abs() < (i64::MAX as f64) {
152 return (f as i64).to_string();
153 }
154 return f.to_string();
155 }
156 n.to_string()
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Unwraps a `json!` object literal into the row map the formatters take.
    fn row(v: Value) -> Map<String, Value> {
        if let Value::Object(fields) = v {
            fields
        } else {
            panic!("row helper expects an object")
        }
    }

    // ---- CSV ----

    #[test]
    fn empty_csv() {
        let rendered = format_csv(&[], false);
        assert_eq!(rendered, "");
    }

    #[test]
    fn single_row_csv() {
        let records = [row(json!({"a": 1, "b": 2}))];
        let rendered = format_csv(&records, false);
        assert_eq!(rendered, "a,b\r\n1,2\r\n");
    }

    #[test]
    fn heterogeneous_keys_csv() {
        // The second row introduces column `b`; the first row leaves it blank.
        let records = [row(json!({"a": 1})), row(json!({"a": 2, "b": 3}))];
        let rendered = format_csv(&records, false);
        assert_eq!(rendered, "a,b\r\n1,\r\n2,3\r\n");
    }

    #[test]
    fn nested_object_csv() {
        let records = [row(json!({"schema": {"type": "object"}}))];
        let rendered = format_csv(&records, false);
        let expected_cell = "\"{\"\"type\"\":\"\"object\"\"}\"";
        assert!(rendered.contains(expected_cell));
        assert!(!rendered.contains('\''));
    }

    #[test]
    fn rfc4180_escaping() {
        let with_comma = format_csv(&[row(json!({"a": "x,y"}))], false);
        assert_eq!(with_comma, "a\r\n\"x,y\"\r\n");

        let with_quotes = format_csv(&[row(json!({"a": "she said \"hi\""}))], false);
        assert_eq!(with_quotes, "a\r\n\"she said \"\"hi\"\"\"\r\n");
    }

    #[test]
    fn scalar_types_csv() {
        let record = row(json!({
            "n": null,
            "b": true,
            "f": false,
            "i": 42,
            "fw": 1.0,
            "ff": 1.5,
        }));
        let rendered = format_csv(&[record], false);
        assert_eq!(rendered, "n,b,f,i,fw,ff\r\n,true,false,42,1,1.5\r\n");
    }

    #[test]
    fn bom_option() {
        let records = [row(json!({"a": 1}))];
        let with_bom = format_csv(&records, true);
        let without_bom = format_csv(&records, false);
        assert!(with_bom.starts_with('\u{FEFF}'));
        assert!(!without_bom.starts_with('\u{FEFF}'));
    }

    // ---- JSONL ----

    #[test]
    fn empty_jsonl() {
        let rendered = format_jsonl(&[]);
        assert_eq!(rendered, "");
    }

    #[test]
    fn jsonl_lf_no_trailing_blank() {
        let records = [row(json!({"a": 1})), row(json!({"b": 2}))];
        let rendered = format_jsonl(&records);
        assert_eq!(rendered, "{\"a\":1}\n{\"b\":2}\n");
    }

    #[test]
    fn jsonl_canonical_float() {
        let records = [row(json!({"fw": 1.0, "ff": 1.5}))];
        let rendered = format_jsonl(&records);
        assert_eq!(rendered, "{\"fw\":1,\"ff\":1.5}\n");
    }
}