Skip to main content

jpx_core/extensions/
format.rs

//! CSV and TSV formatting and parsing functions.
2
3use std::collections::HashSet;
4
5use csv::WriterBuilder;
6use serde_json::Value;
7
8use crate::functions::{Function, custom_error};
9use crate::interpreter::SearchResult;
10use crate::registry::register_if_enabled;
11use crate::{Context, Runtime, arg, defn};
12
13/// Convert a serde_json::Value to a string suitable for CSV field.
14fn value_to_csv_string(value: &Value) -> String {
15    match value {
16        Value::Null => String::new(),
17        Value::Bool(b) => b.to_string(),
18        Value::Number(n) => n.to_string(),
19        Value::String(s) => s.clone(),
20        Value::Array(_) | Value::Object(_) => serde_json::to_string(value).unwrap_or_default(),
21    }
22}
23
24/// Write a single row using the csv crate's writer.
25fn write_csv_row(fields: &[String], delimiter: u8) -> Result<String, std::io::Error> {
26    let mut wtr = WriterBuilder::new()
27        .delimiter(delimiter)
28        .has_headers(false)
29        .from_writer(vec![]);
30
31    wtr.write_record(fields)?;
32    wtr.flush()?;
33
34    let data = wtr
35        .into_inner()
36        .map_err(|e| std::io::Error::other(e.to_string()))?;
37
38    let mut s = String::from_utf8(data).unwrap_or_default();
39    if s.ends_with('\n') {
40        s.pop();
41    }
42    if s.ends_with('\r') {
43        s.pop();
44    }
45    Ok(s)
46}
47
48/// Write multiple rows using the csv crate's writer.
49fn write_csv_rows(rows: &[Vec<String>], delimiter: u8) -> Result<String, std::io::Error> {
50    let mut wtr = WriterBuilder::new()
51        .delimiter(delimiter)
52        .has_headers(false)
53        .from_writer(vec![]);
54
55    for row in rows {
56        wtr.write_record(row)?;
57    }
58    wtr.flush()?;
59
60    let data = wtr
61        .into_inner()
62        .map_err(|e| std::io::Error::other(e.to_string()))?;
63
64    let mut s = String::from_utf8(data).unwrap_or_default();
65    if s.ends_with('\n') {
66        s.pop();
67    }
68    if s.ends_with('\r') {
69        s.pop();
70    }
71    Ok(s)
72}
73
74// =============================================================================
75// to_csv(array) -> string
76// =============================================================================
77
78defn!(ToCsvFn, vec![arg!(array)], None);
79
80impl Function for ToCsvFn {
81    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
82        self.signature.validate(args, ctx)?;
83
84        let arr = args[0].as_array().unwrap();
85
86        if arr.is_empty() {
87            return Ok(Value::String(String::new()));
88        }
89
90        let fields: Vec<String> = arr.iter().map(value_to_csv_string).collect();
91
92        match write_csv_row(&fields, b',') {
93            Ok(s) => Ok(Value::String(s)),
94            Err(e) => Err(custom_error(ctx, &format!("CSV write error: {}", e))),
95        }
96    }
97}
98
99// =============================================================================
100// to_tsv(array) -> string
101// =============================================================================
102
103defn!(ToTsvFn, vec![arg!(array)], None);
104
105impl Function for ToTsvFn {
106    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
107        self.signature.validate(args, ctx)?;
108
109        let arr = args[0].as_array().unwrap();
110
111        if arr.is_empty() {
112            return Ok(Value::String(String::new()));
113        }
114
115        let fields: Vec<String> = arr.iter().map(value_to_csv_string).collect();
116
117        match write_csv_row(&fields, b'\t') {
118            Ok(s) => Ok(Value::String(s)),
119            Err(e) => Err(custom_error(ctx, &format!("TSV write error: {}", e))),
120        }
121    }
122}
123
124// =============================================================================
125// to_csv_rows(array_of_arrays) -> string
126// =============================================================================
127
128defn!(ToCsvRowsFn, vec![arg!(array)], None);
129
130impl Function for ToCsvRowsFn {
131    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
132        self.signature.validate(args, ctx)?;
133
134        let rows_var = args[0].as_array().unwrap();
135
136        if rows_var.is_empty() {
137            return Ok(Value::String(String::new()));
138        }
139
140        let rows: Vec<Vec<String>> = rows_var
141            .iter()
142            .map(|row| {
143                if let Some(arr) = row.as_array() {
144                    arr.iter().map(value_to_csv_string).collect()
145                } else {
146                    vec![value_to_csv_string(row)]
147                }
148            })
149            .collect();
150
151        match write_csv_rows(&rows, b',') {
152            Ok(s) => Ok(Value::String(s)),
153            Err(e) => Err(custom_error(ctx, &format!("CSV write error: {}", e))),
154        }
155    }
156}
157
158// =============================================================================
159// to_csv_table(array_of_objects, columns?) -> string
160// =============================================================================
161
162defn!(ToCsvTableFn, vec![arg!(array)], Some(arg!(array)));
163
164impl Function for ToCsvTableFn {
165    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
166        self.signature.validate(args, ctx)?;
167
168        let rows = args[0].as_array().unwrap();
169
170        if rows.is_empty() {
171            return Ok(Value::String(String::new()));
172        }
173
174        // Determine columns: from second argument or from first object's keys
175        let columns: Vec<String> = if args.len() > 1 {
176            args[1]
177                .as_array()
178                .unwrap()
179                .iter()
180                .filter_map(|v| v.as_str().map(|s| s.to_string()))
181                .collect()
182        } else if let Some(obj) = rows[0].as_object() {
183            let mut keys: Vec<String> = obj.keys().cloned().collect();
184            keys.sort();
185            keys
186        } else {
187            return Ok(Value::String(String::new()));
188        };
189
190        if columns.is_empty() {
191            return Ok(Value::String(String::new()));
192        }
193
194        let mut all_rows: Vec<Vec<String>> = Vec::with_capacity(rows.len() + 1);
195
196        // Header row
197        all_rows.push(columns.clone());
198
199        // Data rows
200        for row in rows.iter() {
201            if let Some(obj) = row.as_object() {
202                let data_row: Vec<String> = columns
203                    .iter()
204                    .map(|col| obj.get(col).map(value_to_csv_string).unwrap_or_default())
205                    .collect();
206                all_rows.push(data_row);
207            } else {
208                all_rows.push(columns.iter().map(|_| String::new()).collect());
209            }
210        }
211
212        match write_csv_rows(&all_rows, b',') {
213            Ok(s) => Ok(Value::String(s)),
214            Err(e) => Err(custom_error(ctx, &format!("CSV write error: {}", e))),
215        }
216    }
217}
218
219// =============================================================================
220// from_csv(string) -> array of arrays
221// =============================================================================
222
223defn!(FromCsvFn, vec![arg!(string)], None);
224
225impl Function for FromCsvFn {
226    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
227        self.signature.validate(args, ctx)?;
228        let input = args[0].as_str().unwrap();
229        parse_delimited(input, b',', ctx)
230    }
231}
232
233// =============================================================================
234// from_tsv(string) -> array of arrays
235// =============================================================================
236
237defn!(FromTsvFn, vec![arg!(string)], None);
238
239impl Function for FromTsvFn {
240    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
241        self.signature.validate(args, ctx)?;
242        let input = args[0].as_str().unwrap();
243        parse_delimited(input, b'\t', ctx)
244    }
245}
246
247/// Parse a delimited string (CSV or TSV) into an array of arrays.
248fn parse_delimited(input: &str, delimiter: u8, ctx: &Context<'_>) -> SearchResult {
249    use csv::ReaderBuilder;
250
251    if input.trim().is_empty() {
252        return Ok(Value::Array(vec![]));
253    }
254
255    let mut reader = ReaderBuilder::new()
256        .delimiter(delimiter)
257        .has_headers(false)
258        .flexible(true)
259        .from_reader(input.as_bytes());
260
261    let mut rows: Vec<Value> = Vec::new();
262
263    for result in reader.records() {
264        match result {
265            Ok(record) => {
266                let row: Vec<Value> = record
267                    .iter()
268                    .map(|field| Value::String(field.to_string()))
269                    .collect();
270                rows.push(Value::Array(row));
271            }
272            Err(e) => {
273                return Err(custom_error(ctx, &format!("CSV parse error: {}", e)));
274            }
275        }
276    }
277
278    Ok(Value::Array(rows))
279}
280
281/// Register format functions filtered by the enabled set.
282pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
283    register_if_enabled(runtime, "to_csv", enabled, Box::new(ToCsvFn::new()));
284    register_if_enabled(runtime, "to_tsv", enabled, Box::new(ToTsvFn::new()));
285    register_if_enabled(
286        runtime,
287        "to_csv_rows",
288        enabled,
289        Box::new(ToCsvRowsFn::new()),
290    );
291    register_if_enabled(
292        runtime,
293        "to_csv_table",
294        enabled,
295        Box::new(ToCsvTableFn::new()),
296    );
297    register_if_enabled(runtime, "from_csv", enabled, Box::new(FromCsvFn::new()));
298    register_if_enabled(runtime, "from_tsv", enabled, Box::new(FromTsvFn::new()));
299}
300
// End-to-end tests: each case compiles an expression against a runtime with
// the standard functions and all extensions, then checks the result.
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::json;

    /// Build a runtime with the standard functions and every extension enabled.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }

    // =========================================================================
    // to_csv tests
    // =========================================================================

    #[test]
    fn test_to_csv_simple() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv(@)").unwrap();
        let data = json!(["a", "b", "c"]);
        let result = expr.search(&data).unwrap();
        assert_eq!(result.as_str().unwrap(), "a,b,c");
    }

    #[test]
    fn test_to_csv_mixed_types() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv(@)").unwrap();
        let data = json!(["hello", 42, true, null]);
        let result = expr.search(&data).unwrap();
        // null is rendered as an empty trailing field
        assert_eq!(result.as_str().unwrap(), "hello,42,true,");
    }

    #[test]
    fn test_to_csv_with_comma() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv(@)").unwrap();
        let data = json!(["hello, world", "test"]);
        let result = expr.search(&data).unwrap();
        assert_eq!(result.as_str().unwrap(), "\"hello, world\",test");
    }

    #[test]
    fn test_to_csv_with_quotes() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv(@)").unwrap();
        let data = json!(["say \"hello\"", "test"]);
        let result = expr.search(&data).unwrap();
        // Embedded quotes are doubled and the field is quoted
        assert_eq!(result.as_str().unwrap(), "\"say \"\"hello\"\"\",test");
    }

    #[test]
    fn test_to_csv_with_newline() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv(@)").unwrap();
        let data = json!(["line1\nline2", "test"]);
        let result = expr.search(&data).unwrap();
        assert_eq!(result.as_str().unwrap(), "\"line1\nline2\",test");
    }

    #[test]
    fn test_to_csv_empty() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv(@)").unwrap();
        let data = json!([]);
        let result = expr.search(&data).unwrap();
        assert_eq!(result.as_str().unwrap(), "");
    }

    #[test]
    fn test_to_csv_with_leading_trailing_space() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv(@)").unwrap();
        let data = json!(["  hello  ", "test"]);
        let result = expr.search(&data).unwrap();
        // Surrounding whitespace alone does not trigger quoting: the writer's
        // default quote style only quotes fields containing the delimiter, a
        // quote, or a line break. The spaces are therefore emitted verbatim.
        assert_eq!(result.as_str().unwrap(), "  hello  ,test");
    }

    // =========================================================================
    // to_tsv tests
    // =========================================================================

    #[test]
    fn test_to_tsv_simple() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_tsv(@)").unwrap();
        let data = json!(["a", "b", "c"]);
        let result = expr.search(&data).unwrap();
        assert_eq!(result.as_str().unwrap(), "a\tb\tc");
    }

    #[test]
    fn test_to_tsv_mixed_types() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_tsv(@)").unwrap();
        let data = json!(["hello", 42, true, null]);
        let result = expr.search(&data).unwrap();
        assert_eq!(result.as_str().unwrap(), "hello\t42\ttrue\t");
    }

    // =========================================================================
    // to_csv_rows tests
    // =========================================================================

    #[test]
    fn test_to_csv_rows_simple() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv_rows(@)").unwrap();
        let data = json!([[1, 2, 3], [4, 5, 6]]);
        let result = expr.search(&data).unwrap();
        assert_eq!(result.as_str().unwrap(), "1,2,3\n4,5,6");
    }

    #[test]
    fn test_to_csv_rows_with_strings() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv_rows(@)").unwrap();
        let data = json!([["a", "b"], ["c", "d"]]);
        let result = expr.search(&data).unwrap();
        assert_eq!(result.as_str().unwrap(), "a,b\nc,d");
    }

    #[test]
    fn test_to_csv_rows_empty() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv_rows(@)").unwrap();
        let data = json!([]);
        let result = expr.search(&data).unwrap();
        assert_eq!(result.as_str().unwrap(), "");
    }

    #[test]
    fn test_to_csv_rows_with_special_chars() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv_rows(@)").unwrap();
        let data = json!([["hello, world", "test"], ["a\"b", "c"]]);
        let result = expr.search(&data).unwrap();
        // Should properly escape commas and quotes
        assert!(result.as_str().unwrap().contains("\"hello, world\""));
        assert!(result.as_str().unwrap().contains("\"a\"\"b\""));
    }

    // =========================================================================
    // to_csv_table tests
    // =========================================================================

    #[test]
    fn test_to_csv_table_simple() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv_table(@)").unwrap();
        let data = json!([{"name": "alice", "age": 30}, {"name": "bob", "age": 25}]);
        let result = expr.search(&data).unwrap();
        // Keys are sorted alphabetically
        assert_eq!(result.as_str().unwrap(), "age,name\n30,alice\n25,bob");
    }

    #[test]
    fn test_to_csv_table_with_columns() {
        let runtime = setup_runtime();
        let expr = runtime
            .compile("to_csv_table(@, `[\"name\", \"age\"]`)")
            .unwrap();
        let data = json!([{"name": "alice", "age": 30}, {"name": "bob", "age": 25}]);
        let result = expr.search(&data).unwrap();
        // Columns in specified order
        assert_eq!(result.as_str().unwrap(), "name,age\nalice,30\nbob,25");
    }

    #[test]
    fn test_to_csv_table_missing_field() {
        let runtime = setup_runtime();
        let expr = runtime
            .compile("to_csv_table(@, `[\"name\", \"age\", \"email\"]`)")
            .unwrap();
        let data = json!([{"name": "alice", "age": 30}, {"name": "bob"}]);
        let result = expr.search(&data).unwrap();
        // Missing fields are empty
        assert_eq!(result.as_str().unwrap(), "name,age,email\nalice,30,\nbob,,");
    }

    #[test]
    fn test_to_csv_table_empty() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv_table(@)").unwrap();
        let data = json!([]);
        let result = expr.search(&data).unwrap();
        assert_eq!(result.as_str().unwrap(), "");
    }

    #[test]
    fn test_to_csv_table_special_chars() {
        let runtime = setup_runtime();
        let expr = runtime.compile("to_csv_table(@)").unwrap();
        let data = json!([{"name": "O'Brien, Jr.", "note": "said \"hi\""}]);
        let result = expr.search(&data).unwrap();
        // Should properly escape commas and quotes
        assert!(result.as_str().unwrap().contains("\"O'Brien, Jr.\""));
        assert!(result.as_str().unwrap().contains("\"said \"\"hi\"\"\""));
    }

    // =========================================================================
    // from_csv tests
    // =========================================================================

    #[test]
    fn test_from_csv_simple() {
        let runtime = setup_runtime();
        let data = json!({"csv": "a,b,c\n1,2,3"});
        let expr = runtime.compile("from_csv(csv)").unwrap();
        let result = expr.search(&data).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 2);
        // First row
        let row0 = arr[0].as_array().unwrap();
        assert_eq!(row0[0].as_str().unwrap(), "a");
        assert_eq!(row0[1].as_str().unwrap(), "b");
        assert_eq!(row0[2].as_str().unwrap(), "c");
        // Second row (fields stay strings; numbers are not inferred)
        let row1 = arr[1].as_array().unwrap();
        assert_eq!(row1[0].as_str().unwrap(), "1");
        assert_eq!(row1[1].as_str().unwrap(), "2");
        assert_eq!(row1[2].as_str().unwrap(), "3");
    }

    #[test]
    fn test_from_csv_quoted() {
        let runtime = setup_runtime();
        let data = json!({"csv": "\"hello, world\",test"});
        let expr = runtime.compile("from_csv(csv)").unwrap();
        let result = expr.search(&data).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 1);
        let row0 = arr[0].as_array().unwrap();
        assert_eq!(row0[0].as_str().unwrap(), "hello, world");
        assert_eq!(row0[1].as_str().unwrap(), "test");
    }

    #[test]
    fn test_from_csv_empty() {
        let runtime = setup_runtime();
        let data = json!({"csv": ""});
        let expr = runtime.compile("from_csv(csv)").unwrap();
        let result = expr.search(&data).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 0);
    }

    #[test]
    fn test_from_csv_single_row() {
        let runtime = setup_runtime();
        let data = json!({"csv": "a,b,c"});
        let expr = runtime.compile("from_csv(csv)").unwrap();
        let result = expr.search(&data).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 1);
        let row0 = arr[0].as_array().unwrap();
        assert_eq!(row0.len(), 3);
    }

    // =========================================================================
    // from_tsv tests
    // =========================================================================

    #[test]
    fn test_from_tsv_simple() {
        let runtime = setup_runtime();
        let data = json!({"tsv": "a\tb\tc\n1\t2\t3"});
        let expr = runtime.compile("from_tsv(tsv)").unwrap();
        let result = expr.search(&data).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 2);
        // First row
        let row0 = arr[0].as_array().unwrap();
        assert_eq!(row0[0].as_str().unwrap(), "a");
        assert_eq!(row0[1].as_str().unwrap(), "b");
        assert_eq!(row0[2].as_str().unwrap(), "c");
    }

    #[test]
    fn test_from_tsv_empty() {
        let runtime = setup_runtime();
        let data = json!({"tsv": ""});
        let expr = runtime.compile("from_tsv(tsv)").unwrap();
        let result = expr.search(&data).unwrap();
        let arr = result.as_array().unwrap();
        assert_eq!(arr.len(), 0);
    }

    #[test]
    fn test_from_tsv_spaces_preserved() {
        let runtime = setup_runtime();
        let data = json!({"tsv": "hello world\ttest"});
        let expr = runtime.compile("from_tsv(tsv)").unwrap();
        let result = expr.search(&data).unwrap();
        let arr = result.as_array().unwrap();
        let row0 = arr[0].as_array().unwrap();
        assert_eq!(row0[0].as_str().unwrap(), "hello world");
        assert_eq!(row0[1].as_str().unwrap(), "test");
    }

    // =========================================================================
    // roundtrip tests
    // =========================================================================

    #[test]
    fn test_csv_roundtrip() {
        let runtime = setup_runtime();
        // to_csv_rows then from_csv should give back similar structure
        let data = json!([["a", "b"], ["1", "2"]]);
        let expr = runtime.compile("to_csv_rows(@)").unwrap();
        let csv_result = expr.search(&data).unwrap();

        // Now parse it back
        let parse_data = json!({"csv": csv_result.as_str().unwrap()});
        let parse_expr = runtime.compile("from_csv(csv)").unwrap();
        let parsed = parse_expr.search(&parse_data).unwrap();

        let arr = parsed.as_array().unwrap();
        assert_eq!(arr.len(), 2);
        let row0 = arr[0].as_array().unwrap();
        assert_eq!(row0[0].as_str().unwrap(), "a");
        assert_eq!(row0[1].as_str().unwrap(), "b");
    }
}