Skip to main content

jpx_core/extensions/
format.rs

1//! CSV and TSV formatting functions.
2
3use std::collections::HashSet;
4
5use csv::WriterBuilder;
6use serde_json::Value;
7
8use crate::functions::{Function, custom_error};
9use crate::interpreter::SearchResult;
10use crate::registry::register_if_enabled;
11use crate::{Context, Runtime, arg, defn};
12
13/// Convert a serde_json::Value to a string suitable for CSV field.
14fn value_to_csv_string(value: &Value) -> String {
15    match value {
16        Value::Null => String::new(),
17        Value::Bool(b) => b.to_string(),
18        Value::Number(n) => n.to_string(),
19        Value::String(s) => s.clone(),
20        Value::Array(_) | Value::Object(_) => serde_json::to_string(value).unwrap_or_default(),
21    }
22}
23
24/// Write a single row using the csv crate's writer.
25fn write_csv_row(fields: &[String], delimiter: u8) -> Result<String, std::io::Error> {
26    let mut wtr = WriterBuilder::new()
27        .delimiter(delimiter)
28        .has_headers(false)
29        .from_writer(vec![]);
30
31    wtr.write_record(fields)?;
32    wtr.flush()?;
33
34    let data = wtr
35        .into_inner()
36        .map_err(|e| std::io::Error::other(e.to_string()))?;
37
38    let mut s = String::from_utf8(data).unwrap_or_default();
39    if s.ends_with('\n') {
40        s.pop();
41    }
42    if s.ends_with('\r') {
43        s.pop();
44    }
45    Ok(s)
46}
47
48/// Write multiple rows using the csv crate's writer.
49fn write_csv_rows(rows: &[Vec<String>], delimiter: u8) -> Result<String, std::io::Error> {
50    let mut wtr = WriterBuilder::new()
51        .delimiter(delimiter)
52        .has_headers(false)
53        .from_writer(vec![]);
54
55    for row in rows {
56        wtr.write_record(row)?;
57    }
58    wtr.flush()?;
59
60    let data = wtr
61        .into_inner()
62        .map_err(|e| std::io::Error::other(e.to_string()))?;
63
64    let mut s = String::from_utf8(data).unwrap_or_default();
65    if s.ends_with('\n') {
66        s.pop();
67    }
68    if s.ends_with('\r') {
69        s.pop();
70    }
71    Ok(s)
72}
73
74// =============================================================================
75// to_csv(array) -> string
76// =============================================================================
77
78defn!(ToCsvFn, vec![arg!(array)], None);
79
80impl Function for ToCsvFn {
81    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
82        self.signature.validate(args, ctx)?;
83
84        let arr = args[0].as_array().unwrap();
85
86        if arr.is_empty() {
87            return Ok(Value::String(String::new()));
88        }
89
90        let fields: Vec<String> = arr.iter().map(value_to_csv_string).collect();
91
92        match write_csv_row(&fields, b',') {
93            Ok(s) => Ok(Value::String(s)),
94            Err(e) => Err(custom_error(ctx, &format!("CSV write error: {}", e))),
95        }
96    }
97}
98
99// =============================================================================
100// to_tsv(array) -> string
101// =============================================================================
102
103defn!(ToTsvFn, vec![arg!(array)], None);
104
105impl Function for ToTsvFn {
106    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
107        self.signature.validate(args, ctx)?;
108
109        let arr = args[0].as_array().unwrap();
110
111        if arr.is_empty() {
112            return Ok(Value::String(String::new()));
113        }
114
115        let fields: Vec<String> = arr.iter().map(value_to_csv_string).collect();
116
117        match write_csv_row(&fields, b'\t') {
118            Ok(s) => Ok(Value::String(s)),
119            Err(e) => Err(custom_error(ctx, &format!("TSV write error: {}", e))),
120        }
121    }
122}
123
124// =============================================================================
125// to_csv_rows(array_of_arrays) -> string
126// =============================================================================
127
128defn!(ToCsvRowsFn, vec![arg!(array)], None);
129
130impl Function for ToCsvRowsFn {
131    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
132        self.signature.validate(args, ctx)?;
133
134        let rows_var = args[0].as_array().unwrap();
135
136        if rows_var.is_empty() {
137            return Ok(Value::String(String::new()));
138        }
139
140        let rows: Vec<Vec<String>> = rows_var
141            .iter()
142            .map(|row| {
143                if let Some(arr) = row.as_array() {
144                    arr.iter().map(value_to_csv_string).collect()
145                } else {
146                    vec![value_to_csv_string(row)]
147                }
148            })
149            .collect();
150
151        match write_csv_rows(&rows, b',') {
152            Ok(s) => Ok(Value::String(s)),
153            Err(e) => Err(custom_error(ctx, &format!("CSV write error: {}", e))),
154        }
155    }
156}
157
158// =============================================================================
159// to_csv_table(array_of_objects, columns?) -> string
160// =============================================================================
161
162defn!(ToCsvTableFn, vec![arg!(array)], Some(arg!(array)));
163
164impl Function for ToCsvTableFn {
165    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
166        self.signature.validate(args, ctx)?;
167
168        let rows = args[0].as_array().unwrap();
169
170        if rows.is_empty() {
171            return Ok(Value::String(String::new()));
172        }
173
174        // Determine columns: from second argument or from first object's keys
175        let columns: Vec<String> = if args.len() > 1 {
176            args[1]
177                .as_array()
178                .unwrap()
179                .iter()
180                .filter_map(|v| v.as_str().map(|s| s.to_string()))
181                .collect()
182        } else if let Some(obj) = rows[0].as_object() {
183            let mut keys: Vec<String> = obj.keys().cloned().collect();
184            keys.sort();
185            keys
186        } else {
187            return Ok(Value::String(String::new()));
188        };
189
190        if columns.is_empty() {
191            return Ok(Value::String(String::new()));
192        }
193
194        let mut all_rows: Vec<Vec<String>> = Vec::with_capacity(rows.len() + 1);
195
196        // Header row
197        all_rows.push(columns.clone());
198
199        // Data rows
200        for row in rows.iter() {
201            if let Some(obj) = row.as_object() {
202                let data_row: Vec<String> = columns
203                    .iter()
204                    .map(|col| obj.get(col).map(value_to_csv_string).unwrap_or_default())
205                    .collect();
206                all_rows.push(data_row);
207            } else {
208                all_rows.push(columns.iter().map(|_| String::new()).collect());
209            }
210        }
211
212        match write_csv_rows(&all_rows, b',') {
213            Ok(s) => Ok(Value::String(s)),
214            Err(e) => Err(custom_error(ctx, &format!("CSV write error: {}", e))),
215        }
216    }
217}
218
219// =============================================================================
220// from_csv(string) -> array of arrays
221// =============================================================================
222
223defn!(FromCsvFn, vec![arg!(string)], None);
224
225impl Function for FromCsvFn {
226    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
227        self.signature.validate(args, ctx)?;
228        let input = args[0].as_str().unwrap();
229        parse_delimited(input, b',', ctx)
230    }
231}
232
233// =============================================================================
234// from_tsv(string) -> array of arrays
235// =============================================================================
236
237defn!(FromTsvFn, vec![arg!(string)], None);
238
239impl Function for FromTsvFn {
240    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
241        self.signature.validate(args, ctx)?;
242        let input = args[0].as_str().unwrap();
243        parse_delimited(input, b'\t', ctx)
244    }
245}
246
247/// Parse a delimited string (CSV or TSV) into an array of arrays.
248fn parse_delimited(input: &str, delimiter: u8, ctx: &Context<'_>) -> SearchResult {
249    use csv::ReaderBuilder;
250
251    if input.trim().is_empty() {
252        return Ok(Value::Array(vec![]));
253    }
254
255    let mut reader = ReaderBuilder::new()
256        .delimiter(delimiter)
257        .has_headers(false)
258        .flexible(true)
259        .from_reader(input.as_bytes());
260
261    let mut rows: Vec<Value> = Vec::new();
262
263    for result in reader.records() {
264        match result {
265            Ok(record) => {
266                let row: Vec<Value> = record
267                    .iter()
268                    .map(|field| Value::String(field.to_string()))
269                    .collect();
270                rows.push(Value::Array(row));
271            }
272            Err(e) => {
273                return Err(custom_error(ctx, &format!("CSV parse error: {}", e)));
274            }
275        }
276    }
277
278    Ok(Value::Array(rows))
279}
280
281/// Register format functions filtered by the enabled set.
282pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
283    register_if_enabled(runtime, "to_csv", enabled, Box::new(ToCsvFn::new()));
284    register_if_enabled(runtime, "to_tsv", enabled, Box::new(ToTsvFn::new()));
285    register_if_enabled(
286        runtime,
287        "to_csv_rows",
288        enabled,
289        Box::new(ToCsvRowsFn::new()),
290    );
291    register_if_enabled(
292        runtime,
293        "to_csv_table",
294        enabled,
295        Box::new(ToCsvTableFn::new()),
296    );
297    register_if_enabled(runtime, "from_csv", enabled, Box::new(FromCsvFn::new()));
298    register_if_enabled(runtime, "from_tsv", enabled, Box::new(FromTsvFn::new()));
299}