Skip to main content

stryke/
native_data.rs

1//! Native CSV (`csv` crate), SQLite (`rusqlite`), and HTTP JSON (`ureq` + `serde_json`) helpers.
2
3use std::io::Read;
4use std::sync::Arc;
5use std::time::Duration;
6
7use indexmap::IndexMap;
8use jaq_core::data::JustLut;
9use num_traits::cast::ToPrimitive;
10use parking_lot::{Mutex, RwLock};
11use rayon::prelude::*;
12use rusqlite::{types::Value, Connection};
13use serde_json::Value as JsonValue;
14
15use crate::ast::StructDef;
16use crate::error::{StrykeError, StrykeResult};
17use crate::value::{HeapObject, PerlDataFrame, StructInstance, StrykeValue};
18
19/// Parallel row→hashref conversion after a sequential CSV parse (good CPU parallelism on wide files).
20pub(crate) fn par_csv_read(path: &str) -> StrykeResult<StrykeValue> {
21    let mut rdr = csv::Reader::from_path(path)
22        .map_err(|e| StrykeError::runtime(format!("par_csv_read: {}: {}", path, e), 0))?;
23    let headers: Vec<String> = rdr
24        .headers()
25        .map_err(|e| StrykeError::runtime(format!("par_csv_read: {}: {}", path, e), 0))?
26        .iter()
27        .map(|s| s.to_string())
28        .collect();
29    let mut raw_rows: Vec<csv::StringRecord> = Vec::new();
30    for rec in rdr.records() {
31        raw_rows.push(rec.map_err(|e| StrykeError::runtime(format!("par_csv_read: {}", e), 0))?);
32    }
33    let rows: Vec<StrykeValue> = raw_rows
34        .into_par_iter()
35        .map(|record| {
36            let mut map = IndexMap::new();
37            for (i, h) in headers.iter().enumerate() {
38                let cell = record.get(i).unwrap_or("");
39                map.insert(h.clone(), StrykeValue::string(cell.to_string()));
40            }
41            StrykeValue::hash_ref(Arc::new(RwLock::new(map)))
42        })
43        .collect();
44    Ok(StrykeValue::array(rows))
45}
46
47/// Columnar dataframe from a CSV path (header row + string cells; use `sum` etc. with numeric strings).
48pub(crate) fn dataframe_from_elements(val: &StrykeValue) -> StrykeResult<StrykeValue> {
49    let rows = val.map_flatten_outputs(true);
50    if rows.is_empty() {
51        return Ok(StrykeValue::dataframe(Arc::new(Mutex::new(
52            PerlDataFrame {
53                columns: vec![],
54                cols: vec![],
55                group_by: None,
56            },
57        ))));
58    }
59
60    // Detect format: list of hashrefs or list of arrayrefs
61    let first_row = &rows[0];
62    if let Some(first_row_map) = first_row.as_hash_ref() {
63        // List of hashrefs: use keys of the first row as columns
64        let columns: Vec<String> = first_row_map.read().keys().cloned().collect();
65        let mut cols: Vec<Vec<StrykeValue>> = (0..columns.len()).map(|_| Vec::new()).collect();
66        for row_val in rows {
67            if let Some(row_lock) = row_val.as_hash_ref() {
68                let row_map = row_lock.read();
69                for (i, col_name) in columns.iter().enumerate() {
70                    cols[i].push(row_map.get(col_name).cloned().unwrap_or(StrykeValue::UNDEF));
71                }
72            }
73        }
74        return Ok(StrykeValue::dataframe(Arc::new(Mutex::new(
75            PerlDataFrame {
76                columns,
77                cols,
78                group_by: None,
79            },
80        ))));
81    } else if let Some(first_row_lock) = first_row.as_array_ref() {
82        // List of arrayrefs: first row is headers
83        let first_row_arr = first_row_lock.read();
84        let columns: Vec<String> = first_row_arr.iter().map(|v| v.to_string()).collect();
85        let mut cols: Vec<Vec<StrykeValue>> = (0..columns.len()).map(|_| Vec::new()).collect();
86        for row_val in rows.iter().skip(1) {
87            if let Some(row_lock) = row_val.as_array_ref() {
88                let row_arr = row_lock.read();
89                for (i, col) in cols.iter_mut().enumerate().take(columns.len()) {
90                    col.push(row_arr.get(i).cloned().unwrap_or(StrykeValue::UNDEF));
91                }
92            }
93        }
94        return Ok(StrykeValue::dataframe(Arc::new(Mutex::new(
95            PerlDataFrame {
96                columns,
97                cols,
98                group_by: None,
99            },
100        ))));
101    }
102
103    Err(StrykeError::runtime(
104        "dataframe expects a file path or a list of hashrefs/arrayrefs",
105        0,
106    ))
107}
108
109pub(crate) fn dataframe_from_path(path: &str) -> StrykeResult<StrykeValue> {
110    let mut rdr = csv::Reader::from_path(path)
111        .map_err(|e| StrykeError::runtime(format!("dataframe: {}: {}", path, e), 0))?;
112    let headers: Vec<String> = rdr
113        .headers()
114        .map_err(|e| StrykeError::runtime(format!("dataframe: {}: {}", path, e), 0))?
115        .iter()
116        .map(|s| s.to_string())
117        .collect();
118    let ncols = headers.len();
119    let mut cols: Vec<Vec<StrykeValue>> = (0..ncols).map(|_| Vec::new()).collect();
120    for rec in rdr.records() {
121        let record = rec.map_err(|e| StrykeError::runtime(format!("dataframe: {}", e), 0))?;
122        for (i, col) in cols.iter_mut().enumerate().take(ncols) {
123            let cell = record.get(i).unwrap_or("");
124            col.push(StrykeValue::string(cell.to_string()));
125        }
126    }
127    let df = PerlDataFrame {
128        columns: headers,
129        cols,
130        group_by: None,
131    };
132    Ok(StrykeValue::dataframe(Arc::new(Mutex::new(df))))
133}
134
135pub(crate) fn csv_read(path: &str) -> StrykeResult<StrykeValue> {
136    let mut rdr = csv::Reader::from_path(path)
137        .map_err(|e| StrykeError::runtime(format!("csv_read: {}: {}", path, e), 0))?;
138    let headers: Vec<String> = rdr
139        .headers()
140        .map_err(|e| StrykeError::runtime(format!("csv_read: {}: {}", path, e), 0))?
141        .iter()
142        .map(|s| s.to_string())
143        .collect();
144    let mut rows = Vec::new();
145    for rec in rdr.records() {
146        let record = rec.map_err(|e| StrykeError::runtime(format!("csv_read: {}", e), 0))?;
147        let mut map = IndexMap::new();
148        for (i, h) in headers.iter().enumerate() {
149            let cell = record.get(i).unwrap_or("");
150            map.insert(h.clone(), StrykeValue::string(cell.to_string()));
151        }
152        rows.push(StrykeValue::hash_ref(Arc::new(RwLock::new(map))));
153    }
154    Ok(StrykeValue::array(rows))
155}
156
157/// Writes rows as CSV. Each row is a hash or hashref; header row is the union of keys
158/// (first-seen order, then keys from later rows in order).
159pub(crate) fn csv_write(path: &str, rows: &[StrykeValue]) -> StrykeResult<StrykeValue> {
160    let mut header: Vec<String> = Vec::new();
161    let mut seen = std::collections::HashSet::<String>::new();
162    let mut normalized: Vec<IndexMap<String, StrykeValue>> = Vec::new();
163
164    for row in rows {
165        let map = hash_like(row)?;
166        for k in map.keys() {
167            if seen.insert(k.clone()) {
168                header.push(k.clone());
169            }
170        }
171        normalized.push(map);
172    }
173
174    let mut wtr = csv::Writer::from_path(path)
175        .map_err(|e| StrykeError::runtime(format!("csv_write: {}: {}", path, e), 0))?;
176    wtr.write_record(&header)
177        .map_err(|e| StrykeError::runtime(format!("csv_write: {}", e), 0))?;
178    for map in &normalized {
179        let record: Vec<String> = header
180            .iter()
181            .map(|k| map.get(k).map(|v| v.to_string()).unwrap_or_default())
182            .collect();
183        wtr.write_record(&record)
184            .map_err(|e| StrykeError::runtime(format!("csv_write: {}", e), 0))?;
185    }
186    wtr.flush()
187        .map_err(|e| StrykeError::runtime(format!("csv_write: {}", e), 0))?;
188    Ok(StrykeValue::integer(normalized.len() as i64))
189}
190
191fn hash_like(v: &StrykeValue) -> StrykeResult<IndexMap<String, StrykeValue>> {
192    if let Some(h) = v.as_hash_map() {
193        return Ok(h);
194    }
195    if let Some(r) = v.as_hash_ref() {
196        return Ok(r.read().clone());
197    }
198    if let Some(b) = v.as_blessed_ref() {
199        let d = b.data.read();
200        if let Some(h) = d.as_hash_map() {
201            return Ok(h);
202        }
203    }
204    Err(StrykeError::runtime(
205        "csv_write: row must be hash or hashref",
206        0,
207    ))
208}
209
210pub(crate) fn sqlite_open(path: &str) -> StrykeResult<StrykeValue> {
211    let conn = Connection::open(path)
212        .map_err(|e| StrykeError::runtime(format!("sqlite: {}: {}", path, e), 0))?;
213    Ok(StrykeValue::sqlite_conn(Arc::new(Mutex::new(conn))))
214}
215
216pub(crate) fn sqlite_dispatch(
217    conn: &Arc<Mutex<Connection>>,
218    method: &str,
219    args: &[StrykeValue],
220    line: usize,
221) -> StrykeResult<StrykeValue> {
222    let c = conn.lock();
223    match method {
224        "exec" => {
225            if args.is_empty() {
226                return Err(StrykeError::runtime("sqlite->exec needs SQL string", line));
227            }
228            let sql = args[0].to_string();
229            let params: Vec<Value> = args[1..].iter().map(perl_to_sql_value).collect();
230            let n = exec_sql(&c, &sql, &params)?;
231            Ok(StrykeValue::integer(n as i64))
232        }
233        "query" => {
234            if args.is_empty() {
235                return Err(StrykeError::runtime("sqlite->query needs SQL string", line));
236            }
237            let sql = args[0].to_string();
238            let params: Vec<Value> = args[1..].iter().map(perl_to_sql_value).collect();
239            query_sql(&c, &sql, &params, line)
240        }
241        "last_insert_rowid" => {
242            if !args.is_empty() {
243                return Err(StrykeError::runtime(
244                    "sqlite->last_insert_rowid takes no arguments",
245                    line,
246                ));
247            }
248            Ok(StrykeValue::integer(c.last_insert_rowid()))
249        }
250        _ => Err(StrykeError::runtime(
251            format!("unknown sqlite method: {}", method),
252            line,
253        )),
254    }
255}
256
257pub(crate) fn exec_sql(conn: &Connection, sql: &str, params: &[Value]) -> StrykeResult<usize> {
258    conn.execute(sql, rusqlite::params_from_iter(params.iter()))
259        .map_err(|e| StrykeError::runtime(format!("sqlite exec: {}", e), 0))
260}
261
262pub(crate) fn query_sql(
263    conn: &Connection,
264    sql: &str,
265    params: &[Value],
266    line: usize,
267) -> StrykeResult<StrykeValue> {
268    let mut stmt = conn
269        .prepare(sql)
270        .map_err(|e| StrykeError::runtime(format!("sqlite query: {}", e), line))?;
271    let col_count = stmt.column_count();
272    let mut col_names = Vec::with_capacity(col_count);
273    for i in 0..col_count {
274        col_names.push(
275            stmt.column_name(i)
276                .map(|s| s.to_string())
277                .unwrap_or_else(|_| format!("col{}", i)),
278        );
279    }
280    let mut rows = stmt
281        .query(rusqlite::params_from_iter(params.iter()))
282        .map_err(|e| StrykeError::runtime(format!("sqlite query: {}", e), line))?;
283    let mut rows_out = Vec::new();
284    while let Some(row) = rows
285        .next()
286        .map_err(|e| StrykeError::runtime(format!("sqlite query: {}", e), line))?
287    {
288        let mut map = IndexMap::new();
289        for (i, col_name) in col_names.iter().enumerate().take(col_count) {
290            let v = row
291                .get::<_, Value>(i)
292                .map_err(|e| StrykeError::runtime(format!("sqlite query: {}", e), line))?;
293            map.insert(col_name.clone(), sqlite_value_to_perl(v));
294        }
295        rows_out.push(StrykeValue::hash_ref(Arc::new(RwLock::new(map))));
296    }
297    Ok(StrykeValue::array(rows_out))
298}
299
300pub(crate) fn perl_to_sql_value(v: &StrykeValue) -> Value {
301    if v.is_undef() {
302        return Value::Null;
303    }
304    if let Some(i) = v.as_integer() {
305        return Value::Integer(i);
306    }
307    if let Some(f) = v.as_float() {
308        return Value::Real(f);
309    }
310    if let Some(s) = v.as_str() {
311        return Value::Text(s);
312    }
313    if let Some(b) = v.as_bytes_arc() {
314        return Value::Blob((*b).clone());
315    }
316    Value::Text(v.to_string())
317}
318
319pub(crate) fn sqlite_value_to_perl(v: Value) -> StrykeValue {
320    match v {
321        Value::Null => StrykeValue::UNDEF,
322        Value::Integer(i) => StrykeValue::integer(i),
323        Value::Real(r) => StrykeValue::float(r),
324        Value::Text(s) => StrykeValue::string(s),
325        Value::Blob(b) => StrykeValue::bytes(Arc::new(b)),
326    }
327}
328
329/// Build a struct instance with defaults evaluated by the interpreter.
330/// Called from interpreter when constructing structs so default expressions can be evaluated.
331pub(crate) fn struct_new_with_defaults(
332    def: &Arc<StructDef>,
333    provided: &[(String, StrykeValue)],
334    defaults: &[Option<StrykeValue>],
335    line: usize,
336) -> StrykeResult<StrykeValue> {
337    let mut values = vec![StrykeValue::UNDEF; def.fields.len()];
338    for (k, v) in provided {
339        let idx = def.field_index(k).ok_or_else(|| {
340            StrykeError::runtime(format!("struct {}: unknown field `{}`", def.name, k), line)
341        })?;
342        let field = &def.fields[idx];
343        field.ty.check_value(v).map_err(|msg| {
344            StrykeError::type_error(format!("struct {} field `{}`: {}", def.name, k, msg), line)
345        })?;
346        values[idx] = v.clone();
347    }
348    for (idx, field) in def.fields.iter().enumerate() {
349        if values[idx].is_undef() {
350            if let Some(dv) = defaults.get(idx).and_then(|o| o.as_ref()) {
351                // Skip type check if default is undef (nullable field pattern)
352                if !dv.is_undef() {
353                    field.ty.check_value(dv).map_err(|msg| {
354                        StrykeError::type_error(
355                            format!(
356                                "struct {} field `{}` default: {}",
357                                def.name, field.name, msg
358                            ),
359                            line,
360                        )
361                    })?;
362                }
363                values[idx] = dv.clone();
364            } else if field.default.is_none() && !matches!(field.ty, crate::ast::PerlTypeName::Any)
365            {
366                return Err(StrykeError::runtime(
367                    format!(
368                        "struct {}: missing field `{}` ({})",
369                        def.name,
370                        field.name,
371                        field.ty.display_name()
372                    ),
373                    line,
374                ));
375            }
376        }
377    }
378    Ok(StrykeValue::struct_inst(Arc::new(StructInstance::new(
379        Arc::clone(def),
380        values,
381    ))))
382}
383
384/// GET `url` and return the response body as a UTF-8 string (invalid UTF-8 is lossy).
385pub(crate) fn fetch(url: &str) -> StrykeResult<StrykeValue> {
386    let s = http_get_body(url)?;
387    Ok(StrykeValue::string(s))
388}
389
390/// GET `url`, parse JSON, map to [`StrykeValue`] (objects → `HashRef`, arrays → `Array`, etc.).
391pub(crate) fn fetch_json(url: &str) -> StrykeResult<StrykeValue> {
392    let s = http_get_body(url)?;
393    let v: JsonValue = serde_json::from_str(&s)
394        .map_err(|e| StrykeError::runtime(format!("fetch_json: {}", e), 0))?;
395    Ok(json_to_perl(v))
396}
397
398fn http_get_body(url: &str) -> StrykeResult<String> {
399    ureq::get(url)
400        .call()
401        .map_err(|e| StrykeError::runtime(format!("fetch: {}", e), 0))?
402        .into_string()
403        .map_err(|e| StrykeError::runtime(format!("fetch: {}", e), 0))
404}
405
406fn perl_hash_lookup(v: &StrykeValue, key: &str) -> Option<StrykeValue> {
407    v.hash_get(key)
408        .or_else(|| v.as_hash_ref().and_then(|r| r.read().get(key).cloned()))
409}
410
411fn perl_opt_lookup(opts: Option<&StrykeValue>, key: &str) -> Option<StrykeValue> {
412    let o = opts?;
413    perl_hash_lookup(o, key)
414}
415
416fn perl_opt_bool(opts: Option<&StrykeValue>, key: &str) -> bool {
417    perl_opt_lookup(opts, key).is_some_and(|v| v.is_true())
418}
419
420fn perl_opt_u64(opts: Option<&StrykeValue>, key: &str) -> Option<u64> {
421    perl_opt_lookup(opts, key).map(|v| v.to_int().max(0) as u64)
422}
423
424fn body_bytes_from_perl(v: &StrykeValue) -> Vec<u8> {
425    if let Some(b) = v.as_bytes_arc() {
426        return b.as_ref().clone();
427    }
428    v.to_string().into_bytes()
429}
430
431fn headers_map_has_content_type(headers_val: &StrykeValue) -> bool {
432    if let Some(m) = headers_val.as_hash_map() {
433        return m.keys().any(|k| k.eq_ignore_ascii_case("content-type"));
434    }
435    if let Some(r) = headers_val.as_hash_ref() {
436        return r
437            .read()
438            .keys()
439            .any(|k| k.eq_ignore_ascii_case("content-type"));
440    }
441    false
442}
443
444fn apply_request_headers(
445    mut req: ureq::Request,
446    headers_val: &StrykeValue,
447) -> StrykeResult<ureq::Request> {
448    let pairs: Vec<(String, String)> = if let Some(m) = headers_val.as_hash_map() {
449        m.iter().map(|(k, v)| (k.clone(), v.to_string())).collect()
450    } else if let Some(r) = headers_val.as_hash_ref() {
451        r.read()
452            .iter()
453            .map(|(k, v)| (k.clone(), v.to_string()))
454            .collect()
455    } else {
456        return Err(StrykeError::runtime(
457            "http_request: headers must be a hash or hashref",
458            0,
459        ));
460    };
461    for (k, v) in pairs {
462        req = req.set(&k, &v);
463    }
464    Ok(req)
465}
466
467/// Full HTTP request: `opts` hash(ref) keys: `method` (default GET), `headers`, `body`, `json`
468/// (encodes body, sets `Content-Type` unless already in `headers`), `timeout` / `timeout_secs`
469/// (omit for 30s; `0` disables client timeout), `binary_response` (body as `BYTES` instead of decoded string).
470///
471/// Returns a hashref: `status`, `status_text`, `headers` (hashref, lowercased names), `body`.
472pub(crate) fn http_request(url: &str, opts: Option<&StrykeValue>) -> StrykeResult<StrykeValue> {
473    let method = perl_opt_lookup(opts, "method")
474        .map(|v| v.to_string())
475        .filter(|s| !s.is_empty())
476        .unwrap_or_else(|| "GET".to_string());
477    let method_uc = method.to_ascii_uppercase();
478    let timeout_secs = perl_opt_u64(opts, "timeout_secs").or_else(|| perl_opt_u64(opts, "timeout"));
479    let binary_response = perl_opt_bool(opts, "binary_response");
480
481    let mut req = ureq::request(method_uc.as_str(), url);
482    match timeout_secs {
483        None => {
484            req = req.timeout(Duration::from_secs(30));
485        }
486        Some(0) => {}
487        Some(n) => {
488            req = req.timeout(Duration::from_secs(n));
489        }
490    }
491
492    if let Some(hv) = opts.and_then(|o| perl_hash_lookup(o, "headers")) {
493        req = apply_request_headers(req, &hv)?;
494    }
495
496    let mut body: Vec<u8> = Vec::new();
497    if let Some(o) = opts {
498        if let Some(jv) = perl_hash_lookup(o, "json") {
499            let jstr = json_encode(&jv)?;
500            if let Some(hv) = perl_hash_lookup(o, "headers") {
501                if !headers_map_has_content_type(&hv) {
502                    req = req.set("Content-Type", "application/json; charset=utf-8");
503                }
504            } else {
505                req = req.set("Content-Type", "application/json; charset=utf-8");
506            }
507            body = jstr.into_bytes();
508        } else if let Some(bv) = perl_hash_lookup(o, "body") {
509            body = body_bytes_from_perl(&bv);
510        }
511    }
512
513    let resp = if body.is_empty() {
514        req.call()
515    } else {
516        req.send_bytes(&body)
517    }
518    .map_err(|e| StrykeError::runtime(format!("http_request: {}", e), 0))?;
519
520    let status = resp.status();
521    let status_text = resp.status_text().to_string();
522    let mut hdr_map = IndexMap::new();
523    let mut names = resp.headers_names();
524    names.sort();
525    names.dedup();
526    for n in names {
527        let vals: Vec<&str> = resp.all(&n);
528        if !vals.is_empty() {
529            hdr_map.insert(n, StrykeValue::string(vals.join(", ")));
530        }
531    }
532    let headers_ref = StrykeValue::hash_ref(Arc::new(RwLock::new(hdr_map)));
533
534    let body_val = if binary_response {
535        let mut buf = Vec::new();
536        resp.into_reader()
537            .read_to_end(&mut buf)
538            .map_err(|e| StrykeError::runtime(format!("http_request: body read: {}", e), 0))?;
539        StrykeValue::bytes(Arc::new(buf))
540    } else {
541        let s = resp
542            .into_string()
543            .map_err(|e| StrykeError::runtime(format!("http_request: body: {}", e), 0))?;
544        StrykeValue::string(s)
545    };
546
547    let mut out = IndexMap::new();
548    out.insert("status".into(), StrykeValue::integer(status as i64));
549    out.insert("status_text".into(), StrykeValue::string(status_text));
550    out.insert("headers".into(), headers_ref);
551    out.insert("body".into(), body_val);
552    Ok(StrykeValue::hash_ref(Arc::new(RwLock::new(out))))
553}
554
555/// Parse JSON from the `body` field of an [`http_request`] result hashref.
556pub(crate) fn http_response_json_body(res: &StrykeValue) -> StrykeResult<StrykeValue> {
557    let body = perl_hash_lookup(res, "body")
558        .ok_or_else(|| StrykeError::runtime("fetch_json: http response missing body", 0))?;
559    let s = if let Some(b) = body.as_bytes_arc() {
560        String::from_utf8_lossy(b.as_ref()).into_owned()
561    } else {
562        body.to_string()
563    };
564    json_decode(&s)
565}
566
567/// Serialize a [`StrykeValue`] to a JSON string (arrays, hashes, refs, structs, scalars; not code/refs/IO).
568pub(crate) fn json_encode(v: &StrykeValue) -> StrykeResult<String> {
569    let j = perl_to_json_value(v)?;
570    serde_json::to_string(&j).map_err(|e| StrykeError::runtime(format!("json_encode: {}", e), 0))
571}
572
573/// Parse a JSON string into [`StrykeValue`] (same mapping as [`fetch_json`]).
574pub(crate) fn json_decode(s: &str) -> StrykeResult<StrykeValue> {
575    let v: JsonValue = serde_json::from_str(s.trim())
576        .map_err(|e| StrykeError::runtime(format!("json_decode: {}", e), 0))?;
577    Ok(json_to_perl(v))
578}
579
580/// Run a [jq](https://jqlang.org/)-syntax filter (via [jaq](https://github.com/01mf02/jaq)) on JSON
581/// derived from `data` (same encodable shapes as [`json_encode`]).
582///
583/// Returns `undef` if the filter yields no values, a single Perl value if it yields one output,
584/// or an array of values if it yields more than one (e.g. `.items[]`).
585pub(crate) fn json_jq(data: &StrykeValue, filter_src: &str) -> StrykeResult<StrykeValue> {
586    let j = perl_to_json_value(data)?;
587    let input: jaq_json::Val = serde_json::from_value(j)
588        .map_err(|e| StrykeError::runtime(format!("json_jq: could not convert input: {}", e), 0))?;
589
590    let arena = jaq_core::load::Arena::default();
591    let defs = jaq_core::defs()
592        .chain(jaq_std::defs())
593        .chain(jaq_json::defs());
594    let loader = jaq_core::load::Loader::new(defs);
595    let file = jaq_core::load::File {
596        code: filter_src,
597        path: (),
598    };
599    let modules = loader
600        .load(&arena, file)
601        .map_err(|e| StrykeError::runtime(format!("json_jq: parse/load: {:?}", e), 0))?;
602
603    type JData = JustLut<jaq_json::Val>;
604    let filter = jaq_core::Compiler::default()
605        .with_funs(
606            jaq_core::funs::<JData>()
607                .chain(jaq_std::funs::<JData>())
608                .chain(jaq_json::funs::<JData>()),
609        )
610        .compile(modules)
611        .map_err(|e| StrykeError::runtime(format!("json_jq: compile: {:?}", e), 0))?;
612
613    let ctx = jaq_core::Ctx::<JData>::new(&filter.lut, jaq_core::Vars::new([]));
614    let mut results = Vec::new();
615    for x in filter.id.run((ctx, input)) {
616        match jaq_core::unwrap_valr(x) {
617            Ok(v) => results.push(jaq_json_val_to_perl(v)?),
618            Err(e) => {
619                return Err(StrykeError::runtime(format!("json_jq: {}", e), 0));
620            }
621        }
622    }
623
624    match results.len() {
625        0 => Ok(StrykeValue::UNDEF),
626        1 => Ok(results.pop().expect("one")),
627        _ => Ok(StrykeValue::array(results)),
628    }
629}
630
631fn jaq_json_val_to_perl(v: jaq_json::Val) -> StrykeResult<StrykeValue> {
632    use jaq_json::Val as Jv;
633    match v {
634        Jv::Null => Ok(StrykeValue::UNDEF),
635        Jv::Bool(b) => Ok(StrykeValue::integer(i64::from(b))),
636        Jv::Num(n) => jaq_num_to_perl(n),
637        Jv::BStr(b) => Ok(StrykeValue::string(
638            String::from_utf8_lossy(&b).into_owned(),
639        )),
640        Jv::TStr(b) => Ok(StrykeValue::string(
641            String::from_utf8_lossy(&b).into_owned(),
642        )),
643        Jv::Arr(a) => {
644            let v = a.as_ref();
645            let mut out = Vec::with_capacity(v.len());
646            for x in v.iter() {
647                out.push(jaq_json_val_to_perl(x.clone())?);
648            }
649            Ok(StrykeValue::array(out))
650        }
651        Jv::Obj(o) => {
652            let mut map = IndexMap::new();
653            for (k, val) in o.iter() {
654                map.insert(k.to_string(), jaq_json_val_to_perl(val.clone())?);
655            }
656            Ok(StrykeValue::hash_ref(Arc::new(RwLock::new(map))))
657        }
658    }
659}
660
661fn jaq_num_to_perl(n: jaq_json::Num) -> StrykeResult<StrykeValue> {
662    use jaq_json::Num as Jn;
663    match n {
664        Jn::Int(i) => Ok(StrykeValue::integer(i as i64)),
665        Jn::Float(f) => Ok(StrykeValue::float(f)),
666        Jn::BigInt(r) => {
667            let bi = (*r).clone();
668            if let Some(i) = bi.to_i64() {
669                Ok(StrykeValue::integer(i))
670            } else if let Some(f) = bi.to_f64() {
671                Ok(StrykeValue::float(f))
672            } else {
673                Ok(StrykeValue::string(bi.to_string()))
674            }
675        }
676        Jn::Dec(s) => {
677            let f: f64 = s.parse().unwrap_or(f64::NAN);
678            Ok(StrykeValue::float(f))
679        }
680    }
681}
682
683pub(crate) fn perl_to_json_value(v: &StrykeValue) -> StrykeResult<JsonValue> {
684    if v.is_undef() {
685        return Ok(JsonValue::Null);
686    }
687    if let Some(n) = v.as_integer() {
688        return Ok(JsonValue::Number(n.into()));
689    }
690    if let Some(f) = v.as_float() {
691        return serde_json::Number::from_f64(f)
692            .map(JsonValue::Number)
693            .ok_or_else(|| StrykeError::runtime("json_encode: non-finite float", 0));
694    }
695    if crate::nanbox::is_raw_float_bits(v.0) {
696        let f = f64::from_bits(v.0);
697        return serde_json::Number::from_f64(f)
698            .map(JsonValue::Number)
699            .ok_or_else(|| StrykeError::runtime("json_encode: non-finite float", 0));
700    }
701    if let Some(a) = v.as_array_vec() {
702        let mut out = Vec::with_capacity(a.len());
703        for x in &a {
704            out.push(perl_to_json_value(x)?);
705        }
706        return Ok(JsonValue::Array(out));
707    }
708    if let Some(h) = v.as_hash_map() {
709        let mut m = serde_json::Map::new();
710        for (k, val) in h.iter() {
711            m.insert(k.clone(), perl_to_json_value(val)?);
712        }
713        return Ok(JsonValue::Object(m));
714    }
715    if let Some(r) = v.as_array_ref() {
716        let g = r.read();
717        let mut out = Vec::with_capacity(g.len());
718        for x in g.iter() {
719            out.push(perl_to_json_value(x)?);
720        }
721        return Ok(JsonValue::Array(out));
722    }
723    if let Some(r) = v.as_hash_ref() {
724        let g = r.read();
725        let mut m = serde_json::Map::new();
726        for (k, val) in g.iter() {
727            m.insert(k.clone(), perl_to_json_value(val)?);
728        }
729        return Ok(JsonValue::Object(m));
730    }
731    if let Some(r) = v.as_scalar_ref() {
732        return perl_to_json_value(&r.read());
733    }
734    if let Some(a) = v.as_atomic_arc() {
735        return perl_to_json_value(&a.lock().clone());
736    }
737    if let Some(s) = v.as_str() {
738        return Ok(JsonValue::String(s));
739    }
740    if let Some(b) = v.as_bytes_arc() {
741        return Ok(JsonValue::String(String::from_utf8_lossy(&b).into_owned()));
742    }
743    if let Some(si) = v.as_struct_inst() {
744        let mut m = serde_json::Map::new();
745        let values = si.get_values();
746        for (i, field) in si.def.fields.iter().enumerate() {
747            if let Some(fv) = values.get(i) {
748                m.insert(field.name.clone(), perl_to_json_value(fv)?);
749            }
750        }
751        return Ok(JsonValue::Object(m));
752    }
753    if let Some(b) = v.as_blessed_ref() {
754        let inner = b.data.read().clone();
755        return perl_to_json_value(&inner);
756    }
757    if let Some(vals) = v
758        .with_heap(|h| match h {
759            HeapObject::Set(s) => Some(s.values().cloned().collect::<Vec<_>>()),
760            _ => None,
761        })
762        .flatten()
763    {
764        let mut out = Vec::with_capacity(vals.len());
765        for x in vals {
766            out.push(perl_to_json_value(&x)?);
767        }
768        return Ok(JsonValue::Array(out));
769    }
770    if let Some(vals) = v
771        .with_heap(|h| match h {
772            HeapObject::Deque(d) => Some(d.lock().iter().cloned().collect::<Vec<_>>()),
773            _ => None,
774        })
775        .flatten()
776    {
777        let mut out = Vec::with_capacity(vals.len());
778        for x in vals {
779            out.push(perl_to_json_value(&x)?);
780        }
781        return Ok(JsonValue::Array(out));
782    }
783
784    if let Some(df) = v.as_dataframe() {
785        let g = df.lock();
786        let n = g.nrows();
787        let mut rows = Vec::with_capacity(n);
788        for r in 0..n {
789            let mut m = serde_json::Map::new();
790            for (i, col) in g.columns.iter().enumerate() {
791                m.insert(col.clone(), perl_to_json_value(&g.cols[i][r])?);
792            }
793            rows.push(JsonValue::Object(m));
794        }
795        return Ok(JsonValue::Array(rows));
796    }
797
798    Err(StrykeError::runtime(
799        format!(
800            "json_encode: value cannot be encoded as JSON ({})",
801            v.type_name()
802        ),
803        0,
804    ))
805}
806
807fn json_to_perl(v: JsonValue) -> StrykeValue {
808    match v {
809        JsonValue::Null => StrykeValue::UNDEF,
810        JsonValue::Bool(b) => StrykeValue::integer(i64::from(b)),
811        JsonValue::Number(n) => {
812            if let Some(i) = n.as_i64() {
813                StrykeValue::integer(i)
814            } else if let Some(u) = n.as_u64() {
815                StrykeValue::integer(u as i64)
816            } else {
817                StrykeValue::float(n.as_f64().unwrap_or(0.0))
818            }
819        }
820        JsonValue::String(s) => StrykeValue::string(s),
821        JsonValue::Array(a) => StrykeValue::array(a.into_iter().map(json_to_perl).collect()),
822        JsonValue::Object(o) => {
823            let mut map = IndexMap::new();
824            for (k, v) in o {
825                map.insert(k, json_to_perl(v));
826            }
827            StrykeValue::hash_ref(Arc::new(RwLock::new(map)))
828        }
829    }
830}
831
/// Unit tests for the JSON conversion helpers, the jq filter and the CSV dataframe loader.
#[cfg(test)]
mod http_json_tests {
    use super::*;

    /// A JSON object decodes to a hashref with string and integer members.
    #[test]
    fn json_to_perl_object_hashref() {
        let parsed: JsonValue = serde_json::from_str(r#"{"name":"a","n":1}"#).unwrap();
        let converted = json_to_perl(parsed);
        let shared = converted.as_hash_ref().expect("expected HashRef");
        let fields = shared.read();
        assert_eq!(fields.get("name").unwrap().to_string(), "a");
        assert_eq!(fields.get("n").unwrap().to_int(), 1);
    }

    /// A JSON array decodes element by element, with `null` becoming undef.
    #[test]
    fn json_to_perl_array() {
        let parsed: JsonValue = serde_json::from_str(r#"[1,"x",null]"#).unwrap();
        let items = json_to_perl(parsed)
            .as_array_vec()
            .expect("expected Array");
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].to_int(), 1);
        assert_eq!(items[1].to_string(), "x");
        assert!(items[2].is_undef());
    }

    /// Encoding an array and decoding the result preserves its elements.
    #[test]
    fn json_encode_decode_roundtrip() {
        let original = StrykeValue::array(vec![
            StrykeValue::integer(1),
            StrykeValue::string("x".into()),
            StrykeValue::UNDEF,
        ]);
        let text = json_encode(&original).expect("encode");
        let items = json_decode(&text)
            .expect("decode")
            .as_array_vec()
            .expect("array");
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].to_int(), 1);
        assert_eq!(items[1].to_string(), "x");
        assert!(items[2].is_undef());
    }

    /// A hash encodes to a JSON object and decodes back to a hashref.
    #[test]
    fn json_encode_hash_roundtrip() {
        let mut fields = IndexMap::new();
        fields.insert("a".into(), StrykeValue::integer(2));
        let text = json_encode(&StrykeValue::hash(fields)).expect("encode");
        assert!(text.contains("\"a\""));
        let decoded = json_decode(&text).expect("decode");
        let shared = decoded.as_hash_ref().expect("hashref");
        assert_eq!(shared.read().get("a").unwrap().to_int(), 2);
    }

    /// A path filter with a single output returns that value directly.
    #[test]
    fn json_jq_field_select() {
        let doc = json_decode(r#"{"a":1,"b":{"c":3}}"#).unwrap();
        let selected = json_jq(&doc, ".b.c").unwrap();
        assert_eq!(selected.to_int(), 3);
    }

    /// `map(select(..))` yields one array output containing the matching elements.
    #[test]
    fn json_jq_map_select_multiple_yields_array() {
        let doc = json_decode(r#"[1,2,3,4]"#).unwrap();
        let kept = json_jq(&doc, "map(select(. > 2))")
            .unwrap()
            .as_array_vec()
            .expect("array");
        assert_eq!(kept.len(), 2);
        assert_eq!(kept[0].to_int(), 3);
        assert_eq!(kept[1].to_int(), 4);
    }

    /// Loading a small CSV produces named columns of string cells.
    #[test]
    fn test_dataframe_from_path() {
        let file_name = format!("test_df_{}.csv", std::process::id());
        let csv_path = std::env::temp_dir().join(file_name);
        std::fs::write(&csv_path, "id,name,val\n1,alice,10.5\n2,bob,20.0\n").expect("write csv");

        let frame_val =
            dataframe_from_path(csv_path.to_str().unwrap()).expect("dataframe_from_path");
        let frame_lock = frame_val.as_dataframe().expect("as_dataframe");
        let frame = frame_lock.lock();

        assert_eq!(frame.columns, vec!["id", "name", "val"]);
        assert_eq!(frame.cols.len(), 3);
        assert_eq!(frame.cols[0][0].to_string(), "1");
        assert_eq!(frame.cols[1][1].to_string(), "bob");
        assert_eq!(frame.cols[2][0].to_string(), "10.5");

        // Best-effort cleanup; a leftover temp file is not a test failure.
        let _ = std::fs::remove_file(&csv_path);
    }
}