Skip to main content

jetro_core/eval/
builtins.rs

1//! Static builtin registry — zero vtable overhead via raw function pointers.
2//!
3//! All builtins are `fn(Val, &[Arg], &Env) -> Result<Val, EvalError>`.
4//! The registry is initialized once on first use via `OnceLock`.
5
6use std::collections::HashMap;
7use std::sync::OnceLock;
8use std::sync::Arc;
9
10use crate::ast::Arg;
11
12use super::{Env, EvalError, eval_pos, apply_item};
13use super::value::Val;
14use super::util::{val_to_string, val_str, field_exists_nested};
15use super::{func_strings, func_arrays, func_objects, func_paths, func_aggregates, func_csv, func_search};
16
/// Shorthand for an `Err(EvalError(..))` whose message is built from
/// `format!`-style arguments.
macro_rules! err {
    ($($fmt_args:tt)*) => {
        Err(EvalError(format!($($fmt_args)*)))
    };
}
20
21// ── Type alias ────────────────────────────────────────────────────────────────
22
/// Signature shared by every builtin method: the receiver value, the call's
/// argument list as AST nodes (builtins evaluate them on demand), and the
/// evaluation environment.  Returns the resulting value or an `EvalError`.
pub type BuiltinFn = fn(Val, &[Arg], &Env) -> Result<Val, EvalError>;
24
25// ── Registry ──────────────────────────────────────────────────────────────────
26
27pub struct BuiltinRegistry {
28    table: HashMap<&'static str, BuiltinFn>,
29}
30
31impl BuiltinRegistry {
32    #[inline]
33    pub fn get(&self, name: &str) -> Option<BuiltinFn> {
34        self.table.get(name).copied()
35    }
36
37    /// Iterate all registered builtin names (includes snake_case and camelCase aliases).
38    pub fn names(&self) -> impl Iterator<Item = &'static str> + '_ {
39        self.table.keys().copied()
40    }
41}
42
43/// Convenience — all builtin method names as a sorted `Vec<&'static str>`.
44pub fn all_names() -> Vec<&'static str> {
45    let mut v: Vec<&'static str> = global().names().collect();
46    v.sort_unstable();
47    v
48}
49
50static BUILTINS: OnceLock<BuiltinRegistry> = OnceLock::new();
51
52pub fn global() -> &'static BuiltinRegistry {
53    BUILTINS.get_or_init(build)
54}
55
56// ── Build ─────────────────────────────────────────────────────────────────────
57
58fn build() -> BuiltinRegistry {
59    let mut t: HashMap<&'static str, BuiltinFn> = HashMap::with_capacity(128);
60
61    // Basics
62    t.insert("len",        b_len);
63    t.insert("type",       b_type);
64    t.insert("to_string",  b_to_string);
65    t.insert("toString",   b_to_string);
66    t.insert("to_json",    b_to_json);
67    t.insert("toJson",     b_to_json);
68    t.insert("from_json",  b_from_json);
69    t.insert("fromJson",   b_from_json);
70
71    // Object
72    t.insert("keys",        b_keys);
73    t.insert("values",      b_values);
74    t.insert("entries",     b_entries);
75    t.insert("to_pairs",    b_to_pairs);
76    t.insert("toPairs",     b_to_pairs);
77    t.insert("from_pairs",  b_from_pairs);
78    t.insert("fromPairs",   b_from_pairs);
79    t.insert("invert",      b_invert);
80    t.insert("pick",        func_objects::pick);
81    t.insert("omit",        func_objects::omit);
82    t.insert("merge",       func_objects::merge);
83    t.insert("deep_merge",  func_objects::deep_merge_method);
84    t.insert("deepMerge",   func_objects::deep_merge_method);
85    t.insert("defaults",    func_objects::defaults);
86    t.insert("rename",      func_objects::rename);
87    t.insert("transform_keys",   func_objects::transform_keys);
88    t.insert("transformKeys",    func_objects::transform_keys);
89    t.insert("transform_values", func_objects::transform_values);
90    t.insert("transformValues",  func_objects::transform_values);
91    t.insert("filter_keys",   func_objects::filter_keys);
92    t.insert("filterKeys",    func_objects::filter_keys);
93    t.insert("filter_values", func_objects::filter_values);
94    t.insert("filterValues",  func_objects::filter_values);
95    t.insert("pivot",       func_objects::pivot);
96
97    // Arrays — full signature
98    t.insert("filter",    func_arrays::filter);
99    t.insert("find",      func_arrays::find);        // Tier 1 (shallow, multi-pred AND)
100    t.insert("find_all",  func_arrays::find);        // Tier 1 (shallow, multi-pred AND)
101    t.insert("findAll",   func_arrays::find);
102    t.insert("map",       func_arrays::map);
103    t.insert("flatMap",   func_arrays::flat_map);
104    t.insert("flat_map",  func_arrays::flat_map);
105    t.insert("sort",      func_arrays::sort);
106    t.insert("flatten",   func_arrays::flatten);
107    t.insert("join",      func_arrays::join);
108    t.insert("equi_join", func_arrays::equi_join);
109    t.insert("equiJoin",  func_arrays::equi_join);
110    t.insert("first",     func_arrays::first);
111    t.insert("last",      func_arrays::last);
112    t.insert("nth",       func_arrays::nth);
113    t.insert("append",    func_arrays::append);
114    t.insert("prepend",   func_arrays::prepend);
115    t.insert("remove",    func_arrays::remove);
116    t.insert("diff",      func_arrays::diff);
117    t.insert("intersect", func_arrays::intersect);
118    t.insert("union",     func_arrays::union);
119    t.insert("enumerate", func_arrays::enumerate);
120    t.insert("window",    func_arrays::window);
121    t.insert("chunk",     func_arrays::chunk);
122    t.insert("batch",     func_arrays::chunk);
123    t.insert("takewhile", func_arrays::takewhile);
124    t.insert("take_while",func_arrays::takewhile);
125    t.insert("dropwhile", func_arrays::dropwhile);
126    t.insert("drop_while",func_arrays::dropwhile);
127    t.insert("accumulate",func_arrays::accumulate);
128    t.insert("partition", func_arrays::partition);
129    t.insert("zip",       func_arrays::zip_method);
130    t.insert("zip_longest",  func_arrays::zip_longest_method);
131    t.insert("zipLongest",   func_arrays::zip_longest_method);
132
133    // Arrays — zero-extra-arg wrappers
134    t.insert("reverse",  b_reverse);
135    t.insert("unique",   b_unique);
136    t.insert("distinct", b_unique);
137    t.insert("compact",  b_compact);
138    t.insert("pairwise", b_pairwise);
139
140    // Tier 1 search / match / collect
141    t.insert("unique_by", func_search::unique_by);
142    t.insert("uniqueBy",  func_search::unique_by);
143    t.insert("collect",   func_search::collect);
144    t.insert("deep_find", func_search::deep_find);
145    t.insert("deepFind",  func_search::deep_find);
146    t.insert("deep_shape", func_search::deep_shape);
147    t.insert("deepShape",  func_search::deep_shape);
148    t.insert("deep_like",  func_search::deep_like);
149    t.insert("deepLike",   func_search::deep_like);
150    t.insert("walk",       func_search::walk);
151    t.insert("walk_pre",   func_search::walk_pre_fn);
152    t.insert("walkPre",    func_search::walk_pre_fn);
153    t.insert("schema",     func_objects::schema);
154    t.insert("rec",        func_search::rec);
155    t.insert("trace_path", func_search::trace_path);
156    t.insert("tracePath",  func_search::trace_path);
157    t.insert("fanout",     func_objects::fanout);
158    t.insert("zip_shape",  func_objects::zip_shape);
159    t.insert("zipShape",   func_objects::zip_shape);
160
161    // Aggregates — full signature
162    t.insert("sum",      func_aggregates::sum);
163    t.insert("avg",      func_aggregates::avg);
164    t.insert("count",    func_aggregates::count);
165    t.insert("groupBy",  func_aggregates::group_by);
166    t.insert("group_by", func_aggregates::group_by);
167    t.insert("countBy",  func_aggregates::count_by);
168    t.insert("count_by", func_aggregates::count_by);
169    t.insert("indexBy",  func_aggregates::index_by);
170    t.insert("index_by", func_aggregates::index_by);
171    t.insert("explode",     func_aggregates::explode);
172    t.insert("implode",     func_aggregates::implode);
173    t.insert("groupShape",  func_aggregates::group_shape);
174    t.insert("group_shape", func_aggregates::group_shape);
175
176    // Aggregates — bool-flag wrappers
177    t.insert("min", b_min);
178    t.insert("max", b_max);
179    t.insert("any", b_any);
180    t.insert("all", b_all);
181
182    // Numeric scalar ops
183    t.insert("ceil",  b_ceil);
184    t.insert("floor", b_floor);
185    t.insert("round", b_round);
186    t.insert("abs",   b_abs);
187
188    // Paths
189    t.insert("get_path",       func_paths::get_path);
190    t.insert("getPath",        func_paths::get_path);
191    t.insert("set_path",       func_paths::set_path);
192    t.insert("setPath",        func_paths::set_path);
193    t.insert("del_path",       func_paths::del_path);
194    t.insert("delPath",        func_paths::del_path);
195    t.insert("del_paths",      func_paths::del_paths);
196    t.insert("delPaths",       func_paths::del_paths);
197    t.insert("has_path",       func_paths::has_path);
198    t.insert("hasPath",        func_paths::has_path);
199    t.insert("flatten_keys",   func_paths::flatten_keys);
200    t.insert("flattenKeys",    func_paths::flatten_keys);
201    t.insert("unflatten_keys", func_paths::unflatten_keys);
202    t.insert("unflattenKeys",  func_paths::unflatten_keys);
203
204    // CSV
205    t.insert("to_csv", b_to_csv);
206    t.insert("toCsv",  b_to_csv);
207    t.insert("to_tsv", b_to_tsv);
208    t.insert("toTsv",  b_to_tsv);
209
210    // Null safety / existence
211    t.insert("or",       b_or);
212    t.insert("has",      b_has);
213    t.insert("missing",  b_missing);
214    t.insert("includes", b_includes);
215    t.insert("contains", b_includes);
216
217    // Update / set
218    t.insert("set",    b_set);
219    t.insert("update", b_update);
220
221    // String methods
222    t.insert("upper",          func_strings::upper);
223    t.insert("lower",          func_strings::lower);
224    t.insert("capitalize",     func_strings::capitalize);
225    t.insert("title_case",     func_strings::title_case);
226    t.insert("titleCase",      func_strings::title_case);
227    t.insert("trim",           func_strings::trim);
228    t.insert("trim_left",      func_strings::trim_left);
229    t.insert("trimLeft",       func_strings::trim_left);
230    t.insert("trim_right",     func_strings::trim_right);
231    t.insert("trimRight",      func_strings::trim_right);
232    t.insert("lines",          func_strings::lines);
233    t.insert("words",          func_strings::words);
234    t.insert("chars",          func_strings::chars);
235    t.insert("to_number",      func_strings::to_number);
236    t.insert("toNumber",       func_strings::to_number);
237    t.insert("to_bool",        func_strings::to_bool);
238    t.insert("toBool",         func_strings::to_bool);
239    t.insert("to_base64",      func_strings::to_base64);
240    t.insert("toBase64",       func_strings::to_base64);
241    t.insert("from_base64",    func_strings::from_base64);
242    t.insert("fromBase64",     func_strings::from_base64);
243    t.insert("url_encode",     func_strings::url_encode);
244    t.insert("urlEncode",      func_strings::url_encode);
245    t.insert("url_decode",     func_strings::url_decode);
246    t.insert("urlDecode",      func_strings::url_decode);
247    t.insert("html_escape",    func_strings::html_escape);
248    t.insert("htmlEscape",     func_strings::html_escape);
249    t.insert("html_unescape",  func_strings::html_unescape);
250    t.insert("htmlUnescape",   func_strings::html_unescape);
251    t.insert("repeat",         func_strings::repeat);
252    t.insert("pad_left",       func_strings::pad_left);
253    t.insert("padLeft",        func_strings::pad_left);
254    t.insert("pad_right",      func_strings::pad_right);
255    t.insert("padRight",       func_strings::pad_right);
256    t.insert("starts_with",    func_strings::starts_with);
257    t.insert("startsWith",     func_strings::starts_with);
258    t.insert("ends_with",      func_strings::ends_with);
259    t.insert("endsWith",       func_strings::ends_with);
260    t.insert("index_of",       func_strings::index_of);
261    t.insert("indexOf",        func_strings::index_of);
262    t.insert("last_index_of",  func_strings::last_index_of);
263    t.insert("lastIndexOf",    func_strings::last_index_of);
264    t.insert("replace",        func_strings::replace);
265    t.insert("replace_all",    func_strings::replace_all);
266    t.insert("replaceAll",     func_strings::replace_all);
267    t.insert("strip_prefix",   func_strings::strip_prefix);
268    t.insert("stripPrefix",    func_strings::strip_prefix);
269    t.insert("strip_suffix",   func_strings::strip_suffix);
270    t.insert("stripSuffix",    func_strings::strip_suffix);
271    t.insert("slice",          func_strings::str_slice);
272    t.insert("split",          func_strings::split);
273    t.insert("indent",         func_strings::indent);
274    t.insert("dedent",         func_strings::dedent);
275    t.insert("matches",        func_strings::str_matches);
276    t.insert("scan",           func_strings::scan);
277
278    BuiltinRegistry { table: t }
279}
280
281// ── Wrapper functions ─────────────────────────────────────────────────────────
282
283fn b_len(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
284    Ok(Val::Int(match &recv {
285        Val::Arr(a) => a.len() as i64,
286        Val::IntVec(a) => a.len() as i64,
287        Val::FloatVec(a) => a.len() as i64,
288        Val::Obj(m) => m.len() as i64,
289        Val::Str(s) => s.chars().count() as i64,
290        _ => return err!("len: unsupported type"),
291    }))
292}
293
294fn b_type(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
295    Ok(Val::Str(Arc::from(recv.type_name())))
296}
297
298fn b_to_string(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
299    Ok(val_str(&val_to_string(&recv)))
300}
301
302fn b_to_json(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
303    // Fast path: primitive scalars serialise without a serde_json::Value
304    // detour.  to_json() gets called per-element in map pipelines, and the
305    // Val -> serde_json::Value conversion was the dominant cost there.
306    match &recv {
307        Val::Int(n)  => return Ok(val_str(&n.to_string())),
308        Val::Float(f) => {
309            if f.is_finite() {
310                let v = serde_json::Value::from(*f);
311                return Ok(val_str(&serde_json::to_string(&v).unwrap_or_default()));
312            } else {
313                return Ok(val_str("null"));
314            }
315        }
316        Val::Bool(b) => return Ok(val_str(if *b { "true" } else { "false" })),
317        Val::Null    => return Ok(val_str("null")),
318        Val::Str(s)  => {
319            let v = serde_json::Value::String(s.to_string());
320            return Ok(val_str(&serde_json::to_string(&v).unwrap_or_default()));
321        }
322        _ => {}
323    }
324    let sv: serde_json::Value = recv.into();
325    Ok(val_str(&serde_json::to_string(&sv).unwrap_or_default()))
326}
327
328fn b_from_json(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
329    // Direct one-pass parse via `Val::deserialize` — no intermediate
330    // `serde_json::Value` tree.  For `Val::Str` receivers we skip the
331    // buffer deep-clone too.  With the `simd-json` feature enabled,
332    // route through simd-json's SIMD structural scanner instead; the
333    // str receiver path copies once into a mutable buffer (required by
334    // simd-json), still faster than serde_json on docs over ~4KB.
335    #[cfg(feature = "simd-json")]
336    {
337        let bytes_owned: Vec<u8> = match &recv {
338            Val::Str(s) => s.as_bytes().to_vec(),
339            _           => val_to_string(&recv).into_bytes(),
340        };
341        let mut bytes = bytes_owned;
342        return Val::from_json_simd(&mut bytes)
343            .map_err(|e| EvalError(format!("from_json: {}", e)));
344    }
345    #[cfg(not(feature = "simd-json"))]
346    match &recv {
347        Val::Str(s) => Val::from_json_str(s.as_ref())
348            .map_err(|e| EvalError(format!("from_json: {}", e))),
349        _ => {
350            let s = val_to_string(&recv);
351            Val::from_json_str(&s)
352                .map_err(|e| EvalError(format!("from_json: {}", e)))
353        }
354    }
355}
356
357fn b_keys(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
358    func_objects::keys(recv)
359}
360
361fn b_values(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
362    func_objects::values(recv)
363}
364
365fn b_entries(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
366    func_objects::entries(recv)
367}
368
369fn b_to_pairs(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
370    func_objects::to_pairs(recv)
371}
372
373fn b_from_pairs(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
374    func_objects::from_pairs(recv)
375}
376
377fn b_invert(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
378    func_objects::invert(recv)
379}
380
381fn b_reverse(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
382    func_arrays::reverse(recv)
383}
384
385fn b_unique(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
386    func_arrays::unique(recv)
387}
388
389fn b_compact(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
390    func_arrays::compact(recv)
391}
392
393fn b_pairwise(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
394    func_arrays::pairwise(recv)
395}
396
397fn b_min(recv: Val, args: &[Arg], env: &Env) -> Result<Val, EvalError> {
398    func_aggregates::minmax(recv, args, env, false)
399}
400
401fn b_max(recv: Val, args: &[Arg], env: &Env) -> Result<Val, EvalError> {
402    func_aggregates::minmax(recv, args, env, true)
403}
404
405fn b_any(recv: Val, args: &[Arg], env: &Env) -> Result<Val, EvalError> {
406    func_aggregates::any_all(recv, args, env, false)
407}
408
409fn b_all(recv: Val, args: &[Arg], env: &Env) -> Result<Val, EvalError> {
410    func_aggregates::any_all(recv, args, env, true)
411}
412
413fn b_ceil(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
414    match recv {
415        Val::Int(n)   => Ok(Val::Int(n)),
416        Val::Float(f) => Ok(Val::Int(f.ceil() as i64)),
417        _ => err!("ceil: expected number"),
418    }
419}
420
421fn b_floor(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
422    match recv {
423        Val::Int(n)   => Ok(Val::Int(n)),
424        Val::Float(f) => Ok(Val::Int(f.floor() as i64)),
425        _ => err!("floor: expected number"),
426    }
427}
428
429fn b_round(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
430    match recv {
431        Val::Int(n)   => Ok(Val::Int(n)),
432        // Banker's rounding is the IEEE default; jq uses `round()` which
433        // ties-away-from-zero.  Rust's `f64::round` matches jq here.
434        Val::Float(f) => Ok(Val::Int(f.round() as i64)),
435        _ => err!("round: expected number"),
436    }
437}
438
439fn b_abs(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
440    match recv {
441        Val::Int(n)   => Ok(Val::Int(n.wrapping_abs())),
442        Val::Float(f) => Ok(Val::Float(f.abs())),
443        _ => err!("abs: expected number"),
444    }
445}
446
447fn b_to_csv(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
448    Ok(val_str(&func_csv::to_csv(&recv)))
449}
450
451fn b_to_tsv(recv: Val, _: &[Arg], _: &Env) -> Result<Val, EvalError> {
452    Ok(val_str(&func_csv::to_tsv(&recv)))
453}
454
455fn b_or(recv: Val, args: &[Arg], env: &Env) -> Result<Val, EvalError> {
456    let default = args.first().map(|a| eval_pos(a, env)).transpose()?.unwrap_or(Val::Null);
457    if recv.is_null() { Ok(default) } else { Ok(recv) }
458}
459
460fn b_has(recv: Val, args: &[Arg], env: &Env) -> Result<Val, EvalError> {
461    let result = args.iter().all(|a| {
462        eval_pos(a, env).ok()
463            .and_then(|v| v.as_str().map(|s| s.to_string()))
464            .map(|key| field_exists_nested(&recv, &key))
465            .unwrap_or(false)
466    });
467    Ok(Val::Bool(result))
468}
469
470fn b_missing(recv: Val, args: &[Arg], env: &Env) -> Result<Val, EvalError> {
471    let key = args.first().map(|a| eval_pos(a, env)).transpose()?
472        .and_then(|v| v.as_str().map(|s| s.to_string()))
473        .unwrap_or_default();
474    Ok(Val::Bool(!field_exists_nested(&recv, &key)))
475}
476
477fn b_includes(recv: Val, args: &[Arg], env: &Env) -> Result<Val, EvalError> {
478    let item = args.first().map(|a| eval_pos(a, env)).transpose()?.unwrap_or(Val::Null);
479    use super::util::val_to_key;
480    let key = val_to_key(&item);
481    Ok(Val::Bool(match &recv {
482        Val::Arr(a)    => a.iter().any(|v| val_to_key(v) == key),
483        Val::IntVec(a) => a.iter().any(|n| val_to_key(&Val::Int(*n)) == key),
484        Val::FloatVec(a) => a.iter().any(|f| val_to_key(&Val::Float(*f)) == key),
485        Val::StrVec(a) => match item.as_str() {
486            Some(needle) => a.iter().any(|s| s.as_ref() == needle),
487            None => false,
488        },
489        Val::Str(s) => s.contains(item.as_str().unwrap_or_default()),
490        _ => false,
491    }))
492}
493
494fn b_set(recv: Val, args: &[Arg], env: &Env) -> Result<Val, EvalError> {
495    let _ = recv;
496    Ok(args.first().map(|a| eval_pos(a, env)).transpose()?.unwrap_or(Val::Null))
497}
498
499fn b_update(recv: Val, args: &[Arg], env: &Env) -> Result<Val, EvalError> {
500    let lam = args.first().ok_or_else(|| EvalError("update: requires lambda".into()))?;
501    apply_item(recv, lam, env)
502}