jaq_std/
lib.rs

1//! Standard library for the jq language.
2//!
3//! The standard library provides a set of filters.
4//! These filters are either implemented as definitions or as functions.
5//! For example, the standard library provides the `map(f)` filter,
6//! which is defined using the more elementary filter `[.[] | f]`.
7//!
8//! If you want to use the standard library in jaq, then
9//! you'll likely only need [`funs`] and [`defs`].
10//! Most other functions are relevant if you
11//! want to implement your own native filters.
12#![no_std]
13#![forbid(unsafe_code)]
14#![warn(missing_docs)]
15
16extern crate alloc;
17#[cfg(feature = "std")]
18extern crate std;
19
20pub mod input;
21#[cfg(feature = "math")]
22mod math;
23#[cfg(feature = "regex")]
24mod regex;
25#[cfg(feature = "time")]
26mod time;
27
28use alloc::string::{String, ToString};
29use alloc::{boxed::Box, vec::Vec};
30use bstr::{BStr, ByteSlice};
31use jaq_core::box_iter::{box_once, then, BoxIter};
32use jaq_core::{load, Bind, Cv, DataT, Error, Exn, Native, RunPtr, ValR, ValT as _, ValX, ValXs};
33
34/// Definitions of the standard library.
35pub fn defs() -> impl Iterator<Item = load::parse::Def<&'static str>> {
36    load::parse(include_str!("defs.jq"), |p| p.defs())
37        .unwrap()
38        .into_iter()
39}
40
/// Name, arguments, and implementation of a filter.
///
/// The tuple holds, in this order: the name of the filter,
/// one [`Bind`] per argument of the filter, and the implementation `F`.
pub type Filter<F> = (&'static str, Box<[Bind]>, F);
43
/// Named filters available by default in jaq
/// which are implemented as native filters, such as `length`, `keys`, ...,
/// but also `now`, `debug`, `fromdateiso8601`, ...
///
/// This yields the filters of [`base_funs`], followed by those of [`extra_funs`].
/// Filters implemented by definition, such as `map`, are not included;
/// see [`defs`] for these.
#[cfg(all(
    feature = "std",
    feature = "format",
    feature = "log",
    feature = "math",
    feature = "regex",
    feature = "time",
))]
pub fn funs<D: DataT>() -> impl Iterator<Item = Filter<Native<D>>>
where
    for<'a> D::V<'a>: ValT,
{
    let base = base_funs::<D>();
    let extra = extra_funs::<D>();
    base.chain(extra)
}
64
65/// Minimal set of filters that are generic over the value type.
66/// Return the minimal set of named filters available in jaq
67/// which are implemented as native filters, such as `length`, `keys`, ...,
68/// but not `now`, `debug`, `fromdateiso8601`, ...
69///
70/// Does not return filters from the standard library, such as `map`.
71pub fn base_funs<D: DataT>() -> impl Iterator<Item = Filter<Native<D>>>
72where
73    for<'a> D::V<'a>: ValT,
74{
75    let base_run = base_run().into_vec().into_iter().map(run);
76    let base_paths = base_paths().into_vec().into_iter().map(paths);
77    base_run.chain(base_paths)
78}
79
/// Supplementary set of filters that are generic over the value type.
///
/// This concatenates the filter tables that are gated behind the
/// `std`, `format`, `math`, `regex`, `time`, and `log` features (in this order).
#[cfg(all(
    feature = "std",
    feature = "format",
    feature = "log",
    feature = "math",
    feature = "regex",
    feature = "time",
))]
pub fn extra_funs<D: DataT>() -> impl Iterator<Item = Filter<Native<D>>>
where
    for<'a> D::V<'a>: ValT,
{
    let tables = [std(), format(), math(), regex(), time(), log()];
    tables
        .into_iter()
        .flat_map(|table| table.into_vec())
        .map(run)
}
97
98/// Values that the standard library can operate on.
99pub trait ValT: jaq_core::ValT + Ord + From<f64> + From<usize> {
100    /// Convert an array into a sequence.
101    ///
102    /// This returns the original value as `Err` if it is not an array.
103    fn into_seq<S: FromIterator<Self>>(self) -> Result<S, Self>;
104
105    /// True if the value is integer.
106    fn is_int(&self) -> bool;
107
108    /// Use the value as machine-sized integer.
109    ///
110    /// If this function returns `Some(_)`, then [`Self::is_int`] must return true.
111    /// However, the other direction must not necessarily be the case, because
112    /// there may be integer values that are not representable by `isize`.
113    fn as_isize(&self) -> Option<isize>;
114
115    /// Use the value as floating-point number.
116    ///
117    /// This succeeds for all numeric values,
118    /// rounding too large/small ones to +/- Infinity.
119    fn as_f64(&self) -> Option<f64>;
120
121    /// True if the value is interpreted as UTF-8 string.
122    fn is_utf8_str(&self) -> bool;
123
124    /// If the value is a string (whatever its interpretation), return its bytes.
125    fn as_bytes(&self) -> Option<&[u8]>;
126
127    /// If the value is interpreted as UTF-8 string, return its bytes.
128    fn as_utf8_bytes(&self) -> Option<&[u8]> {
129        self.is_utf8_str().then(|| self.as_bytes()).flatten()
130    }
131
132    /// If the value is a string (whatever its interpretation), return its bytes, else fail.
133    fn try_as_bytes(&self) -> Result<&[u8], Error<Self>> {
134        self.as_bytes().ok_or_else(|| self.fail_str())
135    }
136
137    /// If the value is interpreted as UTF-8 string, return its bytes, else fail.
138    fn try_as_utf8_bytes(&self) -> Result<&[u8], Error<Self>> {
139        self.as_utf8_bytes().ok_or_else(|| self.fail_str())
140    }
141
142    /// If the value is a string and `sub` points to a slice of the string,
143    /// shorten the string to `sub`, else panic.
144    fn as_sub_str(&self, sub: &[u8]) -> Self;
145
146    /// Interpret bytes as UTF-8 string value.
147    fn from_utf8_bytes(b: impl AsRef<[u8]> + Send + 'static) -> Self;
148}
149
150/// Convenience trait for implementing the core functions.
151trait ValTx: ValT + Sized {
152    fn into_vec(self) -> Result<Vec<Self>, Error<Self>> {
153        self.into_seq().map_err(|v| Error::typ(v, "array"))
154    }
155
156    fn try_as_isize(&self) -> Result<isize, Error<Self>> {
157        self.as_isize()
158            .ok_or_else(|| Error::typ(self.clone(), "integer"))
159    }
160
161    #[cfg(feature = "math")]
162    /// Use as an i32 to be given as an argument to a libm function.
163    fn try_as_i32(&self) -> Result<i32, Error<Self>> {
164        self.try_as_isize()?.try_into().map_err(Error::str)
165    }
166
167    fn try_as_f64(&self) -> Result<f64, Error<Self>> {
168        self.as_f64()
169            .ok_or_else(|| Error::typ(self.clone(), "number"))
170    }
171
172    /// Apply a function to an array.
173    fn mutate_arr(self, f: impl FnOnce(&mut Vec<Self>)) -> ValR<Self> {
174        let mut a = self.into_vec()?;
175        f(&mut a);
176        Ok(Self::from_iter(a))
177    }
178
179    /// Apply a function to an array.
180    fn try_mutate_arr<F>(self, f: F) -> ValX<Self>
181    where
182        F: FnOnce(&mut Vec<Self>) -> Result<(), Exn<Self>>,
183    {
184        let mut a = self.into_vec()?;
185        f(&mut a)?;
186        Ok(Self::from_iter(a))
187    }
188
189    fn round(self, f: impl FnOnce(f64) -> f64) -> ValR<Self> {
190        Ok(if self.is_int() {
191            self
192        } else {
193            let f = f(self.try_as_f64()?);
194            if f.is_finite() {
195                if isize::MIN as f64 <= f && f <= isize::MAX as f64 {
196                    Self::from(f as isize)
197                } else {
198                    // print floating-point number without decimal places,
199                    // i.e. like an integer
200                    Self::from_num(&alloc::format!("{f:.0}"))?
201                }
202            } else {
203                Self::from(f)
204            }
205        })
206    }
207
208    /// If the value is interpreted as UTF-8 string,
209    /// return its `str` representation.
210    fn try_as_str(&self) -> Result<&str, Error<Self>> {
211        self.try_as_utf8_bytes()
212            .and_then(|s| core::str::from_utf8(s).map_err(Error::str))
213    }
214
215    fn map_utf8_str<B>(self, f: impl FnOnce(&[u8]) -> B) -> ValR<Self>
216    where
217        B: AsRef<[u8]> + Send + 'static,
218    {
219        Ok(Self::from_utf8_bytes(f(self.try_as_utf8_bytes()?)))
220    }
221
222    fn trim_utf8_with(&self, f: impl FnOnce(&[u8]) -> &[u8]) -> ValR<Self> {
223        Ok(self.as_sub_str(f(self.try_as_utf8_bytes()?)))
224    }
225
226    /// Helper function to strip away the prefix or suffix of a string.
227    fn strip_fix<F>(self, fix: &Self, f: F) -> Result<Self, Error<Self>>
228    where
229        F: for<'a> FnOnce(&'a [u8], &[u8]) -> Option<&'a [u8]>,
230    {
231        Ok(match f(self.try_as_bytes()?, fix.try_as_bytes()?) {
232            Some(sub) => self.as_sub_str(sub),
233            None => self,
234        })
235    }
236
237    fn fail_str(&self) -> Error<Self> {
238        Error::typ(self.clone(), "string")
239    }
240}
241impl<T: ValT> ValTx for T {}
242
243/// Convert a filter with a run pointer to a native filter.
244pub fn run<D: DataT>((name, arity, run): Filter<RunPtr<D>>) -> Filter<Native<D>> {
245    (name, arity, Native::new(run))
246}
247
248type RunPathsPtr<D> = (RunPtr<D>, jaq_core::PathsPtr<D>);
249
250/// Convert a filter with a run and an update pointer to a native filter.
251fn paths<D: DataT>((name, arity, (run, paths)): Filter<RunPathsPtr<D>>) -> Filter<Native<D>> {
252    (name, arity, Native::new(run).with_paths(paths))
253}
254
255/// Sort array by the given function.
256fn sort_by<'a, V: ValT>(xs: &mut [V], f: impl Fn(V) -> ValXs<'a, V>) -> Result<(), Exn<V>> {
257    // Some(e) iff an error has previously occurred
258    let mut err = None;
259    xs.sort_by_cached_key(|x| {
260        if err.is_some() {
261            return Vec::new();
262        };
263        match f(x.clone()).collect() {
264            Ok(y) => y,
265            Err(e) => {
266                err = Some(e);
267                Vec::new()
268            }
269        }
270    });
271    err.map_or(Ok(()), Err)
272}
273
274/// Group an array by the given function.
275fn group_by<'a, V: ValT>(xs: Vec<V>, f: impl Fn(V) -> ValXs<'a, V>) -> ValX<V> {
276    let mut yx: Vec<(Vec<V>, V)> = xs
277        .into_iter()
278        .map(|x| Ok((f(x.clone()).collect::<Result<_, _>>()?, x)))
279        .collect::<Result<_, Exn<_>>>()?;
280
281    yx.sort_by(|(y1, _), (y2, _)| y1.cmp(y2));
282
283    let mut grouped = Vec::new();
284    let mut yx = yx.into_iter();
285    if let Some((mut group_y, first_x)) = yx.next() {
286        let mut group = Vec::from([first_x]);
287        for (y, x) in yx {
288            if group_y != y {
289                grouped.push(V::from_iter(core::mem::take(&mut group)));
290                group_y = y;
291            }
292            group.push(x);
293        }
294        if !group.is_empty() {
295            grouped.push(V::from_iter(group));
296        }
297    }
298
299    Ok(V::from_iter(grouped))
300}
301
302/// Get the minimum or maximum element from an array according to the given function.
303fn cmp_by<'a, V: Clone, F, R>(xs: Vec<V>, f: F, replace: R) -> Result<Option<V>, Exn<V>>
304where
305    F: Fn(V) -> ValXs<'a, V>,
306    R: Fn(&[V], &[V]) -> bool,
307{
308    let iter = xs.into_iter();
309    let mut iter = iter.map(|x| (x.clone(), f(x).collect::<Result<Vec<_>, _>>()));
310    let (mut mx, mut my) = if let Some((x, y)) = iter.next() {
311        (x, y?)
312    } else {
313        return Ok(None);
314    };
315    for (x, y) in iter {
316        let y = y?;
317        if replace(&my, &y) {
318            (mx, my) = (x, y);
319        }
320    }
321    Ok(Some(mx))
322}
323
324/// Convert a string into an array of its Unicode codepoints (with negative integers representing UTF-8 errors).
325fn explode<V: ValT>(s: &[u8]) -> impl Iterator<Item = ValR<V>> + '_ {
326    let invalid = [].iter();
327    Explode { s, invalid }.map(|r| match r {
328        Err(b) => Ok((-(b as isize)).into()),
329        // conversion from u32 to isize may fail on 32-bit systems for high values of c
330        Ok(c) => Ok(isize::try_from(c as u32).map_err(Error::str)?.into()),
331    })
332}
333
334struct Explode<'a> {
335    s: &'a [u8],
336    invalid: core::slice::Iter<'a, u8>,
337}
338impl Iterator for Explode<'_> {
339    type Item = Result<char, u8>;
340    fn next(&mut self) -> Option<Self::Item> {
341        self.invalid.next().map(|next| Err(*next)).or_else(|| {
342            let (c, size) = bstr::decode_utf8(self.s);
343            let (consumed, rest) = self.s.split_at(size);
344            self.s = rest;
345            c.map(Ok).or_else(|| {
346                // invalid UTF-8 sequence, emit all invalid bytes
347                self.invalid = consumed.iter();
348                self.invalid.next().map(|next| Err(*next))
349            })
350        })
351    }
352    fn size_hint(&self) -> (usize, Option<usize>) {
353        let max = self.s.len();
354        let min = self.s.len() / 4;
355        let inv = self.invalid.as_slice().len();
356        (min + inv, Some(max + inv))
357    }
358}
359
360/// Convert an array of Unicode codepoints (with negative integers representing UTF-8 errors) into a string.
361fn implode<V: ValT>(xs: &[V]) -> Result<Vec<u8>, Error<V>> {
362    let mut v = Vec::with_capacity(xs.len());
363    for x in xs {
364        // on 32-bit systems, some high u32 values cannot be represented as isize
365        let i = x.try_as_isize()?;
366        if let Ok(b) = u8::try_from(-i) {
367            v.push(b)
368        } else {
369            // may fail e.g. on `[1114112] | implode`
370            let c = u32::try_from(i).ok().and_then(char::from_u32);
371            let c = c.ok_or_else(|| Error::str(format_args!("cannot use {i} as character")))?;
372            v.extend(c.encode_utf8(&mut [0; 4]).as_bytes())
373        }
374    }
375    Ok(v)
376}
377
378/// This implements a ~10x faster version of:
379/// ~~~ text
380/// def range($from; $to; $by): $from |
381///    if $by > 0 then while(.  < $to; . + $by)
382///  elif $by < 0 then while(.  > $to; . + $by)
383///    else            while(. != $to; . + $by)
384///    end;
385/// ~~~
386fn range<V: ValT>(mut from: ValX<V>, to: V, by: V) -> impl Iterator<Item = ValX<V>> {
387    use core::cmp::Ordering::{Equal, Greater, Less};
388    let cmp = by.partial_cmp(&V::from(0usize)).unwrap_or(Equal);
389    core::iter::from_fn(move || match from.clone() {
390        Ok(x) => match cmp {
391            Greater => x < to,
392            Less => x > to,
393            Equal => x != to,
394        }
395        .then(|| core::mem::replace(&mut from, (x + by.clone()).map_err(Exn::from))),
396        e @ Err(_) => {
397            // return None after the error
398            from = Ok(to.clone());
399            Some(e)
400        }
401    })
402}
403
404fn once_or_empty<'a, T: 'a, E: 'a>(r: Result<Option<T>, E>) -> BoxIter<'a, Result<T, E>> {
405    Box::new(r.transpose().into_iter())
406}
407
408/// Box Once and Map Errors to exceptions.
409pub fn bome<'a, V: 'a>(r: ValR<V>) -> ValXs<'a, V> {
410    box_once(r.map_err(Exn::from))
411}
412
413/// Create a filter that takes a single variable argument and whose output is given by
414/// the function `f` that takes the input value and the value of the variable.
415pub fn unary<'a, D: DataT>(
416    mut cv: Cv<'a, D>,
417    f: impl Fn(D::V<'a>, D::V<'a>) -> ValR<D::V<'a>> + 'a,
418) -> ValXs<'a, D::V<'a>> {
419    bome(f(cv.1, cv.0.pop_var()))
420}
421
422/// Creates `n` variable arguments.
423pub fn v(n: usize) -> Box<[Bind]> {
424    core::iter::repeat(Bind::Var(())).take(n).collect()
425}
426
427#[allow(clippy::unit_arg)]
428fn base_run<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
429where
430    for<'a> D::V<'a>: ValT,
431{
432    let f = || [Bind::Fun(())].into();
433    Box::new([
434        ("error_empty", v(0), (|cv| bome(Err(Error::new(cv.1))))),
435        ("path", f(), |mut cv| {
436            let (f, fc) = cv.0.pop_fun();
437            let cvp = (fc, (cv.1, Default::default()));
438            Box::new(f.paths(cvp).map(|vp| {
439                let path: Vec<_> = vp?.1.iter().cloned().collect();
440                Ok(path.into_iter().rev().collect())
441            }))
442        }),
443        ("path_value", f(), |mut cv| {
444            let (f, fc) = cv.0.pop_fun();
445            let cvp = (fc, (cv.1, Default::default()));
446            Box::new(f.paths(cvp).map(|vp| {
447                let (v, path) = vp?;
448                let path: Vec<_> = path.iter().cloned().collect();
449                Ok([path.into_iter().rev().collect(), v].into_iter().collect())
450            }))
451        }),
452        ("floor", v(0), |cv| bome(cv.1.round(f64::floor))),
453        ("round", v(0), |cv| bome(cv.1.round(f64::round))),
454        ("ceil", v(0), |cv| bome(cv.1.round(f64::ceil))),
455        ("utf8bytelength", v(0), |cv| {
456            bome(cv.1.try_as_utf8_bytes().map(|s| (s.len() as isize).into()))
457        }),
458        ("explode", v(0), |cv| {
459            bome(cv.1.try_as_utf8_bytes().and_then(|s| explode(s).collect()))
460        }),
461        ("implode", v(0), |cv| {
462            let implode = |s: Vec<_>| implode(&s);
463            bome(cv.1.into_vec().and_then(implode).map(D::V::from_utf8_bytes))
464        }),
465        ("ascii_downcase", v(0), |cv| {
466            bome(cv.1.map_utf8_str(ByteSlice::to_ascii_lowercase))
467        }),
468        ("ascii_upcase", v(0), |cv| {
469            bome(cv.1.map_utf8_str(ByteSlice::to_ascii_uppercase))
470        }),
471        ("reverse", v(0), |cv| bome(cv.1.mutate_arr(|a| a.reverse()))),
472        ("keys_unsorted", v(0), |cv| {
473            bome(cv.1.key_values().map(|kv| kv.map(|(k, _v)| k)).collect())
474        }),
475        ("sort", v(0), |cv| bome(cv.1.mutate_arr(|a| a.sort()))),
476        ("sort_by", f(), |mut cv| {
477            let (f, fc) = cv.0.pop_fun();
478            let f = move |v| f.run((fc.clone(), v));
479            box_once(cv.1.try_mutate_arr(|a| sort_by(a, f)))
480        }),
481        ("group_by", f(), |mut cv| {
482            let (f, fc) = cv.0.pop_fun();
483            let f = move |v| f.run((fc.clone(), v));
484            box_once((|| group_by(cv.1.into_vec()?, f))())
485        }),
486        ("min_by_or_empty", f(), |mut cv| {
487            let (f, fc) = cv.0.pop_fun();
488            let f = move |a| cmp_by(a, |v| f.run((fc.clone(), v)), |my, y| y < my);
489            once_or_empty(cv.1.into_vec().map_err(Exn::from).and_then(f))
490        }),
491        ("max_by_or_empty", f(), |mut cv| {
492            let (f, fc) = cv.0.pop_fun();
493            let f = move |a| cmp_by(a, |v| f.run((fc.clone(), v)), |my, y| y >= my);
494            once_or_empty(cv.1.into_vec().map_err(Exn::from).and_then(f))
495        }),
496        ("range", v(3), |mut cv| {
497            let by = cv.0.pop_var();
498            let to = cv.0.pop_var();
499            let from = cv.0.pop_var();
500            Box::new(range(Ok(from), to, by))
501        }),
502        ("startswith", v(1), |cv| {
503            unary(cv, |v, s| {
504                Ok(v.try_as_bytes()?.starts_with(s.try_as_bytes()?).into())
505            })
506        }),
507        ("endswith", v(1), |cv| {
508            unary(cv, |v, s| {
509                Ok(v.try_as_bytes()?.ends_with(s.try_as_bytes()?).into())
510            })
511        }),
512        ("ltrimstr", v(1), |cv| {
513            unary(cv, |v, pre| v.strip_fix(&pre, <[u8]>::strip_prefix))
514        }),
515        ("rtrimstr", v(1), |cv| {
516            unary(cv, |v, suf| v.strip_fix(&suf, <[u8]>::strip_suffix))
517        }),
518        ("trim", v(0), |cv| {
519            bome(cv.1.trim_utf8_with(ByteSlice::trim))
520        }),
521        ("ltrim", v(0), |cv| {
522            bome(cv.1.trim_utf8_with(ByteSlice::trim_start))
523        }),
524        ("rtrim", v(0), |cv| {
525            bome(cv.1.trim_utf8_with(ByteSlice::trim_end))
526        }),
527        ("escape_csv", v(0), |cv| {
528            bome(
529                cv.1.try_as_utf8_bytes()
530                    .map(|s| ValT::from_utf8_bytes(s.replace(b"\"", b"\"\""))),
531            )
532        }),
533        ("escape_sh", v(0), |cv| {
534            bome(
535                cv.1.try_as_utf8_bytes()
536                    .map(|s| ValT::from_utf8_bytes(s.replace(b"'", b"'\\''"))),
537            )
538        }),
539    ])
540}
541
/// Construct a filter that yields only the first output (if any) of its filter argument,
/// where `$run` is the method used to evaluate the filter argument.
macro_rules! first {
    ( $run:ident ) => {
        |(mut ctx, val)| {
            let (filter, fctx) = ctx.pop_fun();
            let mut outputs = filter.$run((fctx, val));
            let head = outputs.next();
            Box::new(head.into_iter())
        }
    };
}
/// Construct a filter that yields only the last output (if any) of its filter argument,
/// where `$run` is the method used to evaluate the filter argument.
///
/// If the filter argument yields an error, then evaluation stops and the error is yielded.
macro_rules! last {
    ( $run:ident ) => {
        |(mut ctx, val)| {
            let (filter, fctx) = ctx.pop_fun();
            let mut last = Ok(None);
            for y in filter.$run((fctx, val)) {
                match y {
                    Ok(y) => last = Ok(Some(y)),
                    Err(e) => {
                        last = Err(e);
                        break;
                    }
                }
            }
            once_or_empty(last)
        }
    };
}
/// Construct a filter that yields at most `n` outputs of its filter argument,
/// where `n` is its variable argument and
/// `$run` is the method used to evaluate the filter argument.
///
/// If `n` is not positive, then the filter argument is not evaluated at all.
/// Fails if `n` is not a machine-sized integer.
macro_rules! limit {
    ( $run:ident ) => {
        |(mut ctx, val)| {
            let (filter, fctx) = ctx.pop_fun();
            let n = ctx.pop_var();
            // negative counts are treated like zero
            let count = n.try_as_isize().map(|n| usize::try_from(n).unwrap_or(0));
            then(count.map_err(Exn::from), |count| {
                if count == 0 {
                    Box::new(core::iter::empty())
                } else {
                    Box::new(filter.$run((fctx, val)).take(count))
                }
            })
        }
    };
}
/// Construct a filter that omits the first `n` outputs of its filter argument,
/// where `n` is its variable argument and
/// `$run` is the method used to evaluate the filter argument.
///
/// Errors are always yielded, even if they occur among the first `n` outputs.
/// Fails if `n` is not a machine-sized integer.
macro_rules! skip {
    ( $run:ident ) => {
        |(mut ctx, val)| {
            let (filter, fctx) = ctx.pop_fun();
            let n = ctx.pop_var();
            // negative counts are treated like zero
            let count = n.try_as_isize().map(|n| usize::try_from(n).unwrap_or(0));
            then(count.map_err(Exn::from), |count| {
                let outputs = filter.$run((fctx, val)).enumerate();
                let kept = outputs.filter(move |(i, y)| *i >= count || y.is_err());
                Box::new(kept.map(|(_, y)| y))
            })
        }
    };
}
584
585fn base_paths<D: DataT>() -> Box<[Filter<RunPathsPtr<D>>]>
586where
587    for<'a> D::V<'a>: ValT,
588{
589    let f = || [Bind::Fun(())].into();
590    let vf = || [Bind::Var(()), Bind::Fun(())].into();
591    Box::new([
592        ("first", f(), (first!(run), first!(paths))),
593        ("last", f(), (last!(run), last!(paths))),
594        ("limit", vf(), (limit!(run), limit!(paths))),
595        ("skip", vf(), (skip!(run), skip!(paths))),
596    ])
597}
598
/// Return the number of seconds that have passed since the Unix epoch.
///
/// Fails if the system time lies before the Unix epoch.
#[cfg(feature = "std")]
fn now<V: From<String>>() -> Result<f64, Error<V>> {
    use std::time::{SystemTime, UNIX_EPOCH};
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => Ok(elapsed.as_secs_f64()),
        Err(e) => Err(Error::str(e)),
    }
}
607
/// Filters that require the standard library, namely `env`, `now`, and `halt`.
#[cfg(feature = "std")]
fn std<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    use std::env::vars_os;

    /// Convert an OS string to a string,
    /// replacing invalid Unicode by the replacement character.
    fn lossy(s: std::ffi::OsString) -> String {
        s.into_string()
            .unwrap_or_else(|s| s.to_string_lossy().into_owned())
    }

    Box::new([
        ("env", v(0), |_| {
            // we use `vars_os` instead of `vars` here, because the iterator of `vars`
            // panics when any environment variable is not valid Unicode
            bome(D::V::from_map(
                vars_os().map(|(k, v)| (D::V::from(lossy(k)), D::V::from(lossy(v)))),
            ))
        }),
        ("now", v(0), |_| bome(now().map(D::V::from))),
        ("halt", v(1), |mut cv| {
            // fail instead of exiting if the exit code is not a machine-sized integer
            let exit_code = cv.0.pop_var().try_as_isize();
            // the exit code is truncated to 32 bits
            bome(exit_code.map(|exit_code| std::process::exit(exit_code as i32)))
        }),
    ])
}
627
/// Replace all occurrences of `patterns[i]` in `s` by `replacements[i]`.
///
/// # Panics
///
/// Panics if no automaton can be constructed from `patterns`.
#[cfg(feature = "format")]
fn replace(s: &[u8], patterns: &[&str], replacements: &[&str]) -> Vec<u8> {
    aho_corasick::AhoCorasick::new(patterns)
        .unwrap()
        .replace_all_bytes(s, replacements)
}
633
/// Filters that escape/unescape and encode/decode strings.
///
/// All of these filters fail if their input is not a UTF-8 string.
#[cfg(feature = "format")]
fn format<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    use base64::{engine::general_purpose::STANDARD, Engine};

    // characters that are escaped in HTML, and their corresponding escape sequences
    const HTML_PATS: [&str; 5] = ["<", ">", "&", "\'", "\""];
    const HTML_REPS: [&str; 5] = ["&lt;", "&gt;", "&amp;", "&apos;", "&quot;"];
    // characters that are escaped in TSV, and their corresponding escape sequences
    const TSV_PATS: [&str; 5] = ["\n", "\r", "\t", "\\", "\0"];
    const TSV_REPS: [&str; 5] = ["\\n", "\\r", "\\t", "\\\\", "\\0"];

    Box::new([
        ("escape_html", v(0), |(_, val)| {
            bome(val.map_utf8_str(|s| replace(s, &HTML_PATS, &HTML_REPS)))
        }),
        ("unescape_html", v(0), |(_, val)| {
            bome(val.map_utf8_str(|s| replace(s, &HTML_REPS, &HTML_PATS)))
        }),
        ("escape_tsv", v(0), |(_, val)| {
            bome(val.map_utf8_str(|s| replace(s, &TSV_PATS, &TSV_REPS)))
        }),
        ("encode_uri", v(0), |(_, val)| {
            bome(val.map_utf8_str(|s| urlencoding::encode_binary(s).to_string()))
        }),
        ("decode_uri", v(0), |(_, val)| {
            bome(val.map_utf8_str(|s| urlencoding::decode_binary(s).to_vec()))
        }),
        ("encode_base64", v(0), |(_, val)| {
            bome(val.map_utf8_str(|s| STANDARD.encode(s)))
        }),
        ("decode_base64", v(0), |(_, val)| {
            // fails if the input is not valid base64
            let decoded = val
                .try_as_utf8_bytes()
                .and_then(|s| STANDARD.decode(s).map_err(Error::str));
            bome(decoded.map(ValT::from_utf8_bytes))
        }),
    ])
}
674
/// Mathematical filters.
///
/// The filters are created by the macros of the `math` module.
/// NOTE(review): the macro names appear to encode the types of the inputs and outputs
/// of the functions (`f` for floating-point, `i` for integer), e.g.
/// `ff_f` for a function from two floats to a float; confirm against the `math` module.
#[cfg(feature = "math")]
fn math<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    // make a filter available under a different name
    let rename = |name, (_name, arity, f): Filter<RunPtr<D>>| (name, arity, f);
    Box::new([
        math::f_f!(acos),
        math::f_f!(acosh),
        math::f_f!(asin),
        math::f_f!(asinh),
        math::f_f!(atan),
        math::f_f!(atanh),
        math::f_f!(cbrt),
        math::f_f!(cos),
        math::f_f!(cosh),
        math::f_f!(erf),
        math::f_f!(erfc),
        math::f_f!(exp),
        math::f_f!(exp10),
        math::f_f!(exp2),
        math::f_f!(expm1),
        math::f_f!(fabs),
        math::f_fi!(frexp),
        math::f_i!(ilogb),
        math::f_f!(j0),
        math::f_f!(j1),
        math::f_f!(lgamma),
        math::f_f!(log),
        math::f_f!(log10),
        math::f_f!(log1p),
        math::f_f!(log2),
        // logb is implemented in jaq-std
        math::f_ff!(modf),
        // `nearbyint` rounds to the nearest integer in the current rounding mode,
        // which breaks ties towards the even integer by default;
        // its result is thus that of `rint`, not that of `round`,
        // which breaks ties away from zero
        rename("nearbyint", math::f_f!(rint)),
        // pow10 is implemented in jaq-std
        math::f_f!(rint),
        // significand is implemented in jaq-std
        math::f_f!(sin),
        math::f_f!(sinh),
        math::f_f!(sqrt),
        math::f_f!(tan),
        math::f_f!(tanh),
        math::f_f!(tgamma),
        math::f_f!(trunc),
        math::f_f!(y0),
        math::f_f!(y1),
        math::ff_f!(atan2),
        math::ff_f!(copysign),
        // drem is implemented in jaq-std
        math::ff_f!(fdim),
        math::ff_f!(fmax),
        math::ff_f!(fmin),
        math::ff_f!(fmod),
        math::ff_f!(hypot),
        math::if_f!(jn),
        math::fi_f!(ldexp),
        math::ff_f!(nextafter),
        // nexttoward is implemented in jaq-std
        math::ff_f!(pow),
        math::ff_f!(remainder),
        // scalb is implemented in jaq-std
        rename("scalbln", math::fi_f!(scalbn)),
        math::if_f!(yn),
        math::fff_f!(fma),
    ])
}
742
/// Apply a regular expression to the input string.
///
/// The regular expression and its flags are taken from the variable arguments.
/// The parameters `s` and `m` are passed on to `regex::regex`.
/// NOTE(review): judging from the callers, `s` presumably selects whether
/// mismatching parts are returned, and `m` whether matches are returned; confirm.
///
/// Fails if the flags or the regular expression are invalid or not UTF-8 strings, or
/// if the input is not a UTF-8 string.
#[cfg(feature = "regex")]
fn re<'a, D: DataT>(s: bool, m: bool, mut cv: Cv<'a, D>) -> ValR<D::V<'a>>
where
    D::V<'a>: ValT,
{
    use crate::regex::Part::{Matches, Mismatch};

    let flags = cv.0.pop_var();
    let pattern = cv.0.pop_var();
    let input = cv.1;

    let flags = regex::Flags::new(flags.try_as_str()?)
        .map_err(|e| Error::str(format_args!("invalid regex flag: {e}")))?;
    let pattern = flags
        .regex(pattern.try_as_str()?)
        .map_err(|e| Error::str(format_args!("invalid regex: {e}")))?;
    let parts = regex::regex(input.try_as_utf8_bytes()?, &pattern, flags, (s, m));

    // all output strings are substrings of the input
    let sub = |bytes| input.as_sub_str(bytes);
    parts
        .into_iter()
        .map(|part| match part {
            Matches(ms) => ms
                .into_iter()
                .map(|m| D::V::from_map(m.fields(sub)))
                .collect(),
            Mismatch(bytes) => Ok(sub(bytes)),
        })
        .collect()
}
768
/// Filters for regular expressions.
///
/// All filters take two variable arguments and are implemented by [`re`];
/// they differ only in the two flags that they pass to it.
#[cfg(feature = "regex")]
fn regex<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    Box::new([
        ("matches", v(2), |cv| bome(re(false, true, cv))),
        ("split_matches", v(2), |cv| bome(re(true, true, cv))),
        ("split_", v(2), |cv| bome(re(true, false, cv))),
    ])
}
781
/// Filters for parsing, formatting, and converting times.
///
/// The filters `strflocaltime` and `localtime` behave like
/// `strftime` and `gmtime`, but use the system time zone instead of UTC.
#[cfg(feature = "time")]
fn time<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    use jiff::tz::TimeZone;
    Box::new([
        ("fromdateiso8601", v(0), |(_, val)| {
            bome(val.try_as_str().and_then(time::from_iso8601))
        }),
        ("todateiso8601", v(0), |(_, val)| {
            bome(time::to_iso8601(&val).map(D::V::from))
        }),
        ("strftime", v(1), |cv| {
            unary(cv, |val, format| {
                time::strftime(&val, format.try_as_str()?, TimeZone::UTC)
            })
        }),
        ("strflocaltime", v(1), |cv| {
            unary(cv, |val, format| {
                time::strftime(&val, format.try_as_str()?, TimeZone::system())
            })
        }),
        ("gmtime", v(0), |(_, val)| {
            bome(time::gmtime(&val, TimeZone::UTC))
        }),
        ("localtime", v(0), |(_, val)| {
            bome(time::gmtime(&val, TimeZone::system()))
        }),
        ("strptime", v(1), |cv| {
            unary(cv, |val, format| {
                time::strptime(val.try_as_str()?, format.try_as_str()?)
            })
        }),
        ("mktime", v(0), |(_, val)| bome(time::mktime(&val))),
    ])
}
819
/// Filters that log their input and yield no output.
#[cfg(feature = "log")]
fn log<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    /// Log a value at error level, where
    /// UTF-8 strings are logged by their bytes instead of their `Display` output.
    fn eprint_raw<V: ValT>(v: &V) {
        match v.as_utf8_bytes() {
            Some(s) => log::error!("{}", BStr::new(s)),
            None => log::error!("{v}"),
        }
    }
    Box::new([
        ("debug_empty", v(0), |cv| {
            let x = &cv.1;
            log::debug!("{x}");
            Box::new(core::iter::empty())
        }),
        ("stderr_empty", v(0), |cv| {
            eprint_raw(&cv.1);
            Box::new(core::iter::empty())
        }),
    ])
}