1#![no_std]
13#![forbid(unsafe_code)]
14#![warn(missing_docs)]
15
16extern crate alloc;
17#[cfg(feature = "std")]
18extern crate std;
19
20pub mod input;
21#[cfg(feature = "math")]
22mod math;
23#[cfg(feature = "regex")]
24mod regex;
25#[cfg(feature = "time")]
26mod time;
27
28use alloc::string::{String, ToString};
29use alloc::{boxed::Box, vec::Vec};
30use bstr::{BStr, ByteSlice};
31use jaq_core::box_iter::{box_once, BoxIter};
32use jaq_core::native::{bome, run, unary, v, Filter, Fun};
33use jaq_core::{load, Bind, Cv, DataT, Error, Exn, RunPtr, ValR, ValT as _, ValX, ValXs};
34
35pub fn defs() -> impl Iterator<Item = load::parse::Def<&'static str>> {
37 load::parse(include_str!("defs.jq"), |p| p.defs())
38 .unwrap()
39 .into_iter()
40}
41
/// Yield all native filters of this crate: [`base_funs`] followed by [`extra_funs`].
///
/// Only available if the features `std`, `format`, `log`, `math`, `regex`, and `time`
/// are all enabled.
#[cfg(all(
    feature = "std",
    feature = "format",
    feature = "log",
    feature = "math",
    feature = "regex",
    feature = "time",
))]
pub fn funs<D: DataT>() -> impl Iterator<Item = Fun<D>>
where
    for<'a> D::V<'a>: ValT,
{
    let base = base_funs::<D>();
    let extra = extra_funs::<D>();
    base.chain(extra)
}
62
63pub fn base_funs<D: DataT>() -> impl Iterator<Item = Fun<D>>
70where
71 for<'a> D::V<'a>: ValT,
72{
73 base_run().into_vec().into_iter().map(run)
74}
75
/// Yield the native filters of the feature-gated groups, in the order
/// `std`, `format`, `math`, `regex`, `time`, `log`, each converted with `run`.
///
/// Only available if all of these features are enabled.
#[cfg(all(
    feature = "std",
    feature = "format",
    feature = "log",
    feature = "math",
    feature = "regex",
    feature = "time",
))]
pub fn extra_funs<D: DataT>() -> impl Iterator<Item = Fun<D>>
where
    for<'a> D::V<'a>: ValT,
{
    let groups: [Box<[Filter<RunPtr<D>>]>; 6] =
        [std(), format(), math(), regex(), time(), log()];
    groups
        .into_iter()
        .flat_map(|group| group.into_vec().into_iter().map(run))
}
93
94pub trait ValT: jaq_core::ValT + Ord + From<f64> + From<usize> {
96 fn into_seq<S: FromIterator<Self>>(self) -> Result<S, Self>;
100
101 fn is_int(&self) -> bool;
103
104 fn as_isize(&self) -> Option<isize>;
110
111 fn as_f64(&self) -> Option<f64>;
116
117 fn is_utf8_str(&self) -> bool;
119
120 fn as_bytes(&self) -> Option<&[u8]>;
122
123 fn as_utf8_bytes(&self) -> Option<&[u8]> {
125 self.is_utf8_str().then(|| self.as_bytes()).flatten()
126 }
127
128 fn try_as_bytes(&self) -> Result<&[u8], Error<Self>> {
130 self.as_bytes().ok_or_else(|| self.fail_str())
131 }
132
133 fn try_as_utf8_bytes(&self) -> Result<&[u8], Error<Self>> {
135 self.as_utf8_bytes().ok_or_else(|| self.fail_str())
136 }
137
138 fn as_sub_str(&self, sub: &[u8]) -> Self;
141
142 fn from_utf8_bytes(b: impl AsRef<[u8]> + Send + 'static) -> Self;
144}
145
146trait ValTx: ValT + Sized {
148 fn into_vec(self) -> Result<Vec<Self>, Error<Self>> {
149 self.into_seq().map_err(|v| Error::typ(v, "array"))
150 }
151
152 fn try_as_isize(&self) -> Result<isize, Error<Self>> {
153 self.as_isize()
154 .ok_or_else(|| Error::typ(self.clone(), "integer"))
155 }
156
157 #[cfg(feature = "math")]
158 fn try_as_i32(&self) -> Result<i32, Error<Self>> {
160 self.try_as_isize()?.try_into().map_err(Error::str)
161 }
162
163 fn try_as_f64(&self) -> Result<f64, Error<Self>> {
164 self.as_f64()
165 .ok_or_else(|| Error::typ(self.clone(), "number"))
166 }
167
168 fn mutate_arr(self, f: impl FnOnce(&mut Vec<Self>)) -> ValR<Self> {
170 let mut a = self.into_vec()?;
171 f(&mut a);
172 Ok(Self::from_iter(a))
173 }
174
175 fn try_mutate_arr<F>(self, f: F) -> ValX<Self>
177 where
178 F: FnOnce(&mut Vec<Self>) -> Result<(), Exn<Self>>,
179 {
180 let mut a = self.into_vec()?;
181 f(&mut a)?;
182 Ok(Self::from_iter(a))
183 }
184
185 fn round(self, f: impl FnOnce(f64) -> f64) -> ValR<Self> {
186 Ok(if self.is_int() {
187 self
188 } else {
189 let f = f(self.try_as_f64()?);
190 if f.is_finite() {
191 if isize::MIN as f64 <= f && f <= isize::MAX as f64 {
192 Self::from(f as isize)
193 } else {
194 Self::from_num(&alloc::format!("{f:.0}"))?
197 }
198 } else {
199 Self::from(f)
200 }
201 })
202 }
203
204 fn try_as_str(&self) -> Result<&str, Error<Self>> {
207 self.try_as_utf8_bytes()
208 .and_then(|s| core::str::from_utf8(s).map_err(Error::str))
209 }
210
211 fn map_utf8_str<B>(self, f: impl FnOnce(&[u8]) -> B) -> ValR<Self>
212 where
213 B: AsRef<[u8]> + Send + 'static,
214 {
215 Ok(Self::from_utf8_bytes(f(self.try_as_utf8_bytes()?)))
216 }
217
218 fn trim_utf8_with(&self, f: impl FnOnce(&[u8]) -> &[u8]) -> ValR<Self> {
219 Ok(self.as_sub_str(f(self.try_as_utf8_bytes()?)))
220 }
221
222 fn strip_fix<F>(self, fix: &Self, f: F) -> Result<Self, Error<Self>>
224 where
225 F: for<'a> FnOnce(&'a [u8], &[u8]) -> Option<&'a [u8]>,
226 {
227 Ok(match f(self.try_as_bytes()?, fix.try_as_bytes()?) {
228 Some(sub) => self.as_sub_str(sub),
229 None => self,
230 })
231 }
232
233 fn fail_str(&self) -> Error<Self> {
234 Error::typ(self.clone(), "string")
235 }
236}
237impl<T: ValT> ValTx for T {}
238
239fn sort_by<'a, V: ValT>(xs: &mut [V], f: impl Fn(V) -> ValXs<'a, V>) -> Result<(), Exn<V>> {
241 let mut err = None;
243 xs.sort_by_cached_key(|x| {
244 if err.is_some() {
245 return Vec::new();
246 };
247 match f(x.clone()).collect() {
248 Ok(y) => y,
249 Err(e) => {
250 err = Some(e);
251 Vec::new()
252 }
253 }
254 });
255 err.map_or(Ok(()), Err)
256}
257
258fn group_by<'a, V: ValT>(xs: Vec<V>, f: impl Fn(V) -> ValXs<'a, V>) -> ValX<V> {
260 let mut yx: Vec<(Vec<V>, V)> = xs
261 .into_iter()
262 .map(|x| Ok((f(x.clone()).collect::<Result<_, _>>()?, x)))
263 .collect::<Result<_, Exn<_>>>()?;
264
265 yx.sort_by(|(y1, _), (y2, _)| y1.cmp(y2));
266
267 let mut grouped = Vec::new();
268 let mut yx = yx.into_iter();
269 if let Some((mut group_y, first_x)) = yx.next() {
270 let mut group = Vec::from([first_x]);
271 for (y, x) in yx {
272 if group_y != y {
273 grouped.push(V::from_iter(core::mem::take(&mut group)));
274 group_y = y;
275 }
276 group.push(x);
277 }
278 if !group.is_empty() {
279 grouped.push(V::from_iter(group));
280 }
281 }
282
283 Ok(V::from_iter(grouped))
284}
285
286fn cmp_by<'a, V: Clone, F, R>(xs: Vec<V>, f: F, replace: R) -> Result<Option<V>, Exn<V>>
288where
289 F: Fn(V) -> ValXs<'a, V>,
290 R: Fn(&[V], &[V]) -> bool,
291{
292 let iter = xs.into_iter();
293 let mut iter = iter.map(|x| (x.clone(), f(x).collect::<Result<Vec<_>, _>>()));
294 let (mut mx, mut my) = if let Some((x, y)) = iter.next() {
295 (x, y?)
296 } else {
297 return Ok(None);
298 };
299 for (x, y) in iter {
300 let y = y?;
301 if replace(&my, &y) {
302 (mx, my) = (x, y);
303 }
304 }
305 Ok(Some(mx))
306}
307
308fn explode<V: ValT>(s: &[u8]) -> impl Iterator<Item = ValR<V>> + '_ {
310 let invalid = [].iter();
311 Explode { s, invalid }.map(|r| match r {
312 Err(b) => Ok((-(b as isize)).into()),
313 Ok(c) => Ok(isize::try_from(c as u32).map_err(Error::str)?.into()),
315 })
316}
317
318struct Explode<'a> {
319 s: &'a [u8],
320 invalid: core::slice::Iter<'a, u8>,
321}
322impl Iterator for Explode<'_> {
323 type Item = Result<char, u8>;
324 fn next(&mut self) -> Option<Self::Item> {
325 self.invalid.next().map(|next| Err(*next)).or_else(|| {
326 let (c, size) = bstr::decode_utf8(self.s);
327 let (consumed, rest) = self.s.split_at(size);
328 self.s = rest;
329 c.map(Ok).or_else(|| {
330 self.invalid = consumed.iter();
332 self.invalid.next().map(|next| Err(*next))
333 })
334 })
335 }
336 fn size_hint(&self) -> (usize, Option<usize>) {
337 let max = self.s.len();
338 let min = self.s.len() / 4;
339 let inv = self.invalid.as_slice().len();
340 (min + inv, Some(max + inv))
341 }
342}
343
344fn implode<V: ValT>(xs: &[V]) -> Result<Vec<u8>, Error<V>> {
346 let mut v = Vec::with_capacity(xs.len());
347 for x in xs {
348 let i = x.try_as_isize()?;
350 if let Ok(b) = u8::try_from(-i) {
351 v.push(b)
352 } else {
353 let c = u32::try_from(i).ok().and_then(char::from_u32);
355 let c = c.ok_or_else(|| Error::str(format_args!("cannot use {i} as character")))?;
356 v.extend(c.encode_utf8(&mut [0; 4]).as_bytes())
357 }
358 }
359 Ok(v)
360}
361
362fn once_or_empty<'a, T: 'a, E: 'a>(r: Result<Option<T>, E>) -> BoxIter<'a, Result<T, E>> {
363 Box::new(r.transpose().into_iter())
364}
365
/// Build the table of native filters that do not depend on any crate feature.
///
/// Every entry is a triple of filter name, argument signature, and implementation.
/// `cv.1` is the input value of the filter and `cv.0` holds its arguments.
// Silences clippy's `unit_arg` lint for this function.
#[allow(clippy::unit_arg)]
fn base_run<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    // signature of a filter that takes a single filter (closure) argument
    let f = || [Bind::Fun(())].into();
    Box::new([
        // rounding: integers are returned unchanged, see `ValTx::round`
        ("floor", v(0), |cv| bome(cv.1.round(f64::floor))),
        ("round", v(0), |cv| bome(cv.1.round(f64::round))),
        ("ceil", v(0), |cv| bome(cv.1.round(f64::ceil))),
        // number of bytes of a UTF-8 string
        ("utf8bytelength", v(0), |cv| {
            bome(cv.1.try_as_utf8_bytes().map(|s| (s.len() as isize).into()))
        }),
        // UTF-8 string -> array of code points (negated bytes for invalid sequences)
        ("explode", v(0), |cv| {
            bome(cv.1.try_as_utf8_bytes().and_then(|s| explode(s).collect()))
        }),
        // array of integers -> string, see the free function `implode`
        ("implode", v(0), |cv| {
            let implode = |s: Vec<_>| implode(&s);
            bome(cv.1.into_vec().and_then(implode).map(D::V::from_utf8_bytes))
        }),
        // ASCII-only case conversion; non-ASCII bytes are left untouched
        ("ascii_downcase", v(0), |cv| {
            bome(cv.1.map_utf8_str(ByteSlice::to_ascii_lowercase))
        }),
        ("ascii_upcase", v(0), |cv| {
            bome(cv.1.map_utf8_str(ByteSlice::to_ascii_uppercase))
        }),
        // array operations; they fail if the input is not an array
        ("reverse", v(0), |cv| bome(cv.1.mutate_arr(|a| a.reverse()))),
        ("sort", v(0), |cv| bome(cv.1.mutate_arr(|a| a.sort()))),
        // sort the input array by the outputs of the filter argument
        ("sort_by", f(), |mut cv| {
            let (f, fc) = cv.0.pop_fun();
            let f = move |v| f.run((fc.clone(), v));
            box_once(cv.1.try_mutate_arr(|a| sort_by(a, f)))
        }),
        // group the input array by the outputs of the filter argument
        ("group_by", f(), |mut cv| {
            let (f, fc) = cv.0.pop_fun();
            let f = move |v| f.run((fc.clone(), v));
            // the inner closure gives `?` a `Result` context to return to
            box_once((|| group_by(cv.1.into_vec()?, f))())
        }),
        // first element with the smallest key (replace only if strictly smaller);
        // yields nothing for an empty array
        ("min_by_or_empty", f(), |mut cv| {
            let (f, fc) = cv.0.pop_fun();
            let f = move |a| cmp_by(a, |v| f.run((fc.clone(), v)), |my, y| y < my);
            once_or_empty(cv.1.into_vec().map_err(Exn::from).and_then(f))
        }),
        // last element with the largest key (replace also if equal);
        // yields nothing for an empty array
        ("max_by_or_empty", f(), |mut cv| {
            let (f, fc) = cv.0.pop_fun();
            let f = move |a| cmp_by(a, |v| f.run((fc.clone(), v)), |my, y| y >= my);
            once_or_empty(cv.1.into_vec().map_err(Exn::from).and_then(f))
        }),
        // prefix/suffix tests on the bytes of input and argument
        ("startswith", v(1), |cv| {
            unary(cv, |v, s| {
                Ok(v.try_as_bytes()?.starts_with(s.try_as_bytes()?).into())
            })
        }),
        ("endswith", v(1), |cv| {
            unary(cv, |v, s| {
                Ok(v.try_as_bytes()?.ends_with(s.try_as_bytes()?).into())
            })
        }),
        // remove a prefix/suffix if present, else return the input unchanged
        ("ltrimstr", v(1), |cv| {
            unary(cv, |v, pre| v.strip_fix(&pre, <[u8]>::strip_prefix))
        }),
        ("rtrimstr", v(1), |cv| {
            unary(cv, |v, suf| v.strip_fix(&suf, <[u8]>::strip_suffix))
        }),
        // whitespace trimming on both sides / at the start / at the end
        ("trim", v(0), |cv| {
            bome(cv.1.trim_utf8_with(ByteSlice::trim))
        }),
        ("ltrim", v(0), |cv| {
            bome(cv.1.trim_utf8_with(ByteSlice::trim_start))
        }),
        ("rtrim", v(0), |cv| {
            bome(cv.1.trim_utf8_with(ByteSlice::trim_end))
        }),
        // double every double quote
        ("escape_csv", v(0), |cv| {
            bome(
                cv.1.try_as_utf8_bytes()
                    .map(|s| ValT::from_utf8_bytes(s.replace(b"\"", b"\"\""))),
            )
        }),
        // replace every single quote by `'\''`
        ("escape_sh", v(0), |cv| {
            bome(
                cv.1.try_as_utf8_bytes()
                    .map(|s| ValT::from_utf8_bytes(s.replace(b"'", b"'\\''"))),
            )
        }),
    ])
}
453
/// Return the number of seconds that have passed since the Unix epoch.
///
/// Fails if the system time lies before the Unix epoch.
#[cfg(feature = "std")]
fn now<V: From<String>>() -> Result<f64, Error<V>> {
    use std::time::{SystemTime, UNIX_EPOCH};
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => Ok(elapsed.as_secs_f64()),
        Err(e) => Err(Error::str(e)),
    }
}
462
/// Build the table of filters that require the standard library:
/// `env`, `now`, and `halt`.
#[cfg(feature = "std")]
fn std<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    Box::new([
        // map from the names of all environment variables to their values
        ("env", v(0), |_| {
            let entries = std::env::vars().map(|(key, val)| (D::V::from(key), D::V::from(val)));
            bome(D::V::from_map(entries))
        }),
        // seconds since the Unix epoch as floating-point number
        ("now", v(0), |_| bome(now().map(D::V::from))),
        // terminate the process with the exit code given as argument;
        // fails if the argument is not an integer
        ("halt", v(1), |mut cv| {
            let code = cv.0.pop_var().try_as_isize();
            match code {
                Ok(code) => std::process::exit(code as i32),
                Err(err) => bome(Err(err)),
            }
        }),
    ])
}
482
/// Replace all occurrences of `patterns[i]` in `s` with `replacements[i]`.
///
/// # Panics
///
/// Panics if no automaton can be built from `patterns`.
#[cfg(feature = "format")]
fn replace(s: &[u8], patterns: &[&str], replacements: &[&str]) -> Vec<u8> {
    use aho_corasick::AhoCorasick;
    let automaton = AhoCorasick::new(patterns).unwrap();
    automaton.replace_all_bytes(s, replacements)
}
488
/// Build the table of filters that escape, unescape, encode, and decode strings.
///
/// All filters fail if their input is not a UTF-8 string.
#[cfg(feature = "format")]
fn format<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    // characters with a special meaning in HTML ...
    const HTML_PATS: [&str; 5] = ["<", ">", "&", "\'", "\""];
    // ... and the entities that represent them, in the same order
    const HTML_REPS: [&str; 5] = ["&lt;", "&gt;", "&amp;", "&#39;", "&quot;"];
    Box::new([
        // replace special characters by their entities
        ("escape_html", v(0), |cv| {
            bome(cv.1.map_utf8_str(|s| replace(s, &HTML_PATS, &HTML_REPS)))
        }),
        // replace entities by the characters they represent
        ("unescape_html", v(0), |cv| {
            bome(cv.1.map_utf8_str(|s| replace(s, &HTML_REPS, &HTML_PATS)))
        }),
        // replace newline, carriage return, tab, backslash, and NUL by backslash escapes
        ("escape_tsv", v(0), |cv| {
            let pats = ["\n", "\r", "\t", "\\", "\0"];
            let reps = ["\\n", "\\r", "\\t", "\\\\", "\\0"];
            bome(cv.1.map_utf8_str(|s| replace(s, &pats, &reps)))
        }),
        // percent-encoding and -decoding
        ("encode_uri", v(0), |cv| {
            bome(cv.1.map_utf8_str(|s| urlencoding::encode_binary(s).to_string()))
        }),
        ("decode_uri", v(0), |cv| {
            bome(cv.1.map_utf8_str(|s| urlencoding::decode_binary(s).to_vec()))
        }),
        // Base64 with the standard alphabet
        ("encode_base64", v(0), |cv| {
            use base64::{engine::general_purpose::STANDARD, Engine};
            bome(cv.1.map_utf8_str(|s| STANDARD.encode(s)))
        }),
        // fails if the input is not valid Base64
        ("decode_base64", v(0), |cv| {
            use base64::{engine::general_purpose::STANDARD, Engine};
            bome(cv.1.try_as_utf8_bytes().and_then(|s| {
                STANDARD
                    .decode(s)
                    .map_err(Error::str)
                    .map(ValT::from_utf8_bytes)
            }))
        }),
    ])
}
529
/// Build the table of math filters.
///
/// Every entry is generated by one of the macros of the `math` module from
/// the name of a math function.
// NOTE(review): the macro names presumably encode argument and result kinds
// (`f` = float, `i` = integer, before and after the underscore) -- confirm in `math`.
#[cfg(feature = "math")]
fn math<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    // register a generated filter under a different name
    let rename = |name, (_name, arity, f): Filter<RunPtr<D>>| (name, arity, f);
    Box::new([
        math::f_f!(acos),
        math::f_f!(acosh),
        math::f_f!(asin),
        math::f_f!(asinh),
        math::f_f!(atan),
        math::f_f!(atanh),
        math::f_f!(cbrt),
        math::f_f!(cos),
        math::f_f!(cosh),
        math::f_f!(erf),
        math::f_f!(erfc),
        math::f_f!(exp),
        math::f_f!(exp10),
        math::f_f!(exp2),
        math::f_f!(expm1),
        math::f_f!(fabs),
        math::f_fi!(frexp),
        math::f_i!(ilogb),
        math::f_f!(j0),
        math::f_f!(j1),
        math::f_f!(lgamma),
        math::f_f!(log),
        math::f_f!(log10),
        math::f_f!(log1p),
        math::f_f!(log2),
        math::f_ff!(modf),
        // `nearbyint` is backed by the filter generated for `round`
        rename("nearbyint", math::f_f!(round)),
        math::f_f!(rint),
        math::f_f!(sin),
        math::f_f!(sinh),
        math::f_f!(sqrt),
        math::f_f!(tan),
        math::f_f!(tanh),
        math::f_f!(tgamma),
        math::f_f!(trunc),
        math::f_f!(y0),
        math::f_f!(y1),
        math::ff_f!(atan2),
        math::ff_f!(copysign),
        math::ff_f!(fdim),
        math::ff_f!(fmax),
        math::ff_f!(fmin),
        math::ff_f!(fmod),
        math::ff_f!(hypot),
        math::if_f!(jn),
        math::fi_f!(ldexp),
        math::ff_f!(nextafter),
        math::ff_f!(pow),
        math::ff_f!(remainder),
        // `scalbln` is backed by the filter generated for `scalbn`
        rename("scalbln", math::fi_f!(scalbn)),
        math::if_f!(yn),
        math::fff_f!(fma),
    ])
}
597
/// Run a regular expression on the input string and collect the results.
///
/// The arguments are popped from `cv.0`: first the flags, then the regex;
/// both have to be strings. The input `cv.1` has to be a UTF-8 string.
/// `s` and `m` are passed on to `regex::regex` as a pair; judging by the filter
/// names that use them, they select whether mismatching and matching parts are output.
///
/// # Errors
///
/// Fails if an argument or the input has the wrong type, or if
/// the flags or the regex are invalid.
#[cfg(feature = "regex")]
fn re<'a, D: DataT>(s: bool, m: bool, mut cv: Cv<'a, D>) -> ValR<D::V<'a>>
where
    D::V<'a>: ValT,
{
    use crate::regex::Part;

    let flags_val = cv.0.pop_var();
    let pattern_val = cv.0.pop_var();

    let flags = regex::Flags::new(flags_val.try_as_str()?)
        .map_err(|e| Error::str(format_args!("invalid regex flag: {e}")))?;
    let compiled = flags
        .regex(pattern_val.try_as_str()?)
        .map_err(|e| Error::str(format_args!("invalid regex: {e}")))?;

    let input = cv.1.try_as_utf8_bytes()?;
    let parts = regex::regex(input, &compiled, flags, (s, m));
    // turn a sub-slice of the input into a string value
    let sub = |bytes| cv.1.as_sub_str(bytes);
    parts
        .into_iter()
        .map(|part| match part {
            Part::Matches(found) => found
                .into_iter()
                .map(|capture| D::V::from_map(capture.fields(sub)))
                .collect(),
            Part::Mismatch(rest) => Ok(sub(rest)),
        })
        .collect()
}
623
/// Build the table of regular expression filters.
///
/// All filters take two variable arguments and are implemented by `re`;
/// they differ only in the two flags passed to it.
#[cfg(feature = "regex")]
fn regex<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    // signature of a filter that takes two variable arguments
    let two_vars = || [Bind::Var(()), Bind::Var(())].into();
    Box::new([
        ("matches", two_vars(), |ctx| bome(re(false, true, ctx))),
        ("split_matches", two_vars(), |ctx| bome(re(true, true, ctx))),
        ("split_", two_vars(), |ctx| bome(re(true, false, ctx))),
    ])
}
636
/// Build the table of date and time filters.
///
/// The implementations live in the `time` module; the `*local*` variants use
/// the time zone of the system, the others use UTC.
#[cfg(feature = "time")]
fn time<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    use jiff::tz::TimeZone;
    Box::new([
        // conversion from and to ISO 8601 strings
        ("fromdateiso8601", v(0), |ctx| {
            bome(ctx.1.try_as_str().and_then(time::from_iso8601))
        }),
        ("todateiso8601", v(0), |ctx| {
            bome(time::to_iso8601(&ctx.1).map(D::V::from))
        }),
        // format the input with the format string given as argument
        ("strftime", v(1), |ctx| {
            unary(ctx, |input, pattern| {
                time::strftime(&input, pattern.try_as_str()?, TimeZone::UTC)
            })
        }),
        ("strflocaltime", v(1), |ctx| {
            unary(ctx, |input, pattern| {
                time::strftime(&input, pattern.try_as_str()?, TimeZone::system())
            })
        }),
        // both are implemented by `time::gmtime`, with different time zones
        ("gmtime", v(0), |ctx| {
            bome(time::gmtime(&ctx.1, TimeZone::UTC))
        }),
        ("localtime", v(0), |ctx| {
            bome(time::gmtime(&ctx.1, TimeZone::system()))
        }),
        // parse the input string with the format string given as argument
        ("strptime", v(1), |ctx| {
            unary(ctx, |input, pattern| {
                time::strptime(input.try_as_str()?, pattern.try_as_str()?)
            })
        }),
        ("mktime", v(0), |ctx| bome(time::mktime(&ctx.1))),
    ])
}
674
/// Build the table of logging filters.
///
/// Both filters log their input via the `log` crate and yield no output.
#[cfg(feature = "log")]
fn log<D: DataT>() -> Box<[Filter<RunPtr<D>>]>
where
    for<'a> D::V<'a>: ValT,
{
    /// Log a value at error level: UTF-8 strings are logged as their raw
    /// contents, all other values via their `Display` implementation.
    fn eprint_raw<V: ValT>(val: &V) {
        match val.as_utf8_bytes() {
            Some(bytes) => log::error!("{}", BStr::new(bytes)),
            None => log::error!("{val}"),
        }
    }
    Box::new([
        // log the input at debug level
        ("debug_empty", v(0), |ctx| {
            log::debug!("{}", ctx.1);
            Box::new(core::iter::empty())
        }),
        // log the input at error level, strings without quoting
        ("stderr_empty", v(0), |ctx| {
            eprint_raw(&ctx.1);
            Box::new(core::iter::empty())
        }),
    ])
}