Skip to main content

jaq_core/
compile.rs

1//! Program compilation.
2
3use crate::load::{self, lex, parse};
4use crate::{ops, Bind as Arg};
5use alloc::collections::{BTreeMap, BTreeSet};
6use alloc::{boxed::Box, string::String, vec::Vec};
7
8type NativeId = usize;
9type ModId = usize;
10type VarId = usize;
11type VarSkip = usize;
12type Arity = usize;
13
14/// Function from a value to a stream of value results.
15#[derive(Debug, Clone)]
16pub struct Filter<F> {
17    /// Program graph, implemented as lookup table
18    pub lut: Lut<F>,
19    /// Entry point into the program graph
20    pub id: TermId,
21}
22
23/// Index of a term in the look-up table.
24#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
25pub struct TermId(pub(crate) usize);
26
27/// Look-up table for terms and functions.
28#[derive(Clone, Debug)]
29pub struct Lut<F> {
30    /// `terms[tid]` yields the term corresponding to the term ID `tid`
31    pub(crate) terms: Vec<Term>,
32    pub(crate) funs: Vec<F>,
33}
34
35impl<F> Default for Filter<F> {
36    fn default() -> Self {
37        Self {
38            id: TermId(0),
39            lut: Lut::new([Term::Id].into()),
40        }
41    }
42}
43
44impl<F> Lut<F> {
45    fn new(terms: Vec<Term>) -> Self {
46        let funs = Vec::new();
47        Self { terms, funs }
48    }
49
50    fn map_funs<F2>(self, f: impl Fn(F) -> F2) -> Lut<F2> {
51        Lut {
52            funs: self.funs.into_iter().map(f).collect(),
53            terms: self.terms,
54        }
55    }
56
57    fn insert_term(&mut self, t: Term) -> TermId {
58        let tid = self.terms.len();
59        self.terms.push(t);
60        TermId(tid)
61    }
62}
63
64/// How to call a filter implemented by definition.
65///
66/// This is important to achieve tail-call optimisation (TCO).
67/// In particular, a *tail call* is any call to an ancestor from
68/// a position where the output is returned as-is by the ancestor.
69///
70/// See
71/// [`Compiler::term`] for a list of filters that return outputs of sub-filters as-is, and
72/// [`Locals::call`] for how the [`CallType`] is determined.
73#[derive(Copy, Clone, Debug, PartialEq, Eq)]
74pub(crate) enum CallType {
75    /// Run filter and return its outputs as-is
76    Inline,
77    /// Return exception that encodes filter ID, arguments and input (tail call)
78    Throw,
79    /// Run filter and expand thrown exceptions that match the filter ID
80    CatchOne,
81    /// Run filter and expand thrown exceptions
82    CatchAll,
83}
84
85#[derive(Clone, Debug, Default)]
86pub(crate) enum Term<T = TermId> {
87    #[default]
88    /// Identity (`.`)
89    Id,
90    /// Recursion (`..`)
91    Recurse,
92    ToString,
93
94    Int(isize),
95    Num(String),
96    Str(String),
97    /// Array construction (`[f]`)
98    Arr(T),
99    /// Empty object (`{}`)
100    ObjEmpty,
101    /// Singleton object (`{f: g}`)
102    ObjSingle(T, T),
103
104    /// Bound variable (`$x`), label (`label $x`), or filter argument (`a`)
105    Var(VarId),
106
107    /// Call to a filter (`filter`, `filter(…)`)
108    CallDef(TermId, Box<[Arg<T>]>, VarSkip, CallType),
109
110    Native(NativeId, Box<[Arg<T>]>),
111
112    /// Binding of a break label (`label $x | f`)
113    Label(T),
114    /// Negation operation (`-f`)
115    Neg(T),
116    /// Variable binding (`f as $x | g`) if identifier (`x`) is given, otherwise
117    /// application (`f | g`)
118    Pipe(T, Option<Pattern<T>>, T),
119    /// Concatenation (`f, g`)
120    Comma(T, T),
121    /// Assignment (`f = g`)
122    Assign(T, T),
123    /// Update-assignment (`f |= g`)
124    Update(T, T),
125    /// Arithmetical update-assignment (`f += g`, `f -= g`, `f *= g`, `f /= g`, `f %= g`)
126    UpdateMath(T, ops::Math, T),
127    /// Alternation update-assignment (`f //= g`)
128    UpdateAlt(T, T),
129    /// Logical operation (`f and g`, `f or g`)
130    Logic(T, bool, T),
131    /// Arithmetical operation (`f + g`, `f - g`, `f * g`, `f / g`, `f % g`)
132    Math(T, ops::Math, T),
133    /// Comparison operation (`f < g`, `f <= g`, `f > g`, `f >= g`, `f == g`, `f != g`)
134    Cmp(T, ops::Cmp, T),
135    /// Alternation (`f // g`)
136    Alt(T, T),
137    /// Try-catch (`try f catch g`)
138    TryCatch(T, T),
139    /// If-then-else (`if f then g else h end`)
140    Ite(T, T, T),
141    /// `reduce` and `foreach`
142    ///
143    ///  Assuming that `xs` evaluates to `x0`, `x1`, ..., `xn`,
144    /// `reduce xs as $x (init; f)` evaluates to
145    ///
146    /// ~~~ text
147    /// init
148    /// | x0 as $x | f
149    /// | ...
150    /// | xn as $x | f
151    /// ~~~
152    ///
153    /// and `foreach xs as $x (init; f; project)` evaluates to
154    ///
155    /// ~~~ text
156    /// init |
157    /// x0 as $x | f | project,
158    /// ...
159    /// xn as $x | f | project,
160    /// empty
161    /// ~~~
162    Fold(T, Pattern<T>, T, T, Fold<T>),
163    Path(T, crate::path::Path<T>),
164}
165
166#[derive(Clone, Debug)]
167pub(crate) enum Fold<T> {
168    Reduce,
169    Foreach(Option<T>),
170}
171
172#[derive(Clone, Debug)]
173pub(crate) enum Pattern<F> {
174    Var,
175    Idx(Vec<(F, Self)>),
176}
177
178/// Compilation error.
179pub type Error<S> = (S, Undefined);
180
181/// Compilation errors.
182pub type Errors<S, P> = load::Errors<S, P, Vec<Error<S>>>;
183
184/// Type of an undefined symbol.
185#[derive(Debug)]
186#[non_exhaustive]
187pub enum Undefined {
188    /// module
189    Mod,
190    /// variable
191    Var,
192    /// label variable
193    Label,
194    /// filter with arity
195    Filter(Arity),
196}
197
198impl Undefined {
199    /// String representation of an unexpected symbol type.
200    pub fn as_str(&self) -> &'static str {
201        match self {
202            Self::Var => "variable",
203            Self::Mod => "module",
204            Self::Label => "label",
205            Self::Filter(_arity) => "filter",
206        }
207    }
208}
209
210/// jq program compiler.
211///
212/// This contains strings of type `S` and native functions of type `F`.
213pub struct Compiler<S, F> {
214    lut: Lut<(Sig<S>, F)>,
215
216    /// `mod_map[mid]` yields all top-level definitions contained inside a module with ID `mid`
217    mod_map: Vec<Vec<Def<S>>>,
218
219    imported_mods: Vec<(ModId, S)>,
220    included_mods: Vec<ModId>,
221
222    global_vars: Vec<S>,
223    imported_vars: Vec<(S, ModId)>,
224
225    locals: Locals<S>,
226
227    errs: Vec<Error<S>>,
228}
229
230type Def<S> = (Sig<S>, TermId, Tr);
231
232impl<S, F> Default for Compiler<S, F> {
233    fn default() -> Self {
234        Self {
235            lut: Lut {
236                terms: Vec::new(),
237                funs: Vec::new(),
238            },
239            mod_map: Vec::new(),
240            imported_mods: Vec::new(),
241            included_mods: Vec::new(),
242            global_vars: Vec::new(),
243            imported_vars: Vec::new(),
244            locals: Locals::default(),
245            errs: Vec::new(),
246        }
247    }
248}
249
250#[derive(Clone, Debug)]
251struct Sig<S, A = Arg> {
252    name: S,
253    // TODO: we could analyse for each argument whether it is TR, and
254    // use this when converting args at callsite
255    args: Box<[A]>,
256}
257
258fn bind_from<T>(s: &str, x: T) -> Arg<T> {
259    if s.starts_with('$') {
260        Arg::Var(x)
261    } else {
262        Arg::Fun(x)
263    }
264}
265
266fn binds<T, U: Copy>(binds: &[Arg<T>], args: &[U]) -> Box<[Arg<U>]> {
267    assert!(binds.len() == args.len());
268    let args = binds.iter().zip(args);
269    args.map(|(bind, id)| bind.as_ref().map(|_| *id)).collect()
270}
271
272impl<S: Eq, A> Sig<S, A> {
273    fn matches(&self, name: S, args: &[TermId]) -> bool {
274        name == self.name && args.len() == self.args.len()
275    }
276}
277
278/// Store a map of vectors plus the sum of the lengths of all vectors.
279struct MapVecLen<S> {
280    bound: MapVec<S, usize>,
281    total: usize,
282}
283
284impl<S> Default for MapVecLen<S> {
285    fn default() -> Self {
286        Self {
287            bound: MapVec::default(),
288            total: 0,
289        }
290    }
291}
292
293impl<S: Ord> MapVecLen<S> {
294    fn push(&mut self, name: S) {
295        self.total += 1;
296        self.bound.push(name, self.total)
297    }
298
299    fn pop(&mut self, name: &S) {
300        assert_eq!(self.bound.pop(name), Some(self.total));
301        self.total -= 1;
302    }
303
304    fn is_empty(&self) -> bool {
305        self.bound.is_empty() && self.total == 0
306    }
307}
308
309enum Fun<S> {
310    Arg,
311    Parent(Box<[Arg<S>]>, TermId),
312    /// Tr stores which tail-recursive calls the sibling can return
313    Sibling(Box<[Arg<S>]>, TermId, Tr),
314}
315
316/// Single binding.
317#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
318pub(crate) enum Bind<V, L = V, F = V> {
319    /// binding to a variable
320    Var(V),
321    /// binding to a break label
322    Label(L),
323    /// binding to a filter
324    Fun(F),
325}
326
327struct Locals<S> {
328    // usize = number of vars
329    funs: MapVec<(S, Arity), (Fun<S>, usize)>,
330    vars: MapVecLen<Bind<S>>,
331}
332
333impl<S> Default for Locals<S> {
334    fn default() -> Self {
335        Self {
336            funs: MapVec::default(),
337            vars: MapVecLen::default(),
338        }
339    }
340}
341
342/// Mapping from `K` to sequence of `V`.
343struct MapVec<K, V>(BTreeMap<K, Vec<V>>);
344
345impl<K, V> Default for MapVec<K, V> {
346    fn default() -> Self {
347        Self(BTreeMap::new())
348    }
349}
350
351impl<K: Ord, V> MapVec<K, V> {
352    fn push(&mut self, k: K, v: V) {
353        self.0.entry(k).or_default().push(v)
354    }
355
356    fn pop(&mut self, k: &K) -> Option<V> {
357        let vs = self.0.get_mut(k)?;
358        let v = vs.pop()?;
359        if vs.is_empty() {
360            self.0.remove(k);
361        }
362        Some(v)
363    }
364
365    fn get_last(&self, k: &K) -> Option<&V> {
366        self.0.get(k)?.last()
367    }
368
369    fn is_empty(&self) -> bool {
370        self.0.is_empty()
371    }
372}
373
374impl<S: Copy + Ord> Locals<S> {
375    fn push_sibling(&mut self, name: S, args: Box<[Arg<S>]>, id: TermId, tr: Tr) {
376        self.funs.push(
377            (name, args.len()),
378            (Fun::Sibling(args, id, tr), self.vars.total),
379        );
380    }
381
382    fn pop_sibling(&mut self, name: S, arity: Arity) -> (Box<[Arg<S>]>, TermId, Tr) {
383        let (y, vars) = match self.funs.pop(&(name, arity)) {
384            Some((Fun::Sibling(args, id, tr), vars)) => ((args, id, tr), vars),
385            _ => panic!(),
386        };
387        assert_eq!(self.vars.total, vars);
388        y
389    }
390
391    fn push_parent(&mut self, name: S, args: Box<[Arg<S>]>, id: TermId) {
392        let vars = self.vars.total;
393
394        for arg in args.iter() {
395            match arg {
396                Arg::Var(v) => self.vars.push(Bind::Var(*v)),
397                Arg::Fun(f) => self.push_arg(*f),
398            }
399        }
400
401        self.funs
402            .push((name, args.len()), (Fun::Parent(args, id), vars));
403    }
404
405    fn pop_parent(&mut self, name: S, arity: Arity) -> TermId {
406        let (args, id, vars) = match self.funs.pop(&(name, arity)) {
407            Some((Fun::Parent(args, def), vars)) => (args, def, vars),
408            _ => panic!(),
409        };
410        for arg in args.iter().rev() {
411            match arg {
412                Arg::Var(v) => self.vars.pop(&Bind::Var(*v)),
413                Arg::Fun(f) => self.pop_arg(*f),
414            }
415        }
416        assert_eq!(self.vars.total, vars);
417        id
418    }
419
420    fn push_arg(&mut self, name: S) {
421        self.vars.push(Bind::Fun(name));
422        self.funs.push((name, 0), (Fun::Arg, self.vars.total));
423    }
424
425    fn pop_arg(&mut self, name: S) {
426        let vars = match self.funs.pop(&(name, 0)) {
427            Some((Fun::Arg, vars)) => vars,
428            _ => panic!(),
429        };
430        assert_eq!(self.vars.total, vars);
431        self.vars.pop(&Bind::Fun(name));
432    }
433
434    /// Call `f(f_1; ...; f_n)`, allowing tail-recursive calls to `tr`.
435    fn call(&self, name: S, args: &[TermId], tr: &Tr) -> Option<(Term, Tr)> {
436        Some(match self.funs.get_last(&(name, args.len()))? {
437            (Fun::Arg, vars) => (Term::Var(self.vars.total - *vars), Tr::new()),
438            // we   may only return tail-recursive calls to `tr` and call a sibling
439            // that may only return tail-recursive calls to `tr_`
440            (Fun::Sibling(args_, id, tr_), vars) => {
441                let mut tr_ = tr_.clone();
442                // does the sibling return tail-recursive calls to itself?
443                let rec = tr_.remove(id);
444                // are all remaining tail-recursive calls from the sibling permitted?
445                let typ = if tr_.is_subset(tr) {
446                    if rec {
447                        CallType::CatchOne
448                    } else {
449                        CallType::Inline
450                    }
451                } else {
452                    tr_ = Tr::new();
453                    CallType::CatchAll
454                };
455                let vars = self.vars.total - *vars;
456                (Term::CallDef(*id, binds(args_, args), vars, typ), tr_)
457            }
458            (Fun::Parent(args_, id), vars) => {
459                // if we can return a tail-recursive call to the parent
460                let (typ, tr_) = if tr.contains(id) {
461                    (CallType::Throw, Tr::from([*id]))
462                } else {
463                    (CallType::CatchAll, Tr::new())
464                };
465                let vars = self.vars.total - *vars;
466                (Term::CallDef(*id, binds(args_, args), vars, typ), tr_)
467            }
468        })
469    }
470
471    fn is_empty(&self) -> bool {
472        self.funs.is_empty() && self.vars.is_empty()
473    }
474}
475
476// any ID in Tr is an ID of a function f, and
477// any call to some `f` in Tr from the current position is *not* tail-recursive
478type Tr = BTreeSet<TermId>;
479
480impl<'s, F> Compiler<&'s str, F> {
481    /// Supply functions with given signatures.
482    pub fn with_funs(mut self, funs: impl IntoIterator<Item = (&'s str, Box<[Arg]>, F)>) -> Self {
483        self.lut.funs = funs
484            .into_iter()
485            .map(|(name, args, f)| (Sig { name, args }, f))
486            .collect();
487        self
488    }
489
490    /// Assume the existence of global variables with given names.
491    ///
492    /// The names all have to start with `$`.
493    /// For execution, the corresponding values have to be provided via [`crate::Ctx::new`].
494    pub fn with_global_vars(self, global_vars: impl IntoIterator<Item = &'s str>) -> Self {
495        Self {
496            global_vars: global_vars.into_iter().collect(),
497            ..self
498        }
499    }
500
501    /// Compile the given modules.
502    pub fn compile<P>(
503        mut self,
504        mods: load::Modules<&'s str, P>,
505    ) -> Result<Filter<F>, Errors<&'s str, P>> {
506        self.imported_vars = mods
507            .file_vars()
508            .enumerate()
509            .flat_map(|(mid, (_file, vars))| vars.iter().map(move |(_path, x, _meta)| (*x, mid)))
510            .collect();
511
512        let mut errs = Vec::new();
513        for (file, module) in mods.deps {
514            let defs = self.open_module(module);
515            let defs = self.module(defs);
516            self.mod_map.push(defs);
517            self.close_module(file, &mut errs)
518        }
519        let (file, module) = mods.main;
520        let main = self.open_module(module);
521        let id = self.iterm(main);
522        self.close_module(file, &mut errs);
523
524        /*
525        for (i, t) in self.lut.terms.iter().enumerate() {
526            std::println!("{i} -> {t:?}");
527        }
528        */
529
530        let lut = self.lut.map_funs(|(_sig, f)| f);
531        errs.is_empty().then(|| Filter { id, lut }).ok_or(errs)
532    }
533
534    fn with_label<T>(&mut self, label: &'s str, f: impl FnOnce(&mut Self) -> T) -> T {
535        self.locals.vars.push(Bind::Label(label));
536        let y = f(self);
537        self.locals.vars.pop(&Bind::Label(label));
538        y
539    }
540
541    fn with_vars<T>(&mut self, vars: &[&'s str], f: impl FnOnce(&mut Self) -> T) -> T {
542        vars.iter()
543            .for_each(|v| self.locals.vars.push(Bind::Var(v)));
544        let y = f(self);
545        vars.iter()
546            .rev()
547            .for_each(|v| self.locals.vars.pop(&Bind::Var(v)));
548        y
549    }
550
551    fn open_module<B>(&mut self, m: load::Module<&'s str, B>) -> B {
552        self.imported_mods.clear();
553        self.included_mods.clear();
554        for (mid, as_) in m.mods {
555            match as_ {
556                None => self.included_mods.push(mid),
557                Some(as_) => self.imported_mods.push((mid, as_)),
558            }
559        }
560        m.body
561    }
562
563    fn close_module<P>(&mut self, file: load::File<&'s str, P>, errs: &mut Errors<&'s str, P>) {
564        if !self.errs.is_empty() {
565            errs.push((file, core::mem::take(&mut self.errs)));
566        }
567        assert!(self.locals.is_empty());
568    }
569
570    fn open_def(&mut self, def: parse::Def<&'s str>, tr: &Tr) -> (&'s str, usize) {
571        let args: Box<[_]> = def.args.iter().map(|a| bind_from(a, *a)).collect();
572        let arity = args.len();
573        let (sig, def, tr_) = self.def(def, tr);
574        self.locals.push_sibling(sig.name, args, def, tr_);
575        (sig.name, arity)
576    }
577
578    fn close_def(&mut self, (name, arity): (&'s str, usize)) -> Def<&'s str> {
579        let (args, id, tr) = self.locals.pop_sibling(name, arity);
580        let args = args.into_vec().into_iter().map(|a| a.map(|_| ())).collect();
581        (Sig { name, args }, id, tr)
582    }
583
584    fn module(&mut self, defs: Vec<parse::Def<&'s str>>) -> Vec<Def<&'s str>> {
585        let siblings = defs.into_iter().map(|def| self.open_def(def, &Tr::new()));
586        let siblings: Vec<_> = siblings.collect();
587
588        let defs = siblings.into_iter().rev().map(|sig| self.close_def(sig));
589        let mut defs: Vec<_> = defs.collect();
590
591        defs.reverse();
592        defs
593    }
594
595    /// Compile a definition LHS.
596    fn def(&mut self, d: parse::Def<&'s str>, tr: &Tr) -> (Sig<&'s str>, TermId, Tr) {
597        // insert placeholder
598        let id = self.lut.insert_term(Term::default());
599        let args = d.args.iter().map(|a| bind_from(a, *a)).collect();
600
601        self.locals.push_parent(d.name, args, id);
602        let mut tr = tr.clone();
603        // any function can call itself tail-recursively
604        assert!(tr.insert(id));
605        let (body, tr_) = self.term(d.body, &tr);
606        self.lut.terms[id.0] = body;
607        assert_eq!(self.locals.pop_parent(d.name, d.args.len()), id);
608        let sig = Sig {
609            name: d.name,
610            args: d.args.iter().map(|a| bind_from(a, ())).collect(),
611        };
612        (sig, id, tr_)
613    }
614
615    /// Compile a term that may call any function in `tr` tail-recursively.
616    ///
617    /// Returns which of the functions in `tr` it actually calls tail-recursively.
618    /// The output `Tr` must be a subset of the input `Tr`.
619    ///
620    /// The following filters return the outputs of the filters `F` or `G` as-is:
621    ///
622    /// - `F, G`
623    /// - `f // G`
624    /// - `f as ... | G`
625    /// - `f        | G`
626    /// - `if p then F else G end`
627    /// - `foreach fx as ... (init; f; G)`
628    /// - `def x: f; G`
629    ///
630    /// All other filters do not return outputs of sub-filters as-is.
631    fn term(&mut self, t: parse::Term<&'s str>, tr: &Tr) -> (Term, Tr) {
632        use parse::Term::*;
633        let t = match t {
634            Id => Term::Id,
635            Recurse => Term::Recurse,
636            Arr(t) => Term::Arr(self.iterm(t.map_or_else(|| Call("!empty", Vec::new()), |t| *t))),
637            Neg(t) => Term::Neg(self.iterm(*t)),
638            Label(x, t) => Term::Label(self.with_label(x, |c| c.iterm(*t))),
639            Break(x) => self.break_(x),
640            IfThenElse(if_thens, else_) => {
641                let f = |(if_, then_)| (self.iterm(if_), self.iterm_tr(then_, tr));
642                let if_thens: Vec<_> = if_thens.into_iter().map(f).collect();
643                let else_ = else_.map_or((Term::Id, Tr::new()), |else_| self.term(*else_, tr));
644                return if_thens.into_iter().rev().fold(else_, |acc, (if_, then_)| {
645                    let else_ = self.lut.insert_term(acc.0);
646                    let tr_ = then_.1.union(&acc.1).copied().collect();
647                    (Term::Ite(if_, then_.0, else_), tr_)
648                });
649            }
650            Var(x) => self.var(x),
651            Call(name, args) => {
652                let args: Box<[_]> = args.into_iter().map(|t| self.iterm(t)).collect();
653                if let Some((module, name)) = name.split_once("::") {
654                    self.call_mod(module, name, &args)
655                } else {
656                    return self.call(name, &args, tr);
657                }
658            }
659            Def(defs, t) => {
660                let siblings: Vec<_> = defs.into_iter().map(|def| self.open_def(def, tr)).collect();
661                let (t, tr_) = self.term(*t, tr);
662                siblings.into_iter().rev().for_each(|sig| {
663                    self.close_def(sig);
664                });
665
666                return (t, tr_);
667            }
668            Num(n) => n.parse().map_or_else(|_| Term::Num(n.into()), Term::Int),
669            TryCatch(try_, catch) => {
670                let catch = catch.map_or_else(|| Call("!empty", Vec::new()), |t| *t);
671                Term::TryCatch(self.iterm(*try_), self.iterm(catch))
672            }
673            Fold(name, xs, pat, args) => {
674                use self::Fold::{Foreach, Reduce};
675                let arity = args.len();
676                let mut args = args.into_iter();
677                let (init, update) = match (args.next(), args.next()) {
678                    (Some(init), Some(update)) => (init, update),
679                    _ => return (self.fail(name, Undefined::Filter(arity)), Tr::new()),
680                };
681                let vars: Vec<_> = pat.vars().copied().collect();
682                let xs = self.iterm(*xs);
683                let pat = self.pattern(pat);
684                let init = self.iterm(init);
685                let update = self.with_vars(&vars, |c| c.iterm(update));
686
687                match (name, args.next(), args.next()) {
688                    ("reduce", None, None) => Term::Fold(xs, pat, init, update, Reduce),
689                    ("foreach", proj, None) => {
690                        let (proj, tr_) = proj
691                            .map(|p| self.with_vars(&vars, |c| c.iterm_tr(p, tr)))
692                            .unzip();
693                        let tr_ = tr_.unwrap_or_default();
694                        return (Term::Fold(xs, pat, init, update, Foreach(proj)), tr_);
695                    }
696                    _ => self.fail(name, Undefined::Filter(arity)),
697                }
698            }
699            BinOp(l, op, r) => {
700                use parse::BinaryOp::*;
701                let (l, (r, tr_)) = match op {
702                    Comma => {
703                        let (l, trl) = self.iterm_tr(*l, tr);
704                        let (r, trr) = self.iterm_tr(*r, tr);
705                        (l, (r, trl.union(&trr).copied().collect()))
706                    }
707                    Alt => (self.iterm(*l), self.iterm_tr(*r, tr)),
708                    Pipe(ref pat) => {
709                        let l = self.iterm(*l);
710                        let vars: Vec<_> = pat.iter().flat_map(|p| p.vars()).copied().collect();
711                        (l, self.with_vars(&vars, |c| c.iterm_tr(*r, tr)))
712                    }
713                    _ => (self.iterm(*l), (self.iterm(*r), Tr::new())),
714                };
715                let t = match op {
716                    Pipe(pat) => Term::Pipe(l, pat.map(|pat| self.pattern(pat)), r),
717                    Comma => Term::Comma(l, r),
718                    Math(op) => Term::Math(l, op, r),
719                    Assign => Term::Assign(l, r),
720                    Update => Term::Update(l, r),
721                    UpdateMath(op) => Term::UpdateMath(l, op, r),
722                    Cmp(op) => Term::Cmp(l, op, r),
723                    Or => Term::Logic(l, true, r),
724                    And => Term::Logic(l, false, r),
725                    Alt => Term::Alt(l, r),
726                    UpdateAlt => Term::UpdateAlt(l, r),
727                };
728                return (t, tr_);
729            }
730            Path(t, path) => {
731                let t = self.iterm(*t);
732                let path = path.0.into_iter();
733                let path = path.map(|(p, opt)| (p.map(|f| self.iterm(f)), opt));
734                Term::Path(t, crate::path::Path(path.collect()))
735            }
736            Str(fmt, parts) => {
737                use lex::StrPart;
738                let fmt = match fmt {
739                    Some(fmt) => self.iterm(Call(fmt, Vec::new())),
740                    None => self.lut.insert_term(Term::ToString),
741                };
742                let parts = parts.into_iter().map(|part| match part {
743                    StrPart::Str(s) => Term::Str(s.into()),
744                    StrPart::Char(c) => Term::Str(c.into()),
745                    StrPart::Term(f) => Term::Pipe(self.iterm(f), None, fmt),
746                });
747                let parts = parts.collect();
748                self.sum_or(|| Term::Str(String::new()), parts)
749            }
750            Obj(o) => {
751                let kvs = o.into_iter().map(|(k, v)| self.obj_entry(k, v)).collect();
752                self.sum_or(|| Term::ObjEmpty, kvs)
753            }
754        };
755        (t, Tr::new())
756    }
757
758    /// Compile a term in a context that does *not* permit tail-recursion.
759    ///
760    /// One example of such a term is `t` in `1 + t` or `t | .+1`.
761    fn iterm(&mut self, t: parse::Term<&'s str>) -> TermId {
762        self.iterm_tr(t, &Tr::new()).0
763    }
764
765    fn iterm_tr(&mut self, t: parse::Term<&'s str>, tr: &Tr) -> (TermId, Tr) {
766        let id = self.lut.insert_term(Term::default());
767        let (t, tr_) = self.term(t, tr);
768        debug_assert!(tr_.is_subset(tr));
769        self.lut.terms[id.0] = t;
770        (id, tr_)
771    }
772
773    fn pattern(&mut self, p: parse::Pattern<&'s str>) -> Pattern<TermId> {
774        match p {
775            parse::Pattern::Var(_) => Pattern::Var,
776            parse::Pattern::Arr(a) => {
777                let iter = a
778                    .into_iter()
779                    .enumerate()
780                    .map(|(i, p)| (self.lut.insert_term(Term::Int(i as isize)), self.pattern(p)));
781                Pattern::Idx(iter.collect())
782            }
783            parse::Pattern::Obj(o) => {
784                let iter = o.into_iter().map(|(k, p)| (self.iterm(k), self.pattern(p)));
785                Pattern::Idx(iter.collect())
786            }
787        }
788    }
789
790    fn fail(&mut self, name: &'s str, undef: Undefined) -> Term {
791        self.errs.push((name, undef));
792        Term::default()
793    }
794
795    fn call_mod_id(&self, mid: ModId, name: &'s str, args: &[TermId]) -> Option<Term> {
796        let mut sig_defs = self.mod_map[mid].iter().rev();
797        let (sig, id, tr) = sig_defs.find(|(sig, ..)| sig.matches(name, args))?;
798        debug_assert!(tr.len() <= 1);
799        let typ = if tr.contains(id) {
800            CallType::CatchOne
801        } else {
802            CallType::Inline
803        };
804        let vars = self.locals.vars.total;
805        Some(Term::CallDef(*id, binds(&sig.args, args), vars, typ))
806    }
807
808    /// Resolve call to `mod::filter(a1, ..., an)`.
809    fn call_mod(&mut self, module: &'s str, name: &'s str, args: &[TermId]) -> Term {
810        let mut imported_mods = self.imported_mods.iter().rev();
811        let mid = match imported_mods.find(|(_mid, module_)| module == *module_) {
812            Some((mid, _module)) => mid,
813            None => return self.fail(module, Undefined::Mod),
814        };
815        if let Some(call) = self.call_mod_id(*mid, name, args) {
816            return call;
817        }
818        self.fail(name, Undefined::Filter(args.len()))
819    }
820
821    /// Resolve call to `filter(a1, ..., an)`.
822    fn call(&mut self, name: &'s str, args: &[TermId], tr: &Tr) -> (Term, Tr) {
823        if let Some(t) = self.locals.call(name, args, tr) {
824            return t;
825        }
826        for mid in self.included_mods.iter().rev() {
827            if let Some(call) = self.call_mod_id(*mid, name, args) {
828                return (call, Tr::new());
829            }
830        }
831        for (nid, (sig, _f)) in self.lut.funs.iter().enumerate() {
832            if sig.matches(name, args) {
833                return (Term::Native(nid, binds(&sig.args, args)), Tr::new());
834            }
835        }
836
837        (self.fail(name, Undefined::Filter(args.len())), Tr::new())
838    }
839
840    fn var(&mut self, x: &'s str) -> Term {
841        let mut i = self.locals.vars.total;
842
843        if let Some(v) = self.locals.vars.bound.get_last(&Bind::Var(x)) {
844            return Term::Var(i - v);
845        }
846        for (x_, mid) in self.imported_vars.iter().rev() {
847            if x == *x_ && *mid == self.mod_map.len() {
848                return Term::Var(i);
849            } else {
850                i += 1;
851            }
852        }
853        for x_ in self.global_vars.iter().rev() {
854            if x == *x_ {
855                return Term::Var(i);
856            } else {
857                i += 1;
858            }
859        }
860        self.fail(x, Undefined::Var)
861    }
862
863    fn break_(&mut self, x: &'s str) -> Term {
864        if let Some(l) = self.locals.vars.bound.get_last(&Bind::Label(x)) {
865            return Term::Var(self.locals.vars.total - l);
866        }
867        self.fail(x, Undefined::Label)
868    }
869
870    fn obj_entry(&mut self, k: parse::Term<&'s str>, v: Option<parse::Term<&'s str>>) -> Term {
871        let (k, v) = match (k, v) {
872            (parse::Term::Var(x), None) => (
873                self.lut.insert_term(Term::Str(x[1..].into())),
874                self.iterm(parse::Term::Var(x)),
875            ),
876            (k, None) => {
877                use crate::path::{Part, Path};
878                let k = self.iterm(k);
879                let path = Path::from(Part::Index(k));
880                let path = Term::Path(self.lut.insert_term(Term::Id), path);
881                (k, self.lut.insert_term(path))
882            }
883            (k, Some(v)) => (self.iterm(k), self.iterm(v)),
884        };
885        Term::ObjSingle(k, v)
886    }
887
888    fn sum_or(&mut self, f: impl FnOnce() -> Term, terms: Vec<Term>) -> Term {
889        use ops::Math::Add;
890        let mut iter = terms.into_iter().rev();
891        let last = iter.next().unwrap_or_else(f);
892        iter.fold(last, |acc, x| {
893            Term::Math(self.lut.insert_term(x), Add, self.lut.insert_term(acc))
894        })
895    }
896}
897
898/// Tail Call Optimisation.
899#[test]
900fn tco() {
901    let calls_in = |filter| -> Vec<CallType> {
902        let tk = lex::Lexer::new(filter).lex().unwrap();
903        let tm = parse::Parser::new(&tk).term().unwrap();
904        let mut c = Compiler::<_, ()>::default();
905        let _id = c.iterm(tm);
906        let calls = c.lut.terms.iter().filter_map(|tm| match tm {
907            Term::CallDef(.., typ) => Some(*typ),
908            _ => None,
909        });
910        calls.collect()
911    };
912
913    use CallType::*;
914
915    // no calls to definitions here
916    assert_eq!(*calls_in("1+1"), []);
917
918    // The call to `f` in `.+1 | f` is a tail call ([`CallType::Throw`]), because:
919    //
920    // - `|` returns all outputs of the right-hand side as-is, and
921    // - `f` is an ancestor of the filter `.+1 | f`.
922    //
923    // We can analyse tail call behaviour of filters by annotating each
924    // definition by what tail calls it might return.
925    let f = r#"
926      def f:
927        .+1 | f;  # f    -> {f} (f is called with Throw)
928      f           # main -> { } (f is called with CatchOne)
929    "#;
930    assert_eq!(*calls_in(f), [CatchOne, Throw]);
931
932    // In contexts where actual filter outputs are required,
933    // filters that may return tail calls need to be called with [`CallType::CatchAll`].
934    let f = r#"
935      def f:
936        f+1;  # f    -> {} (f is called with CatchAll)
937      f       # main -> {} (f is called with Inline)
938    "#;
939    // This leads to a stack overflow in jaq and an OOM error in jq.
940    assert_eq!(*calls_in(f), [Inline, CatchAll]);
941
942    // Here, it is very important that
943    // CatchOne is used instead of CatchAll ---
944    // using the latter in the call to `g` would prevent
945    // tail-recursive calls to `f` being handled by the outer call to `f`,
946    // and would thus lead to a stack overflow.
947    let f = r#"
948      def f:
949        def g:
950          f, g;  # g    -> {f, g} (f and g are called with Throw)
951        g;       # f    -> {f   } (g is called with CatchOne)
952      f          # main -> {    } (f is called with CatchOne)
953    "#;
954    assert_eq!(*calls_in(f), [CatchOne, CatchOne, Throw, Throw]);
955
956    // Here, we do not need to catch outputs from `g` when we call it in `f`, because:
957    //
958    // - `g` does not return tail-calls to itself and
959    // - at the call site of `g`, we may return tail calls to `f`,
960    //   which is what `g` returns
961    let f = r#"
962      def f:
963        def g:
964          f;     # g    -> {f} (f is called with Throw)
965        g;       # f    -> {f} (g is called with *Inline*)
966      f          # main -> { } (f is called with CatchOne)
967    "#;
968    assert_eq!(*calls_in(f), [CatchOne, Inline, Throw]);
969
970    // If a definition is at a place that prohibits tail calls to a parent,
971    // then calls to the parent are wrapped with [`CallType::CatchAll`]:
972    let f = r#"
973      def f:
974        1 + (
975          def g:
976            g;   # g    -> {g} (g is called with Throw)
977          g      #             (g is called with CatchOne)
978        );       # f    -> { }
979      f          # main -> { } (f is called with Inline)
980    "#;
981    assert_eq!(*calls_in(f), [Inline, CatchOne, Throw]);
982
983    let f = r#"
984      def f:
985        f | f;   # f    -> {f} (f is called with CatchAll and Throw)
986      f          # main -> { } (f is called with CatchOne)
987    "#;
988    assert_eq!(*calls_in(f), [CatchOne, CatchAll, Throw]);
989}