Skip to main content

jaq_core/load/
mod.rs

1//! Combined file loading, lexing, and parsing for multiple modules.
2
3#[cfg(feature = "arbitrary")]
4mod arbitrary;
5pub mod lex;
6pub mod parse;
7mod prec_climb;
8
9use crate::{ops, path};
10#[cfg(feature = "std")]
11use alloc::boxed::Box;
12use alloc::{string::String, vec::Vec};
13pub use lex::Lexer;
14use lex::Token;
15pub use parse::Parser;
16use parse::{Def, Term};
17#[cfg(feature = "std")]
18use std::path::{Path, PathBuf};
19
20#[cfg(feature = "std")]
21extern crate std;
22
23/// Storage for loaded modules.
24///
25/// Once Rust has [internal references](https://smallcultfollowing.com/babysteps/blog/2024/06/02/the-borrow-checker-within/#step-4-internal-references),
26/// this should become unnecessary.
27/// I can't wait for it to happen!
28pub type Arena = typed_arena::Arena<String>;
29
30/// Combined file loader, lexer, and parser for multiple modules.
31pub struct Loader<S, P, R> {
32    #[allow(clippy::type_complexity)]
33    mods: Vec<(File<S, P>, Result<Module<S>, Error<S>>)>,
34    /// function to read module file contents from a path
35    read: R,
36    /// currently processed modules
37    ///
38    /// This is used to detect circular dependencies between modules.
39    open: Vec<P>,
40}
41
/// A (module) file, made up of its contents `C` and its path `P`.
///
/// Keeping both together allows for precise error messages.
#[derive(Clone, Debug, Default)]
pub struct File<C, P> {
    /// what the file contains
    pub code: C,
    /// where the file is located
    pub path: P,
}
52
53/// Information to resolve module/data imports.
54pub struct Import<'a, S, P> {
55    /// absolute path of the module where the import/include directive appears
56    ///
57    /// This is a path `P`, not a string `S`, because it usually does not appear in the source.
58    pub parent: &'a P,
59    /// relative path of the imported/included module, as given in the source
60    pub path: &'a S,
61    /// metadata attached to the import/include directive
62    pub meta: &'a Option<Term<S>>,
63}
64
65impl<C, P> File<C, P> {
66    /// Apply a function to the contents of a file.
67    ///
68    /// This is useful to go from a reference `&str` to an owned `String`,
69    /// in order to save the `File` without its corresponding [`Arena`].
70    pub fn map_code<C2>(self, f: impl Fn(C) -> C2) -> File<C2, P> {
71        File {
72            code: f(self.code),
73            path: self.path,
74        }
75    }
76
77    /// Apply a function to the path of a file.
78    pub fn map_path<P2>(self, f: impl Fn(P) -> P2) -> File<C, P2> {
79        File {
80            code: self.code,
81            path: f(self.path),
82        }
83    }
84}
85
86/// Error occurring during loading of a single module.
87#[derive(Debug)]
88pub enum Error<S> {
89    /// input/output errors, for example when trying to load a module that does not exist
90    Io(Vec<(S, String)>),
91    /// lex   errors, for example when loading a module `($) (`
92    Lex(Vec<lex::Error<S>>),
93    /// parse errors, for example when loading a module `(+) *`
94    Parse(Vec<parse::Error<S>>),
95}
96
97type Vars<S> = Vec<(S, S, Option<Term<S>>)>;
98
99/// Module containing strings `S` and a body `B`.
100#[derive(Default)]
101pub struct Module<S, B = Vec<Def<S>>> {
102    /// metadata (optional)
103    #[allow(dead_code)]
104    meta: Option<Term<S>>,
105    /// included and imported modules
106    ///
107    /// Suppose that we have [`Modules`] `mods` and the current [`Module`] is `mods[id]`.
108    /// Then for every `(id_, name)` in `mods[id].1.mods`, we have that
109    /// the included/imported module is stored in `mods[id_]` (`id_ < id`), and
110    /// the module is included if `name` is `None` and imported if `name` is `Some(name)`.
111    pub(crate) mods: Vec<(usize, Option<S>)>,
112    /// imported variables, storing path and name (always starts with `$`)
113    pub(crate) vars: Vars<S>,
114    /// everything that comes after metadata and includes/imports
115    pub(crate) body: B,
116}
117
118/// Tree of modules containing definitions, and a main module.
119pub struct Modules<S, P> {
120    pub(crate) deps: Vec<(File<S, P>, Module<S>)>,
121    pub(crate) main: (File<S, P>, Module<S, Term<S>>),
122}
123
124impl<S, P> Modules<S, P> {
125    pub(crate) fn file_vars(&self) -> impl Iterator<Item = (&File<S, P>, &Vars<S>)> {
126        let mod_vars = self.deps.iter().map(|(file, module)| (file, &module.vars));
127        mod_vars.chain([(&self.main.0, &self.main.1.vars)])
128    }
129}
130
131/// Errors occurring during loading of multiple modules.
132///
133/// For example, suppose that we have
134/// a file `l.jq` that yields a lex error,
135/// a file `p.jq` that yields a parse error, and
136/// a file `i.jq` that includes a non-existing module.
137/// If we then include all these files in our main program,
138/// [`Errors`] will contain each file with a different [`Error`].
139pub type Errors<S, P, E = Error<S>> = Vec<(File<S, P>, E)>;
140
141impl<S: core::ops::Deref<Target = str>, B> parse::Module<S, B> {
142    fn map(
143        self,
144        mut f: impl FnMut(&S, Option<Term<S>>) -> Result<usize, String>,
145    ) -> Result<Module<S, B>, Error<S>> {
146        // the prelude module is included implicitly in every module (except itself)
147        let mut mods = Vec::from([(0, None)]);
148        let mut vars = Vec::new();
149        let mut errs = Vec::new();
150        for (path, as_, meta) in self.deps {
151            match as_ {
152                Some(x) if x.starts_with('$') => vars.push((path, x, meta)),
153                as_ => match f(&path, meta) {
154                    Ok(mid) => mods.push((mid, as_)),
155                    Err(e) => errs.push((path, e)),
156                },
157            }
158        }
159        if errs.is_empty() {
160            Ok(Module {
161                meta: self.meta,
162                mods,
163                vars,
164                body: self.body,
165            })
166        } else {
167            Err(Error::Io(errs))
168        }
169    }
170}
171
172type ReadResult<P> = Result<File<String, P>, String>;
173type ReadFn<P> = fn(Import<&str, P>) -> ReadResult<P>;
174
175impl<'s, P: Default> Loader<&'s str, P, ReadFn<P>> {
176    /// Initialise the loader with prelude definitions.
177    ///
178    /// The prelude is a special module that is implicitly included by all other modules
179    /// (including the main module).
180    /// That means that all filters defined in the prelude can be called from any module.
181    ///
182    /// The prelude is normally initialised with filters like `map` or `true`.
183    pub fn new(prelude: impl IntoIterator<Item = Def<&'s str>>) -> Self {
184        let defs = [Def::new("!empty", Vec::new(), Term::empty())];
185
186        let prelude = Module {
187            body: defs.into_iter().chain(prelude).collect(),
188            ..Module::default()
189        };
190
191        Self {
192            // the first module is reserved for the prelude
193            mods: Vec::from([(File::default(), Ok(prelude))]),
194            read: |_path| Err("module loading not supported".into()),
195            open: Vec::new(),
196        }
197    }
198}
199
200#[cfg(feature = "std")]
201impl<S: PartialEq> Term<S> {
202    fn obj_key(&self, key: S) -> Option<&Self> {
203        if let Term::Obj(kvs) = self {
204            kvs.iter().find_map(|(k, v)| {
205                if *k.as_str()? == key {
206                    v.as_ref()
207                } else {
208                    None
209                }
210            })
211        } else {
212            None
213        }
214    }
215
216    fn unconcat(&self) -> Box<dyn Iterator<Item = &Self> + '_> {
217        match self {
218            Self::BinOp(l, parse::BinaryOp::Comma, r) => Box::new(l.unconcat().chain(r.unconcat())),
219            _ => Box::new(core::iter::once(self)),
220        }
221    }
222}
223
224#[cfg(feature = "std")]
/// Replace the prefix `pre` of `path` by the path that `f` returns.
///
/// This returns `None` if `path` does not start with `pre` (in which case `f` is not called),
/// or if `f` itself returns `None`.
fn expand_prefix(path: &Path, pre: &str, f: impl FnOnce() -> Option<PathBuf>) -> Option<PathBuf> {
    match path.strip_prefix(pre) {
        Ok(rest) => f().map(|mut base| {
            base.push(rest);
            base
        }),
        Err(_) => None,
    }
}
231
232#[cfg(feature = "std")]
233impl<'a> Import<'a, &'a str, PathBuf> {
234    fn meta_paths(&self) -> impl Iterator<Item = PathBuf> + '_ {
235        let paths = self.meta.as_ref().and_then(|meta| {
236            let v = meta.obj_key("search")?;
237            let iter = if let Term::Arr(Some(a)) = v {
238                Box::new(a.unconcat().filter_map(|v| v.as_str()))
239            } else if let Some(s) = v.as_str() {
240                Box::new(core::iter::once(s))
241            } else {
242                Box::new(core::iter::empty()) as Box<dyn Iterator<Item = _>>
243            };
244            Some(iter.map(|s| Path::new(*s).to_path_buf()))
245        });
246        paths.into_iter().flatten()
247    }
248
249    /// Try to find a file with given extension in the given search paths.
250    pub fn find(self, paths: &[PathBuf], ext: &str) -> Result<PathBuf, String> {
251        let parent = Path::new(self.parent).parent().unwrap_or(Path::new("."));
252
253        let mut rel = Path::new(self.path).to_path_buf();
254        if !rel.is_relative() {
255            Err("non-relative path")?
256        }
257        rel.set_extension(ext);
258
259        #[cfg(target_os = "windows")]
260        let home = "USERPROFILE";
261        #[cfg(not(target_os = "windows"))]
262        let home = "HOME";
263
264        use std::env;
265        let home = || env::var_os(home).map(PathBuf::from);
266        let origin = || env::current_exe().ok()?.parent().map(PathBuf::from);
267        let expand = |path: &PathBuf| {
268            let home = expand_prefix(path, "~", home);
269            let orig = expand_prefix(path, "$ORIGIN", origin);
270            home.or(orig).unwrap_or_else(|| path.clone())
271        };
272
273        // search paths given in the metadata are relative to the parent file, whereas
274        // search paths given on the command-line (`paths`, via `-L`) are not
275        let meta = self.meta_paths().map(|p| parent.join(expand(&p)));
276        meta.chain(paths.iter().map(expand))
277            .map(|path| path.join(&rel))
278            .filter_map(|path| path.canonicalize().ok())
279            .find(|path| path.is_file())
280            .ok_or_else(|| "file not found".into())
281    }
282
283    fn read(self, paths: &[PathBuf], ext: &str) -> ReadResult<PathBuf> {
284        use alloc::string::ToString;
285        let path = self.find(paths, ext)?;
286        let code = std::fs::read_to_string(&path).map_err(|e| e.to_string())?;
287        Ok(File { code, path })
288    }
289}
290
291/// Apply function to path of every imported data file, accumulating errors.
292pub fn import<S: Copy, P: Clone>(
293    mods: &Modules<S, P>,
294    mut f: impl FnMut(Import<S, P>) -> Result<(), String>,
295) -> Result<(), Errors<S, P>> {
296    let mut errs = Vec::new();
297    let mut vals = Vec::new();
298    for (mod_file, vars) in mods.file_vars() {
299        let mut mod_errs = Vec::new();
300        for (path, _name, meta) in vars {
301            let parent = &mod_file.path;
302            match f(Import { parent, path, meta }) {
303                Ok(v) => vals.push(v),
304                Err(e) => mod_errs.push((*path, e)),
305            }
306        }
307        if !mod_errs.is_empty() {
308            errs.push((mod_file.clone(), Error::Io(mod_errs)));
309        }
310    }
311    errs.is_empty().then_some(()).ok_or(errs)
312}
313
314impl<S, P, R> Loader<S, P, R> {
315    /// Provide a function to return the contents of included/imported module files.
316    ///
317    /// For every included/imported module, the loader will call this function to
318    /// obtain the contents of the module.
319    /// For example, if we have `include "foo"`, the loader calls `read("foo")`.
320    pub fn with_read<R2>(self, read: R2) -> Loader<S, P, R2> {
321        let Self { mods, open, .. } = self;
322        Loader { mods, read, open }
323    }
324}
325
326#[cfg(feature = "std")]
327impl<S, R> Loader<S, PathBuf, R> {
328    /// Read the contents of included/imported module files by performing file I/O.
329    pub fn with_std_read(
330        self,
331        paths: &[PathBuf],
332    ) -> Loader<S, PathBuf, impl FnMut(Import<&str, PathBuf>) -> ReadResult<PathBuf> + '_> {
333        self.with_read(|import: Import<&str, PathBuf>| import.read(paths, "jq"))
334    }
335}
336
337impl<'s, P: Clone + Eq, R: FnMut(Import<&'s str, P>) -> ReadResult<P>> Loader<&'s str, P, R> {
338    /// Load a set of modules, starting from a given file.
339    pub fn load(
340        mut self,
341        arena: &'s Arena,
342        file: File<&'s str, P>,
343    ) -> Result<Modules<&'s str, P>, Errors<&'s str, P>> {
344        let result = parse_main(file.code).and_then(|m| {
345            m.map(|path, meta| {
346                let (parent, meta) = (&file.path, &meta);
347                self.find(arena, Import { parent, path, meta })
348            })
349        });
350
351        let mut main = None;
352        let mut deps = Vec::new();
353        let mut errs = Vec::new();
354
355        match result {
356            Ok(m) => main = Some((file, m)),
357            Err(e) => errs.push((file, e)),
358        };
359        for (file, result) in self.mods {
360            match result {
361                Ok(m) => deps.push((file, m)),
362                Err(e) => errs.push((file, e)),
363            }
364        }
365
366        match main {
367            Some(main) if errs.is_empty() => Ok(Modules { main, deps }),
368            _ => Err(errs),
369        }
370    }
371
372    fn find(&mut self, arena: &'s Arena, import: Import<&'s str, P>) -> Result<usize, String> {
373        let file = (self.read)(import)?;
374
375        let mut mods = self.mods.iter();
376        if let Some(id) = mods.position(|(file_, _)| file.path == file_.path) {
377            return Ok(id);
378        };
379        if self.open.contains(&file.path) {
380            return Err("circular include/import".into());
381        }
382
383        let code = &**arena.alloc(file.code);
384        self.open.push(file.path.clone());
385        let defs = parse_defs(code).and_then(|m| {
386            m.map(|path, meta| {
387                let (parent, meta) = (&file.path, &meta);
388                self.find(arena, Import { parent, path, meta })
389            })
390        });
391        assert!(self.open.pop().as_ref() == Some(&file.path));
392
393        let id = self.mods.len();
394        let path = file.path;
395        self.mods.push((File { path, code }, defs));
396        Ok(id)
397    }
398}
399
400fn parse_main(code: &str) -> Result<parse::Module<&str, Term<&str>>, Error<&str>> {
401    let tokens = lex::Lexer::new(code).lex().map_err(Error::Lex)?;
402    let conv_err = |(expected, found)| (expected, Token::opt_as_str(found, code));
403    parse::Parser::new(&tokens)
404        .parse(|p| p.module(|p| p.term()))
405        .map_err(|e| Error::Parse(e.into_iter().map(conv_err).collect()))
406}
407
408fn parse_defs(code: &str) -> Result<parse::Module<&str, Vec<Def<&str>>>, Error<&str>> {
409    let tokens = lex::Lexer::new(code).lex().map_err(Error::Lex)?;
410    let conv_err = |(expected, found)| (expected, Token::opt_as_str(found, code));
411    parse::Parser::new(&tokens)
412        .parse(|p| p.module(|p| p.defs()))
413        .map_err(|e| Error::Parse(e.into_iter().map(conv_err).collect()))
414}
415
416/// Lex a string and parse resulting tokens, returning [`None`] if any error occurred.
417///
418/// Example:
419///
420/// ~~~
421/// # use jaq_core::load::parse;
422/// let t = parse("[] | .[]", |p| p.term());
423/// ~~~
424pub fn parse<'s, T: Default, F>(s: &'s str, f: F) -> Option<T>
425where
426    F: for<'t> FnOnce(&mut Parser<'s, 't>) -> parse::Result<'s, 't, T>,
427{
428    Parser::new(&Lexer::new(s).lex().ok()?).parse(f).ok()
429}
430
/// Return the byte range that a string slice `part` occupies inside a string slice `whole`.
///
/// The range is computed from the addresses of the two slices, so
/// the caller must ensure that `part` is fully contained inside `whole`.
pub fn span(whole: &str, part: &str) -> core::ops::Range<usize> {
    let base = whole.as_ptr() as usize;
    let start = part.as_ptr() as usize - base;
    let end = start + part.len();
    start..end
}