jaq_core/load/
mod.rs

1//! Combined file loading, lexing, and parsing for multiple modules.
2
3#[cfg(feature = "arbitrary")]
4mod arbitrary;
5pub mod lex;
6pub mod parse;
7mod prec_climb;
8pub mod test;
9
10use crate::{ops, path};
11#[cfg(feature = "std")]
12use alloc::boxed::Box;
13use alloc::{string::String, vec::Vec};
14pub use lex::Lexer;
15use lex::Token;
16pub use parse::Parser;
17use parse::{Def, Term};
18#[cfg(feature = "std")]
19use std::path::{Path, PathBuf};
20
21#[cfg(feature = "std")]
22extern crate std;
23
/// Storage for loaded modules.
///
/// The loader allocates the source code of every included/imported module in this arena,
/// so that string slices into that code stay valid for as long as the arena is borrowed.
///
/// Once Rust has [internal references](https://smallcultfollowing.com/babysteps/blog/2024/06/02/the-borrow-checker-within/#step-4-internal-references),
/// this should become unnecessary.
/// I can't wait for it to happen!
#[derive(Default)]
pub struct Arena(typed_arena::Arena<String>);
31
/// Combined file loader, lexer, and parser for multiple modules.
///
/// Here, `S` is the type of strings, `P` is the type of file paths, and
/// `R` is the type of the function that reads the contents of module files.
pub struct Loader<S, P, R> {
    /// files processed so far, each with the outcome of loading it
    ///
    /// The first entry is reserved for the prelude.
    #[allow(clippy::type_complexity)]
    mods: Vec<(File<S, P>, Result<Module<S>, Error<S>>)>,
    /// function to read module file contents from a path
    read: R,
    /// paths of currently processed modules
    ///
    /// This is used to detect circular dependencies between modules.
    open: Vec<P>,
}
43
/// Contents `C` and path `P` of a (module) file.
///
/// Keeping the contents next to the path is useful for creating precise error messages.
#[derive(Clone, Debug, Default)]
pub struct File<C, P> {
    /// contents of the file
    pub code: C,
    /// path of the file
    pub path: P,
}
54
/// Information to resolve module/data imports.
///
/// Here, `S` is the type of strings taken from the source and `P` is the type of file paths.
pub struct Import<'a, S, P> {
    /// absolute path of the module where the import/include directive appears
    ///
    /// This is a path `P`, not a string `S`, because it usually does not appear in the source.
    pub parent: &'a P,
    /// relative path of the imported/included module, as given in the source
    pub path: &'a S,
    /// metadata attached to the import/include directive
    pub meta: &'a Option<Term<S>>,
}
66
67impl<C, P> File<C, P> {
68    /// Apply a function to the contents of a file.
69    ///
70    /// This is useful to go from a reference `&str` to an owned `String`,
71    /// in order to save the `File` without its corresponding [`Arena`].
72    pub fn map_code<C2>(self, f: impl Fn(C) -> C2) -> File<C2, P> {
73        File {
74            code: f(self.code),
75            path: self.path,
76        }
77    }
78}
79
/// Error occurring during loading of a single module.
///
/// The type `S` is the type of strings that the errors refer to.
#[derive(Debug)]
pub enum Error<S> {
    /// input/output errors, for example when trying to load a module that does not exist
    ///
    /// Every entry consists of the path of a dependency (as given in the source) and
    /// a message that says why the dependency could not be loaded.
    Io(Vec<(S, String)>),
    /// lex errors, for example when loading a module `($) (`
    Lex(Vec<lex::Error<S>>),
    /// parse errors, for example when loading a module `(+) *`
    Parse(Vec<parse::Error<S>>),
}
90
/// Module containing strings `S` and a body `B`.
///
/// By default, the body is a sequence of definitions.
#[derive(Default)]
pub struct Module<S, B = Vec<Def<S>>> {
    /// metadata (optional)
    pub(crate) meta: Option<Term<S>>,
    /// included and imported modules
    ///
    /// Suppose that we have [`Modules`] `mods` and the current [`Module`] is `mods[id]`.
    /// Then for every `(id_, name)` in `mods[id].1.mods`, we have that
    /// the included/imported module is stored in `mods[id_]` (`id_ < id`), and
    /// the module is included if `name` is `None` and imported if `name` is `Some(name)`.
    pub(crate) mods: Vec<(usize, Option<S>)>,
    /// imported variables, storing path, name (always starts with `$`), and metadata (optional)
    pub(crate) vars: Vec<(S, S, Option<Term<S>>)>,
    /// everything that comes after metadata and includes/imports
    pub(crate) body: B,
}
108
/// Tree of modules containing definitions.
///
/// Every module is stored together with the file that it stems from.
/// By convention, the last module contains a single definition that is the `main` filter.
pub type Modules<S, P> = Vec<(File<S, P>, Module<S>)>;
113
/// Errors occurring during loading of multiple modules.
///
/// Every error `E` is stored together with the file in which it occurred.
///
/// For example, suppose that we have
/// a file `l.jq` that yields a lex error,
/// a file `p.jq` that yields a parse error, and
/// a file `i.jq` that includes a non-existing module.
/// If we then include all these files in our main program,
/// [`Errors`] will contain each file with a different [`Error`].
pub type Errors<S, P, E = Error<S>> = Vec<(File<S, P>, E)>;
123
124impl<S: core::ops::Deref<Target = str>, B> parse::Module<S, B> {
125    fn map(
126        self,
127        mut f: impl FnMut(&S, Option<Term<S>>) -> Result<usize, String>,
128    ) -> Result<Module<S, B>, Error<S>> {
129        // the prelude module is included implicitly in every module (except itself)
130        let mut mods = Vec::from([(0, None)]);
131        let mut vars = Vec::new();
132        let mut errs = Vec::new();
133        for (path, as_, meta) in self.deps {
134            match as_ {
135                Some(x) if x.starts_with('$') => vars.push((path, x, meta)),
136                as_ => match f(&path, meta) {
137                    Ok(mid) => mods.push((mid, as_)),
138                    Err(e) => errs.push((path, e)),
139                },
140            }
141        }
142        if errs.is_empty() {
143            Ok(Module {
144                meta: self.meta,
145                mods,
146                vars,
147                body: self.body,
148            })
149        } else {
150            Err(Error::Io(errs))
151        }
152    }
153}
154
155impl<S, B> Module<S, B> {
156    fn map_body<B2>(self, f: impl FnOnce(B) -> B2) -> Module<S, B2> {
157        Module {
158            meta: self.meta,
159            mods: self.mods,
160            vars: self.vars,
161            body: f(self.body),
162        }
163    }
164}
165
/// Result of reading a module file: either its contents and path, or an error message.
type ReadResult<P> = Result<File<String, P>, String>;
/// Type of the function to read module files that [`Loader::new`] installs.
type ReadFn<P> = fn(Import<&str, P>) -> ReadResult<P>;
168
169impl<'s, P: Default> Loader<&'s str, P, ReadFn<P>> {
170    /// Initialise the loader with prelude definitions.
171    ///
172    /// The prelude is a special module that is implicitly included by all other modules
173    /// (including the main module).
174    /// That means that all filters defined in the prelude can be called from any module.
175    ///
176    /// The prelude is normally initialised with filters like `map` or `true`.
177    pub fn new(prelude: impl IntoIterator<Item = Def<&'s str>>) -> Self {
178        let defs = [
179            Def::new("!recurse", Vec::new(), Term::recurse("!recurse")),
180            Def::new("!empty", Vec::new(), Term::empty()),
181        ];
182
183        let prelude = Module {
184            body: defs.into_iter().chain(prelude).collect(),
185            ..Module::default()
186        };
187
188        Self {
189            // the first module is reserved for the prelude
190            mods: Vec::from([(File::default(), Ok(prelude))]),
191            read: |_path| Err("module loading not supported".into()),
192            open: Vec::new(),
193        }
194    }
195}
196
#[cfg(feature = "std")]
impl<S: PartialEq> Term<S> {
    /// If the term is an object, return the first value stored under the string key `key`.
    ///
    /// Entries whose key is not a string or that have no value are skipped.
    /// For any term that is not an object, this returns `None`.
    fn obj_key(&self, key: S) -> Option<&Self> {
        match self {
            Term::Obj(kvs) => kvs
                .iter()
                .filter(|(k, _)| k.as_str().map_or(false, |s| *s == key))
                .find_map(|(_, v)| v.as_ref()),
            _ => None,
        }
    }

    /// Split a term of the shape `t1, ..., tn` into the terms `t1` to `tn`.
    ///
    /// A term that is not an application of the comma operator yields only itself.
    fn unconcat(&self) -> Box<dyn Iterator<Item = &Self> + '_> {
        if let Self::BinOp(l, parse::BinaryOp::Comma, r) = self {
            let (left, right) = (l.unconcat(), r.unconcat());
            return Box::new(left.chain(right));
        }
        Box::new(core::iter::once(self))
    }
}
220
221#[cfg(feature = "std")]
/// Replace the prefix `pre` of `path` by the path that `f` yields.
///
/// This returns `None` if `path` does not start with `pre` or if `f` yields `None`.
/// The function `f` is called only if `path` starts with `pre`.
/// The prefix has to match whole path components, so
/// with the prefix `~`, the path `~/foo` is expanded, but `~foo` is not.
fn expand_prefix(path: &Path, pre: &str, f: impl FnOnce() -> Option<PathBuf>) -> Option<PathBuf> {
    match path.strip_prefix(pre) {
        Ok(rest) => f().map(|mut base| {
            base.push(rest);
            base
        }),
        Err(_) => None,
    }
}
228
#[cfg(feature = "std")]
impl<'a> Import<'a, &'a str, PathBuf> {
    /// Yield the search paths that are given under the key `search` in the metadata.
    ///
    /// The value of `search` may be a string or an array of strings;
    /// array elements that are not strings are ignored.
    /// Without metadata or without a `search` key, no paths are yielded.
    fn meta_paths(&self) -> impl Iterator<Item = PathBuf> + '_ {
        let paths = self.meta.as_ref().and_then(|meta| {
            let search = meta.obj_key("search")?;
            // the cast in the last branch makes all branches yield a boxed iterator
            let strs = if let Term::Arr(Some(elems)) = search {
                Box::new(elems.unconcat().filter_map(|elem| elem.as_str()))
            } else if let Some(single) = search.as_str() {
                Box::new(core::iter::once(single))
            } else {
                Box::new(core::iter::empty()) as Box<dyn Iterator<Item = _>>
            };
            Some(strs.map(|s| PathBuf::from(*s)))
        });
        paths.into_iter().flatten()
    }

    /// Try to find a file with given extension in the given search paths.
    ///
    /// The search paths given in the metadata are tried before `paths`.
    /// In every search path, a leading `~` is replaced by the home directory and
    /// a leading `$ORIGIN` is replaced by the directory of the current executable.
    ///
    /// This fails if the path of the import is not relative or
    /// if no search path contains the file.
    pub fn find(self, paths: &[PathBuf], ext: &str) -> Result<PathBuf, String> {
        // directory of the file that contains the import/include directive
        let parent = Path::new(self.parent).parent().unwrap_or(Path::new("."));

        let mut rel = PathBuf::from(*self.path);
        if rel.is_absolute() {
            return Err("non-relative path".into());
        }
        rel.set_extension(ext);

        let home_var = if cfg!(target_os = "windows") {
            "USERPROFILE"
        } else {
            "HOME"
        };

        let home_dir = || std::env::var_os(home_var).map(PathBuf::from);
        let exe_dir = || std::env::current_exe().ok()?.parent().map(PathBuf::from);
        let expand = |dir: &PathBuf| {
            expand_prefix(dir, "~", home_dir)
                .or_else(|| expand_prefix(dir, "$ORIGIN", exe_dir))
                .unwrap_or_else(|| dir.clone())
        };

        // search paths given in the metadata are relative to the parent file, whereas
        // search paths given on the command-line (`paths`, via `-L`) are not
        let from_meta = self.meta_paths().map(|dir| parent.join(expand(&dir)));
        let from_args = paths.iter().map(expand);
        let found = from_meta
            .chain(from_args)
            .filter_map(|dir| dir.join(&rel).canonicalize().ok())
            .find(|file| file.is_file());
        found.ok_or_else(|| "file not found".into())
    }

    /// Find a file like [`Self::find`] and read its contents.
    ///
    /// An error that occurs when reading the file is converted to a string.
    fn read(self, paths: &[PathBuf], ext: &str) -> ReadResult<PathBuf> {
        use alloc::string::ToString;
        let path = self.find(paths, ext)?;
        match std::fs::read_to_string(&path) {
            Ok(code) => Ok(File { code, path }),
            Err(e) => Err(e.to_string()),
        }
    }
}
287
288/// Apply function to path of every imported data file, accumulating errors.
289pub fn import<S: Copy, P: Clone>(
290    mods: &Modules<S, P>,
291    mut f: impl FnMut(Import<S, P>) -> Result<(), String>,
292) -> Result<(), Errors<S, P>> {
293    let mut errs = Vec::new();
294    let mut vals = Vec::new();
295    for (mod_file, module) in mods {
296        let mut mod_errs = Vec::new();
297        for (path, _name, meta) in &module.vars {
298            let parent = &mod_file.path;
299            match f(Import { parent, path, meta }) {
300                Ok(v) => vals.push(v),
301                Err(e) => mod_errs.push((*path, e)),
302            }
303        }
304        if !mod_errs.is_empty() {
305            errs.push((mod_file.clone(), Error::Io(mod_errs)));
306        }
307    }
308    if errs.is_empty() {
309        Ok(())
310    } else {
311        Err(errs)
312    }
313}
314
315impl<S, P, R> Loader<S, P, R> {
316    /// Provide a function to return the contents of included/imported module files.
317    ///
318    /// For every included/imported module, the loader will call this function to
319    /// obtain the contents of the module.
320    /// For example, if we have `include "foo"`, the loader calls `read("foo")`.
321    pub fn with_read<R2>(self, read: R2) -> Loader<S, P, R2> {
322        let Self { mods, open, .. } = self;
323        Loader { mods, read, open }
324    }
325}
326
#[cfg(feature = "std")]
impl<S, R> Loader<S, PathBuf, R> {
    /// Read the contents of included/imported module files by performing file I/O.
    ///
    /// Module files are looked up with the file extension `jq`, using
    /// the search paths `paths` in addition to those given in the import metadata.
    pub fn with_std_read(
        self,
        paths: &[PathBuf],
    ) -> Loader<S, PathBuf, impl FnMut(Import<&str, PathBuf>) -> ReadResult<PathBuf> + '_> {
        let read = |import: Import<&str, PathBuf>| import.read(paths, "jq");
        self.with_read(read)
    }
}
337
338impl<'s, P: Clone + Eq, R: FnMut(Import<&'s str, P>) -> ReadResult<P>> Loader<&'s str, P, R> {
339    /// Load a set of modules, starting from a given file.
340    pub fn load(
341        mut self,
342        arena: &'s Arena,
343        file: File<&'s str, P>,
344    ) -> Result<Modules<&'s str, P>, Errors<&'s str, P>> {
345        let result = parse_main(file.code)
346            .and_then(|m| {
347                m.map(|path, meta| {
348                    let (parent, meta) = (&file.path, &meta);
349                    self.find(arena, Import { parent, path, meta })
350                })
351            })
352            .map(|m| m.map_body(|body| Vec::from([Def::new("main", Vec::new(), body)])));
353        self.mods.push((file, result));
354
355        let mut mods = Vec::new();
356        let mut errs = Vec::new();
357        for (file, result) in self.mods {
358            match result {
359                Ok(m) => mods.push((file, m)),
360                Err(e) => errs.push((file, e)),
361            }
362        }
363        if errs.is_empty() {
364            Ok(mods)
365        } else {
366            Err(errs)
367        }
368    }
369
370    fn find(&mut self, arena: &'s Arena, import: Import<&'s str, P>) -> Result<usize, String> {
371        let file = (self.read)(import)?;
372
373        let mut mods = self.mods.iter();
374        if let Some(id) = mods.position(|(file_, _)| file.path == file_.path) {
375            return Ok(id);
376        };
377        if self.open.contains(&file.path) {
378            return Err("circular include/import".into());
379        }
380
381        let code = &**arena.0.alloc(file.code);
382        self.open.push(file.path.clone());
383        let defs = parse_defs(code).and_then(|m| {
384            m.map(|path, meta| {
385                let (parent, meta) = (&file.path, &meta);
386                self.find(arena, Import { parent, path, meta })
387            })
388        });
389        assert!(self.open.pop().as_ref() == Some(&file.path));
390
391        let id = self.mods.len();
392        let path = file.path;
393        self.mods.push((File { path, code }, defs));
394        Ok(id)
395    }
396}
397
398fn parse_main(code: &str) -> Result<parse::Module<&str, Term<&str>>, Error<&str>> {
399    let tokens = lex::Lexer::new(code).lex().map_err(Error::Lex)?;
400    let conv_err = |(expected, found)| (expected, Token::opt_as_str(found, code));
401    parse::Parser::new(&tokens)
402        .parse(|p| p.module(|p| p.term()))
403        .map_err(|e| Error::Parse(e.into_iter().map(conv_err).collect()))
404}
405
406fn parse_defs(code: &str) -> Result<parse::Module<&str, Vec<Def<&str>>>, Error<&str>> {
407    let tokens = lex::Lexer::new(code).lex().map_err(Error::Lex)?;
408    let conv_err = |(expected, found)| (expected, Token::opt_as_str(found, code));
409    parse::Parser::new(&tokens)
410        .parse(|p| p.module(|p| p.defs()))
411        .map_err(|e| Error::Parse(e.into_iter().map(conv_err).collect()))
412}
413
414/// Lex a string and parse resulting tokens, returning [`None`] if any error occurred.
415///
416/// Example:
417///
418/// ~~~
419/// # use jaq_core::load::parse;
420/// let t = parse("[] | .[]", |p| p.term());
421/// ~~~
422pub fn parse<'s, T: Default, F>(s: &'s str, f: F) -> Option<T>
423where
424    F: for<'t> FnOnce(&mut Parser<'s, 't>) -> parse::Result<'s, 't, T>,
425{
426    Parser::new(&Lexer::new(s).lex().ok()?).parse(f).ok()
427}
428
/// Return the span of a string slice `part` relative to a string slice `whole`.
///
/// The span is the range of byte offsets at which `part` is situated in `whole`.
///
/// The caller must ensure that `part` is fully contained inside `whole`.
pub fn span(whole: &str, part: &str) -> core::ops::Range<usize> {
    let base = whole.as_ptr() as usize;
    let start = part.as_ptr() as usize - base;
    core::ops::Range {
        start,
        end: start + part.len(),
    }
}