jaq_core/load/
mod.rs

1//! Combined file loading, lexing, and parsing for multiple modules.
2
3#[cfg(feature = "arbitrary")]
4mod arbitrary;
5pub mod lex;
6pub mod parse;
7mod prec_climb;
8pub mod test;
9
10use crate::{ops, path};
11#[cfg(feature = "std")]
12use alloc::boxed::Box;
13use alloc::{string::String, vec::Vec};
14pub use lex::Lexer;
15use lex::Token;
16pub use parse::Parser;
17use parse::{Def, Term};
18#[cfg(feature = "std")]
19use std::path::{Path, PathBuf};
20
21#[cfg(feature = "std")]
22extern crate std;
23
24/// Storage for loaded modules.
25///
26/// Once Rust has [internal references](https://smallcultfollowing.com/babysteps/blog/2024/06/02/the-borrow-checker-within/#step-4-internal-references),
27/// this should become unnecessary.
28/// I can't wait for it to happen!
29#[derive(Default)]
30pub struct Arena(typed_arena::Arena<String>);
31
32/// Combined file loader, lexer, and parser for multiple modules.
33pub struct Loader<S, P, R> {
34    #[allow(clippy::type_complexity)]
35    mods: Vec<(File<S, P>, Result<Module<S>, Error<S>>)>,
36    /// function to read module file contents from a path
37    read: R,
38    /// currently processed modules
39    ///
40    /// This is used to detect circular dependencies between modules.
41    open: Vec<P>,
42}
43
/// A (module) file, made up of its contents `C` and its path `P`.
///
/// Keeping both together is useful for creating precise error messages.
#[derive(Clone, Debug, Default)]
pub struct File<C, P> {
    /// contents of the file
    pub code: C,
    /// path of the file
    pub path: P,
}
54
55/// Information to resolve module/data imports.
56pub struct Import<'a, S, P> {
57    /// absolute path of the module where the import/include directive appears
58    ///
59    /// This is a path `P`, not a string `S`, because it usually does not appear in the source.
60    pub parent: &'a P,
61    /// relative path of the imported/included module, as given in the source
62    pub path: &'a S,
63    /// metadata attached to the import/include directive
64    pub meta: &'a Option<Term<S>>,
65}
66
67impl<C, P> File<C, P> {
68    /// Apply a function to the contents of a file.
69    ///
70    /// This is useful to go from a reference `&str` to an owned `String`,
71    /// in order to save the `File` without its corresponding [`Arena`].
72    pub fn map_code<C2>(self, f: impl Fn(C) -> C2) -> File<C2, P> {
73        File {
74            code: f(self.code),
75            path: self.path,
76        }
77    }
78}
79
80/// Error occurring during loading of a single module.
81#[derive(Debug)]
82pub enum Error<S> {
83    /// input/output errors, for example when trying to load a module that does not exist
84    Io(Vec<(S, String)>),
85    /// lex   errors, for example when loading a module `($) (`
86    Lex(Vec<lex::Error<S>>),
87    /// parse errors, for example when loading a module `(+) *`
88    Parse(Vec<parse::Error<S>>),
89}
90
91/// Module containing strings `S` and a body `B`.
92#[derive(Default)]
93pub struct Module<S, B = Vec<Def<S>>> {
94    /// metadata (optional)
95    pub(crate) meta: Option<Term<S>>,
96    /// included and imported modules
97    ///
98    /// Suppose that we have [`Modules`] `mods` and the current [`Module`] is `mods[id]`.
99    /// Then for every `(id_, name)` in `mods[id].1.mods`, we have that
100    /// the included/imported module is stored in `mods[id_]` (`id_ < id`), and
101    /// the module is included if `name` is `None` and imported if `name` is `Some(name)`.
102    pub(crate) mods: Vec<(usize, Option<S>)>,
103    /// imported variables, storing path and name (always starts with `$`)
104    pub(crate) vars: Vec<(S, S, Option<Term<S>>)>,
105    /// everything that comes after metadata and includes/imports
106    pub(crate) body: B,
107}
108
109/// Tree of modules containing definitions.
110///
111/// By convention, the last module contains a single definition that is the `main` filter.
112pub type Modules<S, P> = Vec<(File<S, P>, Module<S>)>;
113
114/// Errors occurring during loading of multiple modules.
115///
116/// For example, suppose that we have
117/// a file `l.jq` that yields a lex error,
118/// a file `p.jq` that yields a parse error, and
119/// a file `i.jq` that includes a non-existing module.
120/// If we then include all these files in our main program,
121/// [`Errors`] will contain each file with a different [`Error`].
122pub type Errors<S, P, E = Error<S>> = Vec<(File<S, P>, E)>;
123
124impl<S: core::ops::Deref<Target = str>, B> parse::Module<S, B> {
125    fn map(
126        self,
127        mut f: impl FnMut(&S, Option<Term<S>>) -> Result<usize, String>,
128    ) -> Result<Module<S, B>, Error<S>> {
129        // the prelude module is included implicitly in every module (except itself)
130        let mut mods = Vec::from([(0, None)]);
131        let mut vars = Vec::new();
132        let mut errs = Vec::new();
133        for (path, as_, meta) in self.deps {
134            match as_ {
135                Some(x) if x.starts_with('$') => vars.push((path, x, meta)),
136                as_ => match f(&path, meta) {
137                    Ok(mid) => mods.push((mid, as_)),
138                    Err(e) => errs.push((path, e)),
139                },
140            }
141        }
142        if errs.is_empty() {
143            Ok(Module {
144                meta: self.meta,
145                mods,
146                vars,
147                body: self.body,
148            })
149        } else {
150            Err(Error::Io(errs))
151        }
152    }
153}
154
155impl<S, B> Module<S, B> {
156    fn map_body<B2>(self, f: impl FnOnce(B) -> B2) -> Module<S, B2> {
157        Module {
158            meta: self.meta,
159            mods: self.mods,
160            vars: self.vars,
161            body: f(self.body),
162        }
163    }
164}
165
166type ReadResult<P> = Result<File<String, P>, String>;
167type ReadFn<P> = fn(Import<&str, P>) -> ReadResult<P>;
168
169impl<'s, P: Default> Loader<&'s str, P, ReadFn<P>> {
170    /// Initialise the loader with prelude definitions.
171    ///
172    /// The prelude is a special module that is implicitly included by all other modules
173    /// (including the main module).
174    /// That means that all filters defined in the prelude can be called from any module.
175    ///
176    /// The prelude is normally initialised with filters like `map` or `true`.
177    pub fn new(prelude: impl IntoIterator<Item = Def<&'s str>>) -> Self {
178        let defs = [Def::new("!empty", Vec::new(), Term::empty())];
179
180        let prelude = Module {
181            body: defs.into_iter().chain(prelude).collect(),
182            ..Module::default()
183        };
184
185        Self {
186            // the first module is reserved for the prelude
187            mods: Vec::from([(File::default(), Ok(prelude))]),
188            read: |_path| Err("module loading not supported".into()),
189            open: Vec::new(),
190        }
191    }
192}
193
#[cfg(feature = "std")]
impl<S: PartialEq> Term<S> {
    /// If `self` is an object, return the first value stored under the string key `key`.
    ///
    /// Entries whose key is not a string or that have no value are skipped.
    fn obj_key(&self, key: S) -> Option<&Self> {
        let kvs = match self {
            Term::Obj(kvs) => kvs,
            _ => return None,
        };
        kvs.iter().find_map(|(k, v)| match k.as_str() {
            Some(s) if *s == key => v.as_ref(),
            _ => None,
        })
    }

    /// Split a term of the shape `t1, t2, ..., tn` into the terms `t1` to `tn`.
    ///
    /// A term that is not a comma operation yields only itself.
    fn unconcat(&self) -> Box<dyn Iterator<Item = &Self> + '_> {
        if let Self::BinOp(l, parse::BinaryOp::Comma, r) = self {
            return Box::new(l.unconcat().chain(r.unconcat()));
        }
        Box::new(core::iter::once(self))
    }
}
217
218#[cfg(feature = "std")]
/// Replace the leading component `pre` of `path` by the path returned by `f`.
///
/// Returns `None` if `path` does not start with `pre` or if `f` returns `None`.
/// `f` is only called if `path` starts with `pre`.
fn expand_prefix(path: &Path, pre: &str, f: impl FnOnce() -> Option<PathBuf>) -> Option<PathBuf> {
    match path.strip_prefix(pre) {
        Ok(rest) => f().map(|mut base| {
            base.push(rest);
            base
        }),
        Err(_) => None,
    }
}
225
#[cfg(feature = "std")]
impl<'a> Import<'a, &'a str, PathBuf> {
    /// Yield the search paths given under the `search` key of the import metadata.
    ///
    /// The value of `search` may be an array of strings or a single string;
    /// anything else (including missing metadata) yields no paths.
    fn meta_paths(&self) -> impl Iterator<Item = PathBuf> + '_ {
        let strs = self.meta.as_ref().and_then(|meta| {
            let v = meta.obj_key("search")?;
            // the explicit type makes all arms coerce to the same boxed iterator
            let strs: Box<dyn Iterator<Item = _>> = match v {
                Term::Arr(Some(a)) => Box::new(a.unconcat().filter_map(|v| v.as_str())),
                v => match v.as_str() {
                    Some(s) => Box::new(core::iter::once(s)),
                    None => Box::new(core::iter::empty()),
                },
            };
            Some(strs)
        });
        strs.into_iter().flatten().map(|s| PathBuf::from(*s))
    }

    /// Try to find a file with given extension in the given search paths.
    ///
    /// Search paths from the import metadata are tried before `paths`.
    /// In every search path, a leading `~` is replaced by the home directory and
    /// a leading `$ORIGIN` by the directory of the current executable.
    /// The first candidate that can be canonicalised and that is a file is returned.
    pub fn find(self, paths: &[PathBuf], ext: &str) -> Result<PathBuf, String> {
        use std::env;

        let parent = self.parent.parent().unwrap_or(Path::new("."));

        let mut rel = PathBuf::from(*self.path);
        if !rel.is_relative() {
            return Err("non-relative path".into());
        }
        rel.set_extension(ext);

        let home_var = if cfg!(target_os = "windows") {
            "USERPROFILE"
        } else {
            "HOME"
        };
        let home = || env::var_os(home_var).map(PathBuf::from);
        let origin = || env::current_exe().ok()?.parent().map(PathBuf::from);
        let expand = |dir: &PathBuf| {
            expand_prefix(dir, "~", home)
                .or_else(|| expand_prefix(dir, "$ORIGIN", origin))
                .unwrap_or_else(|| dir.clone())
        };

        // search paths given in the metadata are relative to the parent file, whereas
        // search paths given on the command-line (`paths`, via `-L`) are not
        let from_meta = self.meta_paths().map(|dir| parent.join(expand(&dir)));
        let from_args = paths.iter().map(expand);
        for dir in from_meta.chain(from_args) {
            match dir.join(&rel).canonicalize() {
                Ok(file) if file.is_file() => return Ok(file),
                _ => continue,
            }
        }
        Err("file not found".into())
    }

    /// Find a file like [`Self::find`] and read its contents.
    ///
    /// An I/O error while reading is converted to its string representation.
    fn read(self, paths: &[PathBuf], ext: &str) -> ReadResult<PathBuf> {
        use alloc::string::ToString;
        let path = self.find(paths, ext)?;
        match std::fs::read_to_string(&path) {
            Ok(code) => Ok(File { code, path }),
            Err(e) => Err(e.to_string()),
        }
    }
}
284
285/// Apply function to path of every imported data file, accumulating errors.
286pub fn import<S: Copy, P: Clone>(
287    mods: &Modules<S, P>,
288    mut f: impl FnMut(Import<S, P>) -> Result<(), String>,
289) -> Result<(), Errors<S, P>> {
290    let mut errs = Vec::new();
291    let mut vals = Vec::new();
292    for (mod_file, module) in mods {
293        let mut mod_errs = Vec::new();
294        for (path, _name, meta) in &module.vars {
295            let parent = &mod_file.path;
296            match f(Import { parent, path, meta }) {
297                Ok(v) => vals.push(v),
298                Err(e) => mod_errs.push((*path, e)),
299            }
300        }
301        if !mod_errs.is_empty() {
302            errs.push((mod_file.clone(), Error::Io(mod_errs)));
303        }
304    }
305    if errs.is_empty() {
306        Ok(())
307    } else {
308        Err(errs)
309    }
310}
311
312impl<S, P, R> Loader<S, P, R> {
313    /// Provide a function to return the contents of included/imported module files.
314    ///
315    /// For every included/imported module, the loader will call this function to
316    /// obtain the contents of the module.
317    /// For example, if we have `include "foo"`, the loader calls `read("foo")`.
318    pub fn with_read<R2>(self, read: R2) -> Loader<S, P, R2> {
319        let Self { mods, open, .. } = self;
320        Loader { mods, read, open }
321    }
322}
323
#[cfg(feature = "std")]
impl<S, R> Loader<S, PathBuf, R> {
    /// Read the contents of included/imported module files by performing file I/O.
    ///
    /// Modules are searched in `paths` (and in the search paths given by the import metadata)
    /// as files with the extension `jq`.
    pub fn with_std_read(
        self,
        paths: &[PathBuf],
    ) -> Loader<S, PathBuf, impl FnMut(Import<&str, PathBuf>) -> ReadResult<PathBuf> + '_> {
        let read = move |import: Import<&str, PathBuf>| import.read(paths, "jq");
        self.with_read(read)
    }
}
334
335impl<'s, P: Clone + Eq, R: FnMut(Import<&'s str, P>) -> ReadResult<P>> Loader<&'s str, P, R> {
336    /// Load a set of modules, starting from a given file.
337    pub fn load(
338        mut self,
339        arena: &'s Arena,
340        file: File<&'s str, P>,
341    ) -> Result<Modules<&'s str, P>, Errors<&'s str, P>> {
342        let result = parse_main(file.code)
343            .and_then(|m| {
344                m.map(|path, meta| {
345                    let (parent, meta) = (&file.path, &meta);
346                    self.find(arena, Import { parent, path, meta })
347                })
348            })
349            .map(|m| m.map_body(|body| Vec::from([Def::new("main", Vec::new(), body)])));
350        self.mods.push((file, result));
351
352        let mut mods = Vec::new();
353        let mut errs = Vec::new();
354        for (file, result) in self.mods {
355            match result {
356                Ok(m) => mods.push((file, m)),
357                Err(e) => errs.push((file, e)),
358            }
359        }
360        if errs.is_empty() {
361            Ok(mods)
362        } else {
363            Err(errs)
364        }
365    }
366
367    fn find(&mut self, arena: &'s Arena, import: Import<&'s str, P>) -> Result<usize, String> {
368        let file = (self.read)(import)?;
369
370        let mut mods = self.mods.iter();
371        if let Some(id) = mods.position(|(file_, _)| file.path == file_.path) {
372            return Ok(id);
373        };
374        if self.open.contains(&file.path) {
375            return Err("circular include/import".into());
376        }
377
378        let code = &**arena.0.alloc(file.code);
379        self.open.push(file.path.clone());
380        let defs = parse_defs(code).and_then(|m| {
381            m.map(|path, meta| {
382                let (parent, meta) = (&file.path, &meta);
383                self.find(arena, Import { parent, path, meta })
384            })
385        });
386        assert!(self.open.pop().as_ref() == Some(&file.path));
387
388        let id = self.mods.len();
389        let path = file.path;
390        self.mods.push((File { path, code }, defs));
391        Ok(id)
392    }
393}
394
395fn parse_main(code: &str) -> Result<parse::Module<&str, Term<&str>>, Error<&str>> {
396    let tokens = lex::Lexer::new(code).lex().map_err(Error::Lex)?;
397    let conv_err = |(expected, found)| (expected, Token::opt_as_str(found, code));
398    parse::Parser::new(&tokens)
399        .parse(|p| p.module(|p| p.term()))
400        .map_err(|e| Error::Parse(e.into_iter().map(conv_err).collect()))
401}
402
403fn parse_defs(code: &str) -> Result<parse::Module<&str, Vec<Def<&str>>>, Error<&str>> {
404    let tokens = lex::Lexer::new(code).lex().map_err(Error::Lex)?;
405    let conv_err = |(expected, found)| (expected, Token::opt_as_str(found, code));
406    parse::Parser::new(&tokens)
407        .parse(|p| p.module(|p| p.defs()))
408        .map_err(|e| Error::Parse(e.into_iter().map(conv_err).collect()))
409}
410
411/// Lex a string and parse resulting tokens, returning [`None`] if any error occurred.
412///
413/// Example:
414///
415/// ~~~
416/// # use jaq_core::load::parse;
417/// let t = parse("[] | .[]", |p| p.term());
418/// ~~~
419pub fn parse<'s, T: Default, F>(s: &'s str, f: F) -> Option<T>
420where
421    F: for<'t> FnOnce(&mut Parser<'s, 't>) -> parse::Result<'s, 't, T>,
422{
423    Parser::new(&Lexer::new(s).lex().ok()?).parse(f).ok()
424}
425
/// Return the span of a string slice `part` relative to a string slice `whole`.
///
/// The span is the range of byte offsets that `part` occupies inside `whole`.
///
/// The caller must ensure that `part` is fully contained inside `whole`.
pub fn span(whole: &str, part: &str) -> core::ops::Range<usize> {
    let base = whole.as_ptr() as usize;
    let offset = part.as_ptr() as usize - base;
    core::ops::Range {
        start: offset,
        end: offset + part.len(),
    }
}