preserves_schema/compiler/
mod.rs

1//! Implementation of the Schema-to-Rust compiler; this is the core of the
2//! [preserves-schema-rs][] program.
3//!
4//! See the [documentation for preserves-schema-rs][preserves-schema-rs] for examples of how to
5//! use the compiler programmatically from a `build.rs` script, but very briefly, use
6//! [preserves-schemac](https://preserves.dev/doc/preserves-schemac.html) to generate a
7//! metaschema instance `*.prb` file, and then put something like this in `build.rs`:
8//!
9//! ```rust,ignore
10//! use preserves_schema::compiler::*;
11//!
12//! const PATH_TO_PRB_FILE: &'static str = "your-metaschema-instance-file.prb";
13//!
14//! fn main() -> Result<(), std::io::Error> {
15//!     let buildroot = std::path::PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
16//!
17//!     let mut gen_dir = buildroot.clone();
18//!     gen_dir.push("src/schemas");
19//!     let mut c = CompilerConfig::new("crate::schemas".to_owned());
20//!
21//!     let inputs = expand_inputs(&vec![PATH_TO_PRB_FILE.to_owned()])?;
22//!     c.load_schemas_and_bundles(&inputs, &vec![])?;
23//!     compile(&c, &mut CodeCollector::files(gen_dir))
24//! }
25//! ```
26//!
27//! plus something like this in your `lib.rs` or main program:
28//!
29//! ```rust,ignore
30//! pub mod schemas {
31//!     include!(concat!(env!("OUT_DIR"), "/src/schemas/mod.rs"));
32//! }
33//! ```
34//!
35//! [preserves-schema-rs]: https://preserves.dev/doc/preserves-schema-rs.html
36
37pub mod context;
38pub mod cycles;
39pub mod names;
40pub mod parsers;
41pub mod readers;
42pub mod rskw;
43pub mod types;
44pub mod unparsers;
45
46use crate::compiler::context::*;
47use crate::compiler::types::Purpose;
48use crate::gen::schema;
49use crate::gen::schema::*;
50use crate::gen::Language;
51use crate::syntax::block::constructors::*;
52use crate::syntax::block::{Formatter, Item};
53use crate::*;
54
55use glob::glob;
56use preserves::value::BinarySource;
57use preserves::value::BytesBinarySource;
58use preserves::value::Map;
59use preserves::value::Reader;
60use preserves::value::Set;
61
62use std::fs::DirBuilder;
63use std::fs::File;
64use std::io;
65use std::io::Read;
66use std::io::Write;
67use std::path::PathBuf;
68
/// Names a Schema module within a (collection of) Schema bundle(s).
///
/// Each element of the vector is one component of the module's path. This alias is distinct
/// from the generated [`schema::ModulePath`] struct, which wraps the same kind of vector.
pub type ModulePath = Vec<String>;
71
/// Implement this trait to extend the compiler with custom code generation support. The main
/// code generators are also implemented as plugins.
///
/// Plugins are stored in [CompilerConfig::plugins]; because [CompilerConfig] derives `Debug`,
/// every plugin must implement `Debug` too.
///
/// For an example of its use outside the core compiler, see [`build.rs` for the `syndicate-rs` project](https://git.syndicate-lang.org/syndicate-lang/syndicate-rs/src/commit/60e6c6badfcbcbccc902994f4f32db6048f60d1f/syndicate/build.rs).
pub trait Plugin: std::fmt::Debug {
    /// Use `_module_ctxt` to emit code at a per-module level.
    ///
    /// The default implementation does nothing. [compile] invokes this once per plugin for
    /// each module being compiled, before it processes that module's definitions.
    fn generate_module(&self, _module_ctxt: &mut ModuleContext) {}

    /// Use `module_ctxt` to emit code at a per-Schema-[Definition] level.
    ///
    /// `definition_name` is the name under which `definition` is stored in its module.
    /// [compile] invokes this for every definition of a module twice: once in the
    /// `TargetToplevel` mode and once in the `TargetGeneric` mode.
    fn generate_definition(
        &self,
        module_ctxt: &mut ModuleContext,
        definition_name: &str,
        definition: &Definition,
    );
}
88
/// Callbacks attached to an [ExternalModule] through its `rust_language_types` field.
///
/// Every callback maps a string to a set of strings.
/// NOTE(review): the meaning of the `&str` argument and of the returned strings is decided by
/// the code that invokes these callbacks, which is not in this file -- confirm before relying
/// on it.
pub struct LanguageTypes {
    /// Callback installed by [ExternalModule::set_fallback_language_types]; `None` until then.
    /// Presumably consulted when `definitions` has no entry for a name -- TODO confirm.
    pub fallback: Option<Box<dyn Fn(&str) -> Set<String>>>,
    /// Per-definition callbacks, keyed by definition name, installed by
    /// [ExternalModule::set_definition_language_types].
    pub definitions: Map<String, Box<dyn Fn(&str) -> Set<String>>>,
}
93
94impl std::fmt::Debug for LanguageTypes {
95    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
96        f.debug_struct("LanguageTypes")
97            .field("fallback", &self.fallback.as_ref().map(|f| f("_")))
98            .field(
99                "definitions",
100                &self
101                    .definitions
102                    .iter()
103                    .map(|(k, f)| (k.clone(), f("_")))
104                    .collect::<Map<String, Set<String>>>(),
105            )
106            .finish()
107    }
108}
109
/// Description of an external module, registered with
/// [CompilerConfig::add_external_module] and kept in [CompilerConfig::external_modules] for
/// cross-referencing.
#[derive(Debug)]
pub struct ExternalModule {
    /// Schema module path; also the key under which the module is registered.
    pub path: ModulePath,
    /// Rust namespace string supplied to [ExternalModule::new].
    /// Presumably the Rust module path where the external module's types live -- TODO confirm.
    pub rust_namespace: String,
    /// Language-type callbacks; empty right after [ExternalModule::new].
    pub rust_language_types: LanguageTypes,
}
116
117impl ExternalModule {
118    pub fn new(path: ModulePath, rust_namespace: &str) -> Self {
119        ExternalModule {
120            path,
121            rust_namespace: rust_namespace.to_owned(),
122            rust_language_types: LanguageTypes {
123                fallback: None,
124                definitions: Map::new(),
125            },
126        }
127    }
128
129    pub fn set_fallback_language_types<F: 'static + Fn(&str) -> Set<String>>(
130        mut self,
131        f: F,
132    ) -> Self {
133        self.rust_language_types.fallback = Some(Box::new(f));
134        self
135    }
136
137    pub fn set_definition_language_types<F: 'static + Fn(&str) -> Set<String>>(
138        mut self,
139        d: &str,
140        f: F,
141    ) -> Self {
142        if self
143            .rust_language_types
144            .definitions
145            .insert(d.to_owned(), Box::new(f))
146            .is_some()
147        {
148            panic!(
149                "Duplicate language types definition installed: {:?} {:?}",
150                &self.path, d
151            );
152        }
153        self
154    }
155}
156
/// Used to collect output from the compiler.
///
/// Selects where the text of each generated module goes; see
/// [CodeCollector::collect_output].
pub enum CodeModuleCollector<'a> {
    /// Default file-based code emitter.
    Files {
        /// Where output Rust code files will be placed.
        output_dir: PathBuf,
    },
    /// Caller-supplied sink for the generated code.
    Custom {
        /// Used to collect the various produced source files.
        /// Useful for when compiling in e.g. proc_macro context.
        ///
        /// [compile] calls it with `Some(path)` and the generated text for each compiled
        /// module, and with `None` for the root output (the text that the file-based emitter
        /// writes to `mod.rs`).
        collect_output: &'a mut dyn FnMut(Option<&ModulePath>, &str) -> io::Result<()>,
    },
}
170
/// Used to configure and collect output from the compiler.
pub struct CodeCollector<'a> {
    /// If true, [compile] writes a `pub mod ...;` line for each compiled module into the root
    /// output. [CodeCollector::files] sets this to true.
    pub emit_mod_declarations: bool,
    /// Destination that receives the text of every generated module.
    pub collect_module: CodeModuleCollector<'a>,
}
176
/// Main entry point to the compiler: holds the schemas and settings that [compile] works from.
#[derive(Debug)]
pub struct CompilerConfig {
    /// All known Schema modules, indexed by [ModulePath] and annotated with a [Purpose].
    /// [compile] emits code only for modules whose purpose is `Purpose::Codegen`.
    pub bundle: Map<ModulePath, (Schema, Purpose)>,
    /// Fully-qualified Rust module prefix to use for each generated module.
    pub fully_qualified_module_prefix: String,
    /// Rust module path to the [preserves_schema::support][crate::support] module.
    /// [CompilerConfig::new] sets this to `"preserves_schema"`.
    pub support_crate: String,
    /// External modules for cross-referencing.
    pub external_modules: Map<ModulePath, ExternalModule>,
    /// Plugins active in this compiler instance.
    /// [CompilerConfig::new] installs the type, reader, parser and unparser plugins.
    pub plugins: Vec<Box<dyn Plugin>>,
    /// If true, a directive is emitted in each module instructing
    /// [rustfmt](https://github.com/rust-lang/rustfmt) to ignore it.
    /// [CompilerConfig::new] sets this to false.
    pub rustfmt_skip: bool,
}
194
195/// Loads a [Schema] or [Bundle] from path `i` into `bundle` for the given `purpose`.
196///
197/// If `i` holds a [Schema], then the file stem of `i` is used as the module name when placing
198/// the schema in `bundle`.
199pub fn load_schema_or_bundle_with_purpose(
200    bundle: &mut Map<ModulePath, (Schema, Purpose)>,
201    i: &PathBuf,
202    purpose: Purpose,
203) -> io::Result<()> {
204    let mut inserted = Map::<ModulePath, Schema>::new();
205    load_schema_or_bundle(&mut inserted, i)?;
206    for (k, v) in inserted.into_iter() {
207        bundle.insert(k, (v, purpose));
208    }
209    Ok(())
210}
211
212/// Loads a [Schema] or [Bundle] from raw binary encoded value `input` into `bundle` for the
213/// given `purpose`.
214///
215/// If `input` corresponds to a [Schema], then `prefix` is used as its module name; otherwise,
216/// it's a [Bundle], and `prefix` is ignored.
217pub fn load_schema_or_bundle_bin_with_purpose(
218    bundle: &mut Map<ModulePath, (Schema, Purpose)>,
219    prefix: &str,
220    input: &[u8],
221    purpose: Purpose,
222) -> io::Result<()> {
223    let mut inserted = Map::<ModulePath, Schema>::new();
224    load_schema_or_bundle_bin(&mut inserted, prefix, input)?;
225    for (k, v) in inserted.into_iter() {
226        bundle.insert(k, (v, purpose));
227    }
228    Ok(())
229}
230
/// Extracts the file stem of `i` as a string slice, for use as a module name.
///
/// Returns an `InvalidData` error if `i` has no file stem or if the stem is not valid UTF-8.
fn bundle_prefix(i: &PathBuf) -> io::Result<&str> {
    let invalid = |message: String| io::Error::new(io::ErrorKind::InvalidData, message);

    let stem = match i.file_stem() {
        Some(stem) => stem,
        None => return Err(invalid(format!("Bad schema file stem: {:?}", i))),
    };

    match stem.to_str() {
        Some(text) => Ok(text),
        None => Err(invalid(format!(
            "Invalid UTF-8 in schema file name: {:?}",
            i
        ))),
    }
}
247
248/// Loads a [Schema] or [Bundle] from path `i` into `bundle`.
249///
250/// If `i` holds a [Schema], then the file stem of `i` is used as the module name when placing
251/// the schema in `bundle`.
252///
253/// Returns true if it was a schema, false if it was a bundle.
254pub fn load_schema_or_bundle(bundle: &mut Map<ModulePath, Schema>, i: &PathBuf) -> io::Result<bool> {
255    let mut f = File::open(&i)?;
256    let mut bs = vec![];
257    f.read_to_end(&mut bs)?;
258    load_schema_or_bundle_bin(bundle, bundle_prefix(i)?, &bs[..])
259}
260
261/// Loads a [Schema] or [Bundle] from raw binary encoded value `input` into `bundle`.
262///
263/// If `input` corresponds to a [Schema], then `prefix` is used as its module name; otherwise,
264/// it's a [Bundle], and `prefix` is ignored.
265///
266/// Returns true if it was a schema, false if it was a bundle.
267pub fn load_schema_or_bundle_bin(
268    bundle: &mut Map<ModulePath, Schema>,
269    prefix: &str,
270    input: &[u8],
271) -> io::Result<bool> {
272    let mut src = BytesBinarySource::new(input);
273    let mut reader = src.packed_iovalues();
274    let blob = reader.demand_next(false)?;
275    let language = Language::default();
276
277    if let Ok(s) = language.parse(&blob) {
278        bundle.insert(vec![prefix.to_owned()], s);
279        Ok(true)
280    } else if let Ok(Bundle { modules }) = language.parse(&blob) {
281        for (ModulePath(k), v) in modules.0 {
282            bundle.insert(k, v);
283        }
284        Ok(false)
285    } else {
286        Err(io::Error::new(
287            io::ErrorKind::InvalidData,
288            format!("Invalid schema binary blob {:?}", prefix),
289        ))
290    }
291}
292
293impl CompilerConfig {
294    /// Construct a [CompilerConfig] configured to use `fully_qualified_module_prefix` as the
295    /// Rust module prefix for generated code.
296    pub fn new(fully_qualified_module_prefix: String) -> Self {
297        CompilerConfig {
298            bundle: Map::new(),
299            fully_qualified_module_prefix,
300            support_crate: "preserves_schema".to_owned(),
301            external_modules: Map::new(),
302            plugins: vec![
303                Box::new(types::TypePlugin),
304                Box::new(readers::ReaderPlugin),
305                Box::new(parsers::ParserPlugin),
306                Box::new(unparsers::UnparserPlugin),
307            ],
308            rustfmt_skip: false,
309        }
310    }
311
312    pub fn add_external_module(&mut self, m: ExternalModule) {
313        let path = m.path.clone();
314        if self.external_modules.insert(path.clone(), m).is_some() {
315            panic!("Duplicate external module installed: {:?}", path)
316        }
317    }
318
319    pub fn load_schemas_and_bundles(
320        &mut self,
321        inputs: &Vec<PathBuf>,
322        xrefs: &Vec<PathBuf>,
323    ) -> io::Result<()> {
324        for i in inputs {
325            load_schema_or_bundle_with_purpose(&mut self.bundle, i, Purpose::Codegen)?;
326        }
327        for i in xrefs {
328            load_schema_or_bundle_with_purpose(&mut self.bundle, i, Purpose::Xref)?;
329        }
330        Ok(())
331    }
332
333    pub fn load_xref_bin(&mut self, prefix: &str, bundle_or_schema: &[u8]) -> io::Result<()> {
334        load_schema_or_bundle_bin_with_purpose(
335            &mut self.bundle,
336            prefix,
337            bundle_or_schema,
338            Purpose::Xref,
339        )
340    }
341
342    fn build_type_cache(&self) -> Map<Ref, types::TDefinition> {
343        self.bundle
344            .iter()
345            .flat_map(|(modpath, s)| {
346                let modpath = ModulePath(modpath.clone());
347                s.0.definitions.0.iter().map(move |(name, def)| {
348                    let ty = types::definition_type(&modpath, s.1, name, def);
349                    (ty.self_ref.clone(), ty)
350                })
351            })
352            .collect()
353    }
354
355    fn generate_definition(
356        &self,
357        b: &mut BundleContext,
358        k: &ModulePath,
359        v: &Schema,
360        n: &str,
361        d: &Definition,
362        mode: ModuleContextMode,
363        generated: &mut Map<ModuleContextMode, Vec<Item>>,
364    ) {
365        b.generate_module(k, v, mode, generated, |m| {
366            for plugin in self.plugins.iter() {
367                plugin.generate_definition(m, n, d);
368            }
369        });
370    }
371}
372
373/// Expands a vector of [mod@glob]s to a vector of actual paths.
374pub fn expand_inputs(globs: &Vec<String>) -> io::Result<Vec<PathBuf>> {
375    let mut result = Vec::new();
376    for g in globs.iter() {
377        for p in
378            glob(g).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("{}", e)))?
379        {
380            result.push(p.map_err(glob::GlobError::into_error)?)
381        }
382    }
383    Ok(result)
384}
385
386impl<'a> CodeCollector<'a> {
387    /// Construct a [CodeCollector] that collects output Rust modules directly into the file
388    /// system tree rooted at `output_dir`.
389    pub fn files(output_dir: PathBuf) -> Self {
390        CodeCollector {
391            emit_mod_declarations: true,
392            collect_module: CodeModuleCollector::Files { output_dir },
393        }
394    }
395
396    #[doc(hidden)]
397    pub fn collect_output(&mut self, module: Option<&ModulePath>, contents: &str) -> io::Result<()> {
398        match &mut self.collect_module {
399            CodeModuleCollector::Files { output_dir } => {
400                let mut output_path = output_dir.clone();
401                if let Some(k) = module {
402                    output_path.extend(k);
403                    let module_name = output_path
404                        .file_stem()
405                        .unwrap()
406                        .to_str()
407                        .unwrap()
408                        .to_owned();
409                    let module_name = names::render_modname(&module_name);
410                    output_path.set_file_name(format!("{}.rs", module_name));
411                } else {
412                    output_path.push("mod.rs");
413                }
414                DirBuilder::new().recursive(true).create(output_path.parent().unwrap())?;
415
416                if output_path.exists() {
417                    if let Ok(mut f) = File::open(&output_path) {
418                        let mut existing_contents = String::new();
419                        f.read_to_string(&mut existing_contents)?;
420                        if existing_contents == contents {
421                            return Ok(());
422                        }
423                    }
424                }
425
426                let mut f = File::create(output_path)?;
427                f.write_all(contents.as_bytes())
428            }
429            CodeModuleCollector::Custom { collect_output } => {
430                collect_output(module, contents)
431            }
432        }
433    }
434}
435
436impl Ref {
437    pub fn qualify(&self, default_module_path: &schema::ModulePath) -> Ref {
438        if self.module.0.is_empty() {
439            Ref {
440                module: default_module_path.clone(),
441                name: self.name.clone(),
442            }
443        } else {
444            self.clone()
445        }
446    }
447}
448
449impl Schema {
450    pub fn has_embedded_type(&self) -> bool {
451        self.embedded_type != EmbeddedTypeName::False
452    }
453}
454
/// Main entry point: runs the compilation process.
///
/// For every module in `config.bundle` whose purpose is `Purpose::Codegen`, the plugins in
/// `config.plugins` are run and the resulting Rust source text is handed to `emitter` together
/// with the module's path. Afterwards one further "root" text is handed to `emitter` with no
/// module path. It contains, in order: optional `pub mod` declarations, a struct with one
/// field per entry of `b.literals`, a `Default` impl for that struct, and a `_bundle()`
/// function returning the binary encoding of all `Codegen` schemas.
///
/// # Errors
///
/// Returns the first error reported by `emitter`.
///
/// # Panics
///
/// Panics if a `Codegen` module has an empty module path while `emit_mod_declarations` is
/// set, if encoding a literal or the bundle fails, or if `generated` lacks an entry for one
/// of the three modes (presumably `generate_module` always records one -- TODO confirm).
pub fn compile<'a>(config: &CompilerConfig, emitter: &mut CodeCollector<'a>) -> io::Result<()> {
    let mut b = BundleContext::new(config);

    for (k, (v, module_purpose)) in config.bundle.iter() {
        // Modules loaded for any other purpose produce no code.
        if *module_purpose != Purpose::Codegen {
            continue;
        }

        //---------------------------------------------------------------------------

        // Items produced for this module, grouped by the mode they were generated in.
        let mut generated = Map::new();

        // Per-module hooks run first ...
        b.generate_module(k, v, ModuleContextMode::TargetModule, &mut generated, |m| {
            for plugin in config.plugins.iter() {
                plugin.generate_module(m);
            }
        });

        // ... then every definition is processed once per remaining mode.
        for (n, d) in &v.definitions.0 {
            use ModuleContextMode::*;
            config.generate_definition(&mut b, k, v, n, d, TargetToplevel, &mut generated);
            config.generate_definition(&mut b, k, v, n, d, TargetGeneric, &mut generated);
        }

        //---------------------------------------------------------------------------

        // Assemble the module text: inner attributes, imports, then the generated items.
        let mut lines: Vec<String> = Vec::new();

        lines.push(Formatter::to_string(vertical(
            false,
            seq!["#![allow(unused_parens)]", "#![allow(unused_imports)]"],
        )));
        if config.rustfmt_skip {
            lines.push("#![cfg_attr(rustfmt, rustfmt_skip)]".to_owned());
        }
        lines.push(Formatter::to_string(vertical(
            false,
            seq![
                "",
                "use std::convert::TryFrom;",
                format!("use {}::support as _support;", &config.support_crate),
                "use _support::Deserialize;",
                "use _support::Parse;",
                "use _support::Unparse;",
                "use _support::preserves;",
                "use preserves::value::Domain;",
                "use preserves::value::NestedValue;",
                ""
            ],
        )));

        // Appends one group of items followed by an empty line; empty groups add nothing.
        let mut emit_items = |items: Vec<Item>| {
            if !items.is_empty() {
                lines.push(Formatter::to_string(vertical(true, seq(items))));
                lines.push("".to_owned());
            }
        };
        emit_items(generated.remove(&ModuleContextMode::TargetModule).unwrap());
        emit_items(
            generated
                .remove(&ModuleContextMode::TargetToplevel)
                .unwrap(),
        );
        emit_items(generated.remove(&ModuleContextMode::TargetGeneric).unwrap());

        {
            let contents = lines.join("\n");
            emitter.collect_output(Some(k), &contents)?;
        }
    }

    // Root output, delivered with no module path.
    {
        let mut lines = Vec::new();

        if config.rustfmt_skip {
            lines.push("#![cfg_attr(rustfmt, rustfmt_skip)]".to_owned());
            lines.push("".to_owned());
        }

        if emitter.emit_mod_declarations {
            // One `pub mod` line per compiled module, named after the last path component.
            for (modpath, (_, module_purpose)) in config.bundle.iter() {
                if *module_purpose != Purpose::Codegen {
                    continue;
                }
                lines.push(format!(
                    "pub mod {};",
                    names::render_modname(modpath.last().unwrap())
                ));
            }
            lines.push("".to_owned());
        }

        lines.push(format!(
            "use {}::support as _support;",
            &config.support_crate
        ));
        lines.push("use _support::preserves;".to_owned());
        lines.push("".to_owned());

        // Struct with one public field of type `N` per entry of `b.literals`; the literal
        // value itself appears only in a comment next to the field.
        // NOTE(review): `b.literals` is presumably filled in while the modules above are
        // generated -- confirm in `BundleContext`.
        lines.push("#[allow(non_snake_case)]".to_owned());
        lines.push(Formatter::to_string(item(seq![
            "pub struct ",
            b.language_struct_name(),
            anglebrackets!["N: preserves::value::NestedValue"],
            " ",
            vertical(
                false,
                braces(
                    b.literals
                        .iter()
                        .map(|(value, name)| item(format!("pub {}: N /* {:?} */", name, value)))
                        .collect()
                )
            )
        ])));
        lines.push("".to_owned());
        // `Default` impl that initialises each field by decoding the literal's packed
        // binary encoding, which is embedded in the generated code.
        lines.push(Formatter::to_string(item(seq![
            "impl",
            anglebrackets!["N: preserves::value::NestedValue"],
            " Default for ",
            b.language_struct_name(),
            "<N> ",
            codeblock![seq![
                "fn default() -> Self ",
                codeblock![seq![
                    b.language_struct_name(),
                    " ",
                    vertical(
                        false,
                        braces(
                            b.literals
                                .iter()
                                .map(|(value, name)| {
                                    let bs = preserves::value::PackedWriter::encode_iovalue(&value)
                                        .unwrap();
                                    item(format!(
                                        "{}: /* {:?} */ _support::decode_lit(&{:?}).unwrap()",
                                        name, value, bs
                                    ))
                                })
                                .collect()
                        )
                    )
                ]]
            ]]
        ])));
        lines.push("".to_owned());
        {
            // Re-bundle only the `Codegen` schemas and embed their packed encoding as a byte
            // string. Note that `b` is shadowed inside this scope.
            let mut b = Bundle {
                modules: Modules(Map::new()),
            };
            for (modpath, (schema, purpose)) in config.bundle.iter() {
                if *purpose == Purpose::Codegen {
                    b.modules
                        .0
                        .insert(ModulePath(modpath.clone()), schema.clone());
                }
            }
            let b_value = Language::default().unparse(&b);
            let b_bin = preserves::value::PackedWriter::encode_iovalue(&b_value).unwrap();
            let mut hex_encoded_bundle = String::new();
            let mut count = 0;
            for b in b_bin {
                // Before every 16th byte, write a backslash, a newline and an indent, so
                // that the generated byte-string literal continues on a fresh line.
                if count % 16 == 0 {
                    hex_encoded_bundle.push_str("\\\n        ");
                }
                count += 1;
                hex_encoded_bundle.push_str(&format!("\\x{:02x}", b));
            }
            lines.push(Formatter::to_string(item(seq![
                "pub fn _bundle() -> &'static [u8] ",
                codeblock![seq!["b\"", hex_encoded_bundle, "\""]]
            ])));
        }
        lines.push("".to_owned());

        let contents = lines.join("\n");
        emitter.collect_output(None, &contents)?;
    }

    Ok(())
}