Skip to main content

aion_package/codegen/
project.rs

//! Project-level codec generation: `workflow.toml` + `schemas/*.json` →
//! `src/<package>_io.gleam`.
//!
//! Generation is deterministic: schema files are processed in filename
//! (byte) order and property order is preserved from each JSON document, so
//! the same schemas always produce a byte-identical module. The module is
//! written only after every schema generated successfully — a loud error
//! never leaves a partial file behind.
9
10use std::io;
11use std::path::{Path, PathBuf};
12
13use serde::Deserialize;
14
15use super::emit;
16use super::error::CodegenError;
17use super::json;
18use super::names::{NameRegistry, is_reserved_word, is_snake_identifier};
19use super::schema::{self, SchemaArtifact};
20use crate::PackagingError;
21use crate::project::config;
22
/// Name of the schema directory, resolved against the project root; codegen
/// reads its schema files from here.
const SCHEMAS_DIR: &str = "schemas";
25
/// Selects how a codegen run treats the generated module on disk.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CodegenMode {
    /// Emit `src/<package>_io.gleam`, overwriting whatever is already there.
    Write,
    /// Leave the disk untouched: compare the freshly generated module with
    /// the on-disk file and report any difference as an error (CI gate).
    Check,
}
35
/// Outcome of a codegen run that completed without error.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CodegenReport {
    /// Location of the generated module: the project root joined with
    /// `src/<package>_io.gleam` (absolute whenever the root is).
    pub module_path: PathBuf,
    /// The same module expressed relative to the project root
    /// (`src/<package>_io.gleam`).
    pub module_relative: String,
    /// Root-relative paths of the schema files the module was generated
    /// from, listed in the order they were processed.
    pub schemas: Vec<String>,
    /// Full source text of the generated module.
    pub contents: String,
    /// `true` when the module was written to disk; `false` in check mode.
    pub written: bool,
}
51
52/// Generates Gleam types and JSON codecs for every `schemas/*.json` of the
53/// workflow project at `root`, writing or checking
54/// `src/<package>_io.gleam` per `mode`.
55///
56/// The project's `workflow.toml` is validated first (including that every
57/// referenced schema exists, parses, and lives under `schemas/`), so codecs
58/// can never be generated from schemas the packaging boundary would reject.
59///
60/// # Errors
61///
62/// Returns a [`CodegenError`] naming the offending file — and, for schema
63/// constructs outside the supported subset, the JSON pointer — for: invalid
64/// or missing `workflow.toml` / `gleam.toml`, missing or unreadable schema
65/// files, invalid JSON, unsupported schema constructs, generated-name
66/// collisions, write failures, and `--check` drift.
67pub fn codegen_project(root: &Path, mode: CodegenMode) -> Result<CodegenReport, CodegenError> {
68    let package_name = read_package_name(root)?;
69    let project_config = config::load_config(root)?;
70    let schemas_dir = root.join(SCHEMAS_DIR);
71    for (index, workflow) in project_config.workflows.iter().enumerate() {
72        for (field, path) in [
73            ("input_schema", &workflow.input_schema_path),
74            ("output_schema", &workflow.output_schema_path),
75        ] {
76            if path.parent() != Some(schemas_dir.as_path()) {
77                return Err(CodegenError::SchemaOutsideSchemasDir {
78                    field: format!("workflow[{index}].{field}"),
79                    path: path.clone(),
80                });
81            }
82        }
83    }
84
85    let file_names = list_schema_file_names(&schemas_dir)?;
86    let mut registry = NameRegistry::default();
87    let mut artifacts: Vec<SchemaArtifact> = Vec::with_capacity(file_names.len());
88    for file_name in &file_names {
89        artifacts.push(parse_one_schema(&schemas_dir, file_name, &mut registry)?);
90    }
91
92    let contents = emit::emit_module(&package_name, &artifacts);
93    let module_relative = format!("src/{package_name}_io.gleam");
94    let module_path = root.join("src").join(format!("{package_name}_io.gleam"));
95    let written = match mode {
96        CodegenMode::Write => {
97            std::fs::write(&module_path, &contents).map_err(|source| CodegenError::Write {
98                path: module_path.clone(),
99                source,
100            })?;
101            true
102        }
103        CodegenMode::Check => {
104            check_on_disk(&module_path, &contents)?;
105            false
106        }
107    };
108
109    Ok(CodegenReport {
110        module_path,
111        module_relative,
112        schemas: file_names
113            .iter()
114            .map(|name| format!("{SCHEMAS_DIR}/{name}"))
115            .collect(),
116        contents,
117        written,
118    })
119}
120
121/// Lists `*.json` file names directly under `schemas/`, sorted by byte
122/// order. Non-JSON entries and subdirectories are outside the codegen
123/// contract and are not generated from.
124fn list_schema_file_names(schemas_dir: &Path) -> Result<Vec<String>, CodegenError> {
125    let entries = match std::fs::read_dir(schemas_dir) {
126        Ok(entries) => entries,
127        Err(source) if source.kind() == io::ErrorKind::NotFound => {
128            return Err(CodegenError::SchemasDirMissing {
129                path: schemas_dir.to_path_buf(),
130            });
131        }
132        Err(source) => {
133            return Err(CodegenError::SchemasDirRead {
134                path: schemas_dir.to_path_buf(),
135                source,
136            });
137        }
138    };
139    let mut names = Vec::new();
140    for entry in entries {
141        let entry = entry.map_err(|source| CodegenError::SchemasDirRead {
142            path: schemas_dir.to_path_buf(),
143            source,
144        })?;
145        let path = entry.path();
146        if !path.is_file() || path.extension().is_none_or(|ext| ext != "json") {
147            continue;
148        }
149        let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
150            return Err(CodegenError::SchemaFileName {
151                path,
152                reason: "file name is not valid UTF-8".to_owned(),
153            });
154        };
155        names.push(name.to_owned());
156    }
157    if names.is_empty() {
158        return Err(CodegenError::SchemasDirEmpty {
159            path: schemas_dir.to_path_buf(),
160        });
161    }
162    names.sort();
163    Ok(names)
164}
165
166/// Reads, parses (order-preserving), and converts one schema file.
167fn parse_one_schema(
168    schemas_dir: &Path,
169    file_name: &str,
170    registry: &mut NameRegistry,
171) -> Result<SchemaArtifact, CodegenError> {
172    let path = schemas_dir.join(file_name);
173    let relative = PathBuf::from(SCHEMAS_DIR).join(file_name);
174    let Some(stem) = Path::new(file_name)
175        .file_stem()
176        .and_then(|stem| stem.to_str())
177    else {
178        return Err(CodegenError::SchemaFileName {
179            path: relative,
180            reason: "file name has no stem".to_owned(),
181        });
182    };
183    let bytes = std::fs::read(&path).map_err(|source| CodegenError::SchemaRead {
184        path: path.clone(),
185        source,
186    })?;
187    let document = json::parse_ordered(&bytes).map_err(|source| CodegenError::SchemaParse {
188        path: path.clone(),
189        source,
190    })?;
191    schema::parse_schema(&relative, stem, &document, registry)
192}
193
194fn check_on_disk(module_path: &Path, contents: &str) -> Result<(), CodegenError> {
195    let on_disk = match std::fs::read(module_path) {
196        Ok(bytes) => bytes,
197        Err(source) if source.kind() == io::ErrorKind::NotFound => {
198            return Err(CodegenError::CheckMissing {
199                path: module_path.to_path_buf(),
200            });
201        }
202        Err(source) => {
203            return Err(CodegenError::CheckRead {
204                path: module_path.to_path_buf(),
205                source,
206            });
207        }
208    };
209    if on_disk != contents.as_bytes() {
210        return Err(CodegenError::CheckDrift {
211            path: module_path.to_path_buf(),
212        });
213    }
214    Ok(())
215}
216
217#[derive(Debug, Deserialize)]
218struct GleamTomlName {
219    name: String,
220}
221
222/// Reads the Gleam package name from `<root>/gleam.toml`; it prefixes the
223/// generated module (`src/<name>_io.gleam`).
224fn read_package_name(root: &Path) -> Result<String, CodegenError> {
225    let path = root.join("gleam.toml");
226    let text = match std::fs::read_to_string(&path) {
227        Ok(text) => text,
228        Err(source) if source.kind() == io::ErrorKind::NotFound => {
229            return Err(CodegenError::Config(PackagingError::GleamTomlMissing {
230                path,
231            }));
232        }
233        Err(source) => {
234            return Err(CodegenError::Config(PackagingError::GleamMetadataRead {
235                path,
236                source,
237            }));
238        }
239    };
240    let parsed: GleamTomlName = toml::from_str(&text).map_err(|source| {
241        CodegenError::Config(PackagingError::GleamMetadataParse { path, source })
242    })?;
243    if !is_snake_identifier(&parsed.name) || is_reserved_word(&parsed.name) {
244        return Err(CodegenError::ProjectName {
245            name: parsed.name,
246            reason: "must be a snake_case identifier and not a Gleam reserved word".to_owned(),
247        });
248    }
249    Ok(parsed.name)
250}
251
#[cfg(test)]
mod tests {
    use std::fs;
    use std::path::{Path, PathBuf};

    use super::{CodegenMode, codegen_project, list_schema_file_names};
    use crate::PackagingError;
    use crate::codegen::error::CodegenError;
    use crate::project::fixture;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Minimal `gleam.toml` naming the package `demo`.
    const GLEAM_TOML: &str = "name = \"demo\"\nversion = \"0.1.0\"\ntarget = \"erlang\"\n";

    /// Descriptor with one workflow whose schemas both live under `schemas/`.
    const WORKFLOW_TOML: &str = r#"[[workflow]]
entry_module = "demo"
entry_function = "run"
timeout_seconds = 30
input_schema = "schemas/input.json"
output_schema = "schemas/output.json"
activities = []
"#;

    /// Object schema with one required and one optional string property.
    const INPUT_SCHEMA: &[u8] = br#"{
  "type": "object",
  "required": ["name"],
  "additionalProperties": false,
  "properties": {
    "name": { "type": "string" },
    "note": { "type": "string" }
  }
}"#;

    /// Bare string schema.
    const OUTPUT_SCHEMA: &[u8] = br#"{ "type": "string" }"#;

    /// Creates a complete temp project (manifest, descriptor, both schemas
    /// and an entry module) labelled `label`.
    fn project(label: &str) -> Result<PathBuf, Box<dyn std::error::Error>> {
        fixture::temp_project(
            label,
            &[
                ("gleam.toml", GLEAM_TOML.as_bytes()),
                ("workflow.toml", WORKFLOW_TOML.as_bytes()),
                ("schemas/input.json", INPUT_SCHEMA),
                ("schemas/output.json", OUTPUT_SCHEMA),
                ("src/demo.gleam", b"pub fn run() { Nil }"),
            ],
        )
    }

    /// Schema list a report is expected to carry for the default fixture.
    fn default_schema_list() -> Vec<String> {
        vec![
            "schemas/input.json".to_owned(),
            "schemas/output.json".to_owned(),
        ]
    }

    /// Write mode produces the module on disk and a matching report.
    #[test]
    fn write_mode_generates_the_module_with_header_and_report() -> TestResult {
        let root = project("codegen-write")?;
        let report = codegen_project(&root, CodegenMode::Write)?;

        assert!(report.written);
        assert_eq!(report.module_relative, "src/demo_io.gleam");
        assert_eq!(report.module_path, root.join("src/demo_io.gleam"));
        assert_eq!(report.schemas, default_schema_list());

        let generated = fs::read_to_string(&report.module_path)?;
        assert_eq!(generated, report.contents);
        assert!(generated.starts_with(
            "//// Generated by aion codegen — do not edit; regenerate from schemas/."
        ));
        assert!(generated.contains("pub type Input {"));
        assert!(generated.contains("pub fn output_decoder() -> decode.Decoder(String) {"));
        fs::remove_dir_all(&root)?;
        Ok(())
    }

    /// Two consecutive write runs yield identical bytes.
    #[test]
    fn generation_is_deterministic_across_runs() -> TestResult {
        let root = project("codegen-deterministic")?;
        let run_one = codegen_project(&root, CodegenMode::Write)?;
        let bytes_one = fs::read(&run_one.module_path)?;
        let run_two = codegen_project(&root, CodegenMode::Write)?;
        let bytes_two = fs::read(&run_two.module_path)?;

        assert_eq!(run_one.contents, run_two.contents);
        assert_eq!(
            bytes_one, bytes_two,
            "regeneration must be byte-identical"
        );
        fs::remove_dir_all(&root)?;
        Ok(())
    }

    /// Check mode accepts an untouched module and reports drift after a
    /// hand edit, naming the module path.
    #[test]
    fn check_mode_passes_clean_and_fails_on_drift_naming_the_file() -> TestResult {
        let root = project("codegen-check")?;
        let baseline = codegen_project(&root, CodegenMode::Write)?;

        let clean = codegen_project(&root, CodegenMode::Check)?;
        assert!(!clean.written);
        assert_eq!(clean.contents, baseline.contents);

        let mut edited = fs::read_to_string(&baseline.module_path)?;
        edited.push_str("\n// hand edit\n");
        fs::write(&baseline.module_path, &edited)?;
        let result = codegen_project(&root, CodegenMode::Check);
        let Err(CodegenError::CheckDrift { path }) = result else {
            return Err(format!("expected CheckDrift, got {result:?}").into());
        };
        assert_eq!(path, baseline.module_path);
        fs::remove_dir_all(&root)?;
        Ok(())
    }

    /// Check mode on a project that was never generated reports the missing
    /// module.
    #[test]
    fn check_mode_fails_when_the_module_is_missing() -> TestResult {
        let root = project("codegen-check-missing")?;

        let result = codegen_project(&root, CodegenMode::Check);
        let Err(CodegenError::CheckMissing { path }) = result else {
            return Err(format!("expected CheckMissing, got {result:?}").into());
        };
        assert_eq!(path, root.join("src/demo_io.gleam"));
        fs::remove_dir_all(&root)?;
        Ok(())
    }

    /// A schema referenced by the descriptor but absent on disk surfaces as
    /// a config-level read error.
    #[test]
    fn missing_referenced_schema_fails_through_descriptor_validation() -> TestResult {
        let root = fixture::temp_project(
            "codegen-missing-ref",
            &[
                ("gleam.toml", GLEAM_TOML.as_bytes()),
                ("workflow.toml", WORKFLOW_TOML.as_bytes()),
                ("schemas/output.json", OUTPUT_SCHEMA),
            ],
        )?;

        let result = codegen_project(&root, CodegenMode::Write);
        assert!(
            matches!(
                result,
                Err(CodegenError::Config(PackagingError::SchemaRead { ref path, .. }))
                    if *path == root.join("schemas/input.json")
            ),
            "missing referenced schema must fail: {result:?}"
        );
        fs::remove_dir_all(&root)?;
        Ok(())
    }

    /// A descriptor pointing at a schema outside `schemas/` is rejected with
    /// the offending field and path.
    #[test]
    fn referenced_schema_outside_schemas_dir_is_rejected() -> TestResult {
        let relocated = WORKFLOW_TOML.replace("schemas/input.json", "io/input.json");
        let root = fixture::temp_project(
            "codegen-outside",
            &[
                ("gleam.toml", GLEAM_TOML.as_bytes()),
                ("workflow.toml", relocated.as_bytes()),
                ("io/input.json", INPUT_SCHEMA),
                ("schemas/output.json", OUTPUT_SCHEMA),
            ],
        )?;

        let result = codegen_project(&root, CodegenMode::Write);
        let Err(CodegenError::SchemaOutsideSchemasDir { field, path }) = result else {
            return Err(format!("expected SchemaOutsideSchemasDir, got {result:?}").into());
        };
        assert_eq!(field, "workflow[0].input_schema");
        assert_eq!(path, root.join("io/input.json"));
        fs::remove_dir_all(&root)?;
        Ok(())
    }

    /// An unsupported schema, even one sorted last, stops the run before the
    /// module is created.
    #[test]
    fn unsupported_construct_aborts_before_any_write() -> TestResult {
        let root = project("codegen-no-partial")?;
        fixture::write_file(
            &root,
            "schemas/zz_tagged.json",
            br#"{ "oneOf": [ { "type": "object", "properties": {} } ] }"#,
        )?;

        let result = codegen_project(&root, CodegenMode::Write);
        let Err(CodegenError::UnsupportedConstruct { file, pointer, .. }) = result else {
            return Err(format!("expected UnsupportedConstruct, got {result:?}").into());
        };
        assert_eq!(file, Path::new("schemas/zz_tagged.json"));
        assert_eq!(pointer, "/oneOf");
        assert!(
            !root.join("src/demo_io.gleam").exists(),
            "a failed run must not leave a partial module behind"
        );
        fs::remove_dir_all(&root)?;
        Ok(())
    }

    /// Non-JSON files and nested directories under `schemas/` do not appear
    /// in the generated-from list.
    #[test]
    fn non_json_entries_in_schemas_are_not_generated_from() -> TestResult {
        let root = project("codegen-non-json")?;
        fixture::write_file(&root, "schemas/README.md", b"docs, not a schema")?;
        fixture::write_file(
            &root,
            "schemas/nested/extra.json",
            br#"{ "type": "string" }"#,
        )?;

        let report = codegen_project(&root, CodegenMode::Write)?;
        assert_eq!(report.schemas, default_schema_list());
        fs::remove_dir_all(&root)?;
        Ok(())
    }

    /// Missing and JSON-free schema directories map to their own error
    /// variants.
    #[test]
    fn schemas_dir_listing_errors_are_typed() -> TestResult {
        let absent = std::env::temp_dir().join("aion-codegen-no-such-dir");
        let result = list_schema_file_names(&absent);
        assert!(matches!(
            result,
            Err(CodegenError::SchemasDirMissing { ref path }) if *path == absent
        ));

        let bare = fixture::temp_project("codegen-empty-schemas", &[("schemas/.keep", b"")])?;
        let result = list_schema_file_names(&bare.join("schemas"));
        assert!(matches!(result, Err(CodegenError::SchemasDirEmpty { .. })));
        fs::remove_dir_all(&bare)?;
        Ok(())
    }

    /// A missing `gleam.toml` and an invalid package name each produce a
    /// dedicated error.
    #[test]
    fn gleam_toml_problems_are_typed() -> TestResult {
        let root = fixture::temp_project(
            "codegen-no-gleam-toml",
            &[("workflow.toml", WORKFLOW_TOML.as_bytes())],
        )?;
        let result = codegen_project(&root, CodegenMode::Write);
        assert!(matches!(
            result,
            Err(CodegenError::Config(
                PackagingError::GleamTomlMissing { .. }
            ))
        ));
        fs::remove_dir_all(&root)?;

        let misnamed = fixture::temp_project(
            "codegen-bad-name",
            &[
                ("gleam.toml", b"name = \"Demo-App\"\n"),
                ("workflow.toml", WORKFLOW_TOML.as_bytes()),
                ("schemas/input.json", INPUT_SCHEMA),
                ("schemas/output.json", OUTPUT_SCHEMA),
            ],
        )?;
        let result = codegen_project(&misnamed, CodegenMode::Write);
        assert!(matches!(
            result,
            Err(CodegenError::ProjectName { ref name, .. }) if name == "Demo-App"
        ));
        fs::remove_dir_all(&misnamed)?;
        Ok(())
    }

    /// Real-world proof against this repository's stacked-dev example: the
    /// `$defs`/`$ref` indirection in `onatopp_input.json` is outside the v1
    /// subset and must fail loudly, naming the file and pointer. (The five
    /// other stacked-dev schemas are proven to generate and compile in the
    /// CLI integration suite.)
    #[test]
    fn stacked_dev_example_hits_the_loud_error_on_onatopp_input() -> TestResult {
        let root = Path::new(env!("CARGO_MANIFEST_DIR")).join("../../examples/stacked-dev");

        let result = codegen_project(&root, CodegenMode::Check);
        let Err(CodegenError::UnsupportedConstruct {
            file,
            pointer,
            construct,
        }) = result
        else {
            return Err(format!("expected UnsupportedConstruct, got {result:?}").into());
        };
        assert_eq!(file, Path::new("schemas/onatopp_input.json"));
        assert_eq!(pointer, "/$defs");
        assert!(construct.contains("unrecognised keyword `$defs`"));
        Ok(())
    }
}