openlatch-provider 0.2.2

Self-service onboarding CLI + runtime daemon for OpenLatch Editors and Providers
// build.rs — code generation from JSON Schemas via typify.
//
// Behaviour (fail-closed):
//   1. Reads every `schemas/*.schema.json` (vendored from @openlatch/client-schemas
//      plus locally-owned manifest-*.schema.json).
//   2. Bundles them into a single root schema, rewriting cross-file `$ref`s into
//      internal `#/definitions/...` form (typify 0.6 cannot resolve cross-file
//      refs on its own).
//   3. Feeds the bundle to `typify::TypeSpace`.
//   4. Writes `src/generated/types.rs` if (and only if) the generated content changed.
//   5. Any typify error PANICS the build; an empty schemas/ directory PANICS unless
//      `OPENLATCH_PROVIDER_BOOTSTRAP=1` is set in the environment.
//
// Platform-side schemas vendored under `schemas/platform/` are *not* fed to typify.
// They will be consumed in P2 (runtime/verdict.rs) via a dedicated codegen pass once
// progenitor (or an OpenAPI-3.1-capable equivalent) is wired. The directory is
// watched here so changes invalidate the build cache.
//
// See `.local/openlatch-provider-v0.1/phase-1-editor-cli.md` task P1.T0 for context.

use std::collections::BTreeMap;
use std::env;
use std::fs;
use std::path::{Path, PathBuf};

/// Build-script entry point.
///
/// Declares the inputs that invalidate the build cache, forwards the
/// build-time telemetry keys to rustc, and regenerates
/// `src/generated/types.rs` from the schemas under `schemas/`.
fn main() {
    println!("cargo:rerun-if-changed=schemas/");
    println!("cargo:rerun-if-changed=schemas/platform/");
    println!("cargo:rerun-if-changed=signing/openlatch-provider.pub");
    println!("cargo:rerun-if-changed=build.rs");

    // These variables steer codegen (bootstrap bypass, bisect mode). Cargo
    // only re-runs the script for inputs it was told about, so declare them;
    // otherwise toggling one leaves the previous result cached.
    println!("cargo:rerun-if-env-changed=OPENLATCH_PROVIDER_BOOTSTRAP");
    println!("cargo:rerun-if-env-changed=OPENLATCH_PROVIDER_DEBUG_TYPIFY");

    // Forward telemetry keys baked at build time so they end up in the binary.
    forward_env("OPENLATCH_PROVIDER_POSTHOG_KEY");
    forward_env("OPENLATCH_PROVIDER_SENTRY_DSN");

    // `var_os` rather than `var`: the manifest directory is a filesystem path
    // and need not be valid UTF-8.
    let manifest_dir = PathBuf::from(
        env::var_os("CARGO_MANIFEST_DIR")
            .expect("CARGO_MANIFEST_DIR must be set; build.rs is meant to be run by cargo"),
    );
    let schemas_dir = manifest_dir.join("schemas");

    generate_typify_types(&manifest_dir, &schemas_dir);
}

/// Forwards the build-time environment variable `name` to rustc via
/// `cargo:rustc-env`, so the crate can read it with `env!`/`option_env!`.
///
/// When the variable is unset (or not valid Unicode) an empty value is
/// forwarded instead, so the name is always defined for the crate.
fn forward_env(name: &str) {
    // The value is baked into the compiled crate, so the script must re-run
    // whenever it changes. Cargo does not track environment reads unless told.
    println!("cargo:rerun-if-env-changed={name}");

    // Empty default — telemetry init no-ops if the key is absent.
    let value = env::var(name).unwrap_or_default();
    println!("cargo:rustc-env={name}={value}");
}

/// Regenerates `<manifest_dir>/src/generated/types.rs` from the schemas in
/// `schemas_dir`.
///
/// # Panics
///
/// Fail-closed: panics when `schemas_dir` is missing, when it contains no
/// `*.schema.json` files, when codegen reports an error, or when the output
/// cannot be written. The first two cases are downgraded to an empty body
/// when `OPENLATCH_PROVIDER_BOOTSTRAP` is present in the environment (any
/// value counts; only presence is checked).
fn generate_typify_types(manifest_dir: &Path, schemas_dir: &Path) {
    let out_path = manifest_dir.join("src").join("generated").join("types.rs");

    let header = "// AUTO-GENERATED by build.rs from schemas/. DO NOT EDIT.\n\
                  // Regenerate: cargo build (touches `schemas/` to invalidate).\n\
                  #![allow(clippy::all, dead_code, unused_imports)]\n\n";

    let bootstrap = env::var("OPENLATCH_PROVIDER_BOOTSTRAP").is_ok();

    let body = if !schemas_dir.is_dir() {
        if bootstrap {
            String::new()
        } else {
            panic!(
                "schemas/ directory missing at {}. Set OPENLATCH_PROVIDER_BOOTSTRAP=1 to bypass.",
                schemas_dir.display()
            );
        }
    } else {
        match generate_typify(schemas_dir) {
            Ok(s) if !s.is_empty() => s,
            Ok(_) => {
                if bootstrap {
                    String::new()
                } else {
                    panic!(
                        "schemas/ contains no *.schema.json files. \
                         Set OPENLATCH_PROVIDER_BOOTSTRAP=1 to bypass."
                    );
                }
            }
            Err(e) => panic!("typify codegen failed: {e}"),
        }
    };

    // Only touch the file when the content differs, to avoid needless
    // rebuilds. Caveat: the comparison is against the raw (pre-rustfmt) text,
    // so if rustfmt altered the previous output the file is rewritten again
    // the next time this script runs.
    let new = format!("{header}{body}\n");
    let existing = fs::read_to_string(&out_path).unwrap_or_default();
    if existing == new {
        return;
    }

    // Report a directory-creation failure directly instead of letting it
    // resurface as a less specific write error below.
    if let Some(parent) = out_path.parent() {
        fs::create_dir_all(parent)
            .unwrap_or_else(|e| panic!("failed to create {}: {e}", parent.display()));
    }
    fs::write(&out_path, new).expect("failed to write src/generated/types.rs");

    // prettyplease's output diverges from rustfmt's in a handful of edge
    // cases (multi-line `impl` headers, trailing newlines). The CI quality
    // gate runs `cargo fmt --all -- --check`, so we re-format the file
    // through rustfmt to keep it stable across regenerations. Best-effort:
    // if rustfmt isn't on PATH we leave the prettyplease output alone.
    let rustfmt = env::var("RUSTFMT").unwrap_or_else(|_| "rustfmt".into());
    let _ = std::process::Command::new(rustfmt)
        .arg("--edition")
        .arg("2021")
        .arg(&out_path)
        .status();
}

/// Bundles every top-level `*.schema.json` file in `schemas_dir` into one
/// root schema and runs typify over it.
///
/// Returns the pretty-printed Rust source on success, or an empty string when
/// the directory holds no matching files (the caller decides whether that is
/// fatal).
///
/// # Errors
///
/// Returns a description of the failure when a file cannot be read or parsed,
/// when two inputs would claim the same bundle key (duplicate stem, duplicate
/// `$defs` name, or a whole-file alias clashing with an existing definition),
/// when a cross-file `$ref` cannot be resolved, or when typify / `syn` reject
/// the bundle.
fn generate_typify(schemas_dir: &Path) -> Result<String, String> {
    use schemars::schema::RootSchema;
    use serde_json::Value;
    use std::collections::BTreeSet;
    use typify::{TypeSpace, TypeSpaceSettings};

    // Top-level schemas only (recurse-into-subdirs would pick up `schemas/platform/`
    // which is reserved for a separate codegen pass — see file header comment).
    let mut paths = BTreeSet::new();
    for entry in fs::read_dir(schemas_dir).map_err(|e| e.to_string())? {
        let entry = entry.map_err(|e| e.to_string())?;
        let p = entry.path();
        // A name ending in `.schema.json` necessarily has the `json`
        // extension, so one suffix test is sufficient.
        let is_schema_name = p
            .file_name()
            .and_then(|n| n.to_str())
            .map(|n| n.ends_with(".schema.json"))
            .unwrap_or(false);
        if is_schema_name && p.is_file() {
            paths.insert(p);
        }
    }
    if paths.is_empty() {
        return Ok(String::new());
    }

    // -- Bundle: load every file as Value and key by file stem --------------
    let mut files: BTreeMap<String, Value> = BTreeMap::new();
    for path in &paths {
        // NOTE: the stem derivation must stay in lock-step with
        // `rewrite_ref_string`, which maps `$ref` file names to these keys.
        let stem = path
            .file_name()
            .and_then(|n| n.to_str())
            .map(|n| n.trim_end_matches(".schema.json").to_string())
            .ok_or_else(|| format!("invalid filename: {}", path.display()))?;
        let raw = fs::read_to_string(path).map_err(|e| format!("{}: {e}", path.display()))?;
        let mut value: Value =
            serde_json::from_str(&raw).map_err(|e| format!("{}: {e}", path.display()))?;
        // typify validates `default` and `examples` values against their
        // schema and panics ("value does not conform to the given schema") on
        // any mismatch — including legitimate Draft 2020-12 patterns it can't
        // model (e.g. `type: ["object", "null"], default: null`). Defaults and
        // examples don't influence type generation, so we strip them up-front.
        strip_keys_recursively(&mut value, &["default", "examples"]);
        // `trim_end_matches` strips a repeated suffix, so e.g. `a.schema.json`
        // and `a.schema.json.schema.json` reduce to the same stem. Refuse the
        // clash instead of letting the later file silently replace the earlier.
        if files.contains_key(&stem) {
            return Err(format!(
                "schema stem `{}` is produced by more than one file (last seen: {})",
                stem,
                path.display()
            ));
        }
        files.insert(stem, value);
    }

    // -- Combine into one root schema with a unified `definitions` ----------
    //
    // Each input file becomes one entry under `definitions` keyed by its
    // PascalCased stem (e.g. `manifest-editor` → `ManifestEditor`). Any nested
    // `$defs` from an input file are hoisted to the top level (e.g.
    // `enums.schema.json`'s `AgentType` becomes `#/definitions/AgentType`).
    // Cross-file refs in any value are rewritten to point at these top-level
    // keys.
    //
    // We use `definitions` (Draft 7) not `$defs` (Draft 2020-12) because the
    // schemars 0.8 / typify 0.6 stack reads it via `RootSchema { definitions:
    // Map<String, Schema> }`. Internally, each input's content keeps its own
    // 2020-12 `$defs`/`$ref` shape — typify understands `#/definitions/Foo`
    // and `#/$defs/Foo` interchangeably.
    let mut combined_defs = serde_json::Map::new();
    for (stem, schema) in &files {
        // Hoist nested $defs.
        if let Some(Value::Object(defs)) = schema.get("$defs") {
            for (k, v) in defs {
                if combined_defs.contains_key(k) {
                    return Err(format!(
                        "duplicate $defs key `{}` while bundling schemas (last seen in `{}`)",
                        k, stem
                    ));
                }
                let mut clone = v.clone();
                rewrite_refs(&mut clone, &files)?;
                combined_defs.insert(k.clone(), clone);
            }
        }
        // Whole-file alias keyed by PascalCased stem. It shares the flat
        // namespace with the hoisted `$defs`, and distinct stems can map to
        // the same alias (`foo-bar` / `foo_bar`), so check it the same way
        // rather than overwriting an existing definition.
        let alias_key = pascal(stem);
        if combined_defs.contains_key(&alias_key) {
            return Err(format!(
                "whole-file alias `{}` for schema `{}` collides with an existing definition \
                 while bundling schemas",
                alias_key, stem
            ));
        }
        let mut whole = schema.clone();
        if let Some(obj) = whole.as_object_mut() {
            obj.remove("$defs");
            obj.remove("$schema");
            obj.remove("$id");
        }
        rewrite_refs(&mut whole, &files)?;
        combined_defs.insert(alias_key, whole);
    }

    // Bisect-friendly path: when OPENLATCH_PROVIDER_DEBUG_TYPIFY is set we
    // feed `definitions` entries one at a time, isolating which (if any)
    // schema typify rejects. Default path bundles them all in one shot.
    let bisect = env::var("OPENLATCH_PROVIDER_DEBUG_TYPIFY").is_ok();

    let mut space = TypeSpace::new(TypeSpaceSettings::default().with_struct_builder(true));

    if bisect {
        for (name, def) in &combined_defs {
            let one = serde_json::json!({
                "title": "OpenLatchProviderBundleSingle",
                "type": "object",
                "definitions": { name.clone(): def.clone() },
            });
            let root: RootSchema = serde_json::from_value(one)
                .map_err(|e| format!("bundle parse for `{name}`: {e}"))?;
            space
                .add_root_schema(root)
                .map_err(|e| format!("typify add_root_schema for `{name}`: {e}"))?;
        }
    } else {
        let combined = serde_json::json!({
            "title": "OpenLatchProviderBundle",
            "type": "object",
            "definitions": Value::Object(combined_defs),
        });

        // Debug aid — drop the bundled schema next to the codegen output so we
        // can inspect it if typify rejects the input.
        if let Ok(out_dir) = env::var("OUT_DIR") {
            let _ = fs::write(
                std::path::Path::new(&out_dir).join("typify-bundle.json"),
                serde_json::to_string_pretty(&combined).unwrap_or_default(),
            );
        }

        let root: RootSchema = serde_json::from_value(combined)
            .map_err(|e| format!("bundle root schema parse: {e}"))?;
        space
            .add_root_schema(root)
            .map_err(|e| format!("typify add_root_schema: {e}"))?;
    }

    // Round-trip the token stream through `syn` so prettyplease can lay it out.
    let stream = space.to_stream();
    let parsed: syn::File = syn::parse2(stream).map_err(|e| e.to_string())?;
    Ok(prettyplease::unparse(&parsed))
}

/// Walks `value` and rewrites every cross-file `$ref` string so that it
/// points into the bundled top-level `definitions` map.
///
/// The three forms found in `schemas/` end up as:
///   - `enums.schema.json#/$defs/AgentType` → `#/definitions/AgentType`
///   - `manifest-editor.schema.json#/$defs/Foo` → `#/definitions/Foo`
///   - `manifest-editor.schema.json` (whole-file ref) → `#/definitions/ManifestEditor`
///
/// Refs that are already internal (start with `#`) and refs whose file part
/// does not end in `.schema.json` are left untouched. `files` is the set of
/// loaded schemas keyed by stem; a ref naming a stem that is not in it makes
/// the whole walk fail with an error.
fn rewrite_refs(
    value: &mut serde_json::Value,
    files: &BTreeMap<String, serde_json::Value>,
) -> Result<(), String> {
    use serde_json::Value;

    match value {
        Value::Object(fields) => {
            // Patch this object's own `$ref` before descending; only string
            // values are considered refs.
            if let Some(Value::String(target)) = fields.get_mut("$ref") {
                if let Some(internal) = rewrite_ref_string(target, files)? {
                    *target = internal;
                }
            }
            fields
                .iter_mut()
                .try_for_each(|(_, child)| rewrite_refs(child, files))
        }
        Value::Array(items) => items
            .iter_mut()
            .try_for_each(|child| rewrite_refs(child, files)),
        // Scalars cannot contain refs.
        _ => Ok(()),
    }
}

/// Maps a single `$ref` string to its location inside the bundled schema.
///
/// Returns:
///   - `Ok(None)` when `r` needs no rewrite: it is already internal (starts
///     with `#`) or its file part does not end in `.schema.json`;
///   - `Ok(Some(new_ref))` with a `#/definitions/...` pointer for a ref into
///     one of the loaded `files` (keyed by stem);
///   - `Err(..)` when the file part looks like a schema file but matches no
///     loaded stem.
///
/// A leading `./` on the file part is accepted: `./enums.schema.json` and
/// `enums.schema.json` are the same relative reference.
fn rewrite_ref_string(
    r: &str,
    files: &BTreeMap<String, serde_json::Value>,
) -> Result<Option<String>, String> {
    // Already internal — leave alone.
    if r.starts_with('#') {
        return Ok(None);
    }
    // Split file part vs fragment.
    let (file_part, fragment) = match r.split_once('#') {
        Some((a, b)) => (a, Some(b)),
        None => (r, None),
    };
    if !file_part.ends_with(".schema.json") {
        // Not a cross-file schema ref we know how to rewrite (could be a
        // remote URL ref). Leave alone — typify will surface an error.
        return Ok(None);
    }
    // Normalise the explicit same-directory form before the lookup. Deeper
    // paths (e.g. `platform/...`) intentionally still fail below.
    let local_name = file_part.strip_prefix("./").unwrap_or(file_part);
    // NOTE: the stem derivation must match how the loader keys `files`.
    let stem = local_name.trim_end_matches(".schema.json");
    if !files.contains_key(stem) {
        return Err(format!(
            "cross-file $ref `{}` points at unknown schema `{}`",
            r, file_part
        ));
    }
    let new = match fragment {
        Some(frag) => {
            // `enums.schema.json#/$defs/AgentType` → `#/definitions/AgentType`.
            // We hoisted every nested $defs entry to top-level `definitions`,
            // so the fragment path needs `/$defs/` collapsed to point at the
            // bundled top level.
            // Form: `/$defs/Name` → `#/definitions/Name`
            // Form: `/Foo/Bar`     → `#/definitions/<PascalStem>/Foo/Bar`
            if let Some(name) = frag.strip_prefix("/$defs/") {
                format!("#/definitions/{name}")
            } else {
                format!("#/definitions/{}{frag}", pascal(stem))
            }
        }
        None => format!("#/definitions/{}", pascal(stem)),
    };
    Ok(Some(new))
}

/// Removes every occurrence of the schema keywords listed in `keys` from
/// `value`, recursing through nested objects and arrays.
///
/// Objects stored under `properties`, `patternProperties`, `$defs`,
/// `definitions` and `dependentSchemas` map user-chosen names to subschemas.
/// Their own keys are therefore never removed — only the subschemas they hold
/// are processed. Without this distinction, stripping the `default` keyword
/// would also delete a *property named* `default`.
fn strip_keys_recursively(value: &mut serde_json::Value, keys: &[&str]) {
    use serde_json::Value;

    /// Keywords whose value is a name → subschema map rather than a schema.
    const NAME_MAP_KEYWORDS: [&str; 5] = [
        "properties",
        "patternProperties",
        "$defs",
        "definitions",
        "dependentSchemas",
    ];

    match value {
        Value::Object(map) => {
            for k in keys {
                map.remove(*k);
            }
            for (keyword, child) in map.iter_mut() {
                if NAME_MAP_KEYWORDS.contains(&keyword.as_str()) {
                    if let Some(named) = child.as_object_mut() {
                        // Keep the entry names; strip inside each subschema.
                        for (_, subschema) in named.iter_mut() {
                            strip_keys_recursively(subschema, keys);
                        }
                        continue;
                    }
                }
                strip_keys_recursively(child, keys);
            }
        }
        Value::Array(arr) => {
            for v in arr.iter_mut() {
                strip_keys_recursively(v, keys);
            }
        }
        _ => {}
    }
}

/// Converts a file stem such as `manifest-editor` into a PascalCase
/// identifier (`ManifestEditor`).
///
/// `-`, `_` and `.` act as word separators and are dropped; the first
/// character of every word is upper-cased and the remaining characters are
/// copied through unchanged.
fn pascal(stem: &str) -> String {
    stem.split(|c: char| matches!(c, '-' | '_' | '.'))
        .fold(String::with_capacity(stem.len()), |mut acc, word| {
            let mut rest = word.chars();
            // Empty words (leading, trailing or doubled separators) add nothing.
            if let Some(initial) = rest.next() {
                acc.extend(initial.to_uppercase());
                acc.push_str(rest.as_str());
            }
            acc
        })
}