prototext 0.2.0

Lossless protobuf ↔ enhanced-textproto converter
// SPDX-FileCopyrightText: 2025-2026 Frederic Ruget <fred@atlant.is> (GitHub: @douzebis)
// SPDX-FileCopyrightText: 2025-2026 THALES CLOUD SECURISE SAS
//
// SPDX-License-Identifier: MIT

//! Build script: produce `$OUT_DIR/*.pb` descriptor sets.
//!
//! Two modes selected by the `protox` Cargo feature (on by default):
//!
//! - **`protox` enabled** (default, crates.io): compile `.proto` schemas using
//!   the pure-Rust `protox` compiler — no system `protoc` required.
//! - **`protox` disabled** (Nix builds): copy pre-compiled `.pb` files from
//!   `fixtures/prebuilt/`, which are generated by the Nix `patchPhase` using
//!   `protoc` before `cargo build` runs.
//!
//! Files produced in both modes:
//!   - `descriptor.pb`     — FileDescriptorSet for google/protobuf/descriptor.proto
//!   - `knife.pb`          — FileDescriptorSet for fixtures/schemas/knife.proto
//!   - `enum_collision.pb` — FileDescriptorSet for fixtures/schemas/enum_collision.proto
//!
//! `descriptor.pb` is embedded into the binary at compile time via:
//!   `include_bytes!(concat!(env!("OUT_DIR"), "/descriptor.pb"))`
//!
//! `knife.pb` and `enum_collision.pb` are used by integration tests only.

#[cfg(feature = "protox")]
use prost::Message as _;

/// Compile `files` with the pure-Rust `protox` compiler and store the
/// encoded descriptor set as `<out_dir>/<out_name>`.
///
/// `includes` is the list of include directories handed to `protox`.
///
/// # Panics
///
/// Panics when compilation fails or when the output file cannot be written;
/// in a build script a panic is how the failure is reported to Cargo.
#[cfg(feature = "protox")]
fn compile(files: &[&str], includes: &[&str], out_dir: &str, out_name: &str) {
    // Compile and serialise in one step; any compiler error aborts the build.
    let encoded = match protox::compile(files, includes) {
        Ok(descriptor_set) => descriptor_set.encode_to_vec(),
        Err(e) => panic!("failed to compile {files:?}: {e}"),
    };

    let target = format!("{out_dir}/{out_name}");
    if let Err(e) = std::fs::write(target, encoded) {
        panic!("failed to write {out_name}: {e}");
    }
}

/// Copy the pre-compiled descriptor sets (`descriptor.pb`, `knife.pb`,
/// `enum_collision.pb`) into `out_dir`. Used when the `protox` feature is
/// disabled.
///
/// Sources, tried in order:
///
/// - **`DESCRIPTOR_PB` env var set**: copy the files named by the
///   `DESCRIPTOR_PB`, `KNIFE_PB` and `ENUM_COLLISION_PB` env vars.
/// - **Otherwise**: copy the files from `<manifest_dir>/fixtures/prebuilt/`.
///
/// Every input consulted (env vars and fallback files) is declared to Cargo
/// so that the build script is re-run when one of them changes.
///
/// # Panics
///
/// Panics if `DESCRIPTOR_PB` is set but one of the companion variables is
/// not, or if any copy fails.
#[cfg(not(feature = "protox"))]
fn copy_prebuilt(out_dir: &str, manifest_dir: &str) {
    // main() emits `rerun-if-changed` directives, which switches off Cargo's
    // default "rerun on any package change" behaviour. The env vars read here
    // must therefore be declared explicitly, otherwise a changed value would
    // leave stale descriptor sets in OUT_DIR.
    for var in &["DESCRIPTOR_PB", "KNIFE_PB", "ENUM_COLLISION_PB"] {
        println!("cargo:rerun-if-env-changed={var}");
    }

    // Fast path: Nix build supplies stable store-path env vars so that
    // OUT_DIR contents are byte-for-byte identical across all sandboxes and
    // Cargo fingerprints remain valid (no spurious external-dep recompiles).
    if let Ok(descriptor_pb) = std::env::var("DESCRIPTOR_PB") {
        // Validate the companion variables before copying anything.
        let knife_pb = std::env::var("KNIFE_PB")
            .unwrap_or_else(|_| panic!("DESCRIPTOR_PB is set but KNIFE_PB is not"));
        let enum_collision_pb = std::env::var("ENUM_COLLISION_PB")
            .unwrap_or_else(|_| panic!("DESCRIPTOR_PB is set but ENUM_COLLISION_PB is not"));
        for (name, src) in &[
            ("descriptor.pb", descriptor_pb),
            ("knife.pb", knife_pb),
            ("enum_collision.pb", enum_collision_pb),
        ] {
            let dst = std::path::Path::new(out_dir).join(name);
            std::fs::copy(src, &dst)
                .unwrap_or_else(|e| panic!("failed to copy {name} from '{src}': {e}"));
        }
        return;
    }
    // Fallback (crates.io / local dev): copy from fixtures/prebuilt/ which
    // patchPhase or a local protoc run must have populated beforehand.
    let prebuilt = std::path::Path::new(manifest_dir).join("fixtures/prebuilt");
    for name in &["descriptor.pb", "knife.pb", "enum_collision.pb"] {
        let src = prebuilt.join(name);
        let dst = std::path::Path::new(out_dir).join(name);
        // Regenerating a prebuilt file must trigger a fresh copy.
        println!("cargo:rerun-if-changed={}", src.display());
        std::fs::copy(&src, &dst).unwrap_or_else(|e| panic!("failed to copy {name}: {e}"));
    }
}

/// Build the WKT scoring graph (`wkt.rkyv`) and its index (`wkt_index.rkyv`)
/// in `out_dir`.
///
/// Three modes, tried in order:
///
/// - **`prebuilt-wkt` feature** (nixpkgs build): copy pre-generated files
///   from `wkt/prebuilt/` committed to git — no `reproto` invocation needed.
/// - **`WKT_RKYV` env var set** (`default.nix` full build): the files were
///   produced by the `wktRkyv` Nix derivation; copy them from the paths given
///   by `WKT_RKYV` and `WKT_INDEX`.
/// - **Otherwise** (crates.io / local dev): compile `wkt.desc` from
///   `wkt/SOURCES` via `protox` (or the system `protoc` when the `protox`
///   feature is disabled), then invoke `reproto --build-schema-db`. The
///   `reproto` executable can be overridden with the `REPROTO_BIN` env var.
///
/// Env vars and prebuilt files consulted are declared to Cargo so that the
/// build script is re-run when one of them changes.
///
/// # Panics
///
/// Panics when a required file or env var is missing, when a copy fails, or
/// when `protoc` / `reproto` cannot be run or exit unsuccessfully.
#[cfg(feature = "wkt-db")]
fn build_wkt_graph(out_dir: &str, manifest_dir: &str) {
    use std::path::Path;
    #[cfg(not(feature = "prebuilt-wkt"))]
    use std::process::Command;

    let wkt_rkyv_dst = format!("{out_dir}/wkt.rkyv");
    let wkt_index_dst = format!("{out_dir}/wkt_index.rkyv");

    // Fast path: nixpkgs build — copy pre-generated files committed to git.
    #[cfg(feature = "prebuilt-wkt")]
    {
        let prebuilt = Path::new(manifest_dir).join("wkt/prebuilt");
        for (name, dst) in &[
            ("wkt.rkyv", &wkt_rkyv_dst),
            ("wkt_index.rkyv", &wkt_index_dst),
        ] {
            let src = prebuilt.join(name);
            // Regenerating a prebuilt file must trigger a fresh copy.
            println!("cargo:rerun-if-changed={}", src.display());
            std::fs::copy(&src, dst)
                .unwrap_or_else(|e| panic!("failed to copy {name} from wkt/prebuilt/: {e}"));
        }
    }

    // default.nix full build and crates.io / local dev paths.
    #[cfg(not(feature = "prebuilt-wkt"))]
    {
        // The build script emits explicit `rerun-if-changed` directives, so
        // Cargo no longer re-runs it on arbitrary changes: the env vars that
        // select or parameterise this path must be declared explicitly.
        for var in &["WKT_RKYV", "WKT_INDEX", "REPROTO_BIN"] {
            println!("cargo:rerun-if-env-changed={var}");
        }

        // Fast path: default.nix full build pre-supplies both files.
        if let Ok(prebuilt) = std::env::var("WKT_RKYV") {
            // Validate the companion variable before copying anything, so a
            // misconfigured environment does not leave a partial result.
            let prebuilt_index = std::env::var("WKT_INDEX")
                .unwrap_or_else(|_| panic!("WKT_RKYV is set but WKT_INDEX is not"));
            std::fs::copy(&prebuilt, &wkt_rkyv_dst)
                .unwrap_or_else(|e| panic!("failed to copy WKT_RKYV '{prebuilt}': {e}"));
            std::fs::copy(&prebuilt_index, &wkt_index_dst)
                .unwrap_or_else(|e| panic!("failed to copy WKT_INDEX '{prebuilt_index}': {e}"));
            return;
        }

        // Compile wkt.desc from SOURCES (one proto path per non-blank line).
        let sources_path = Path::new(manifest_dir).join("wkt/SOURCES");
        let sources_text =
            std::fs::read_to_string(&sources_path).expect("failed to read wkt/SOURCES");
        let proto_files: Vec<&str> = sources_text
            .lines()
            .map(str::trim)
            .filter(|l| !l.is_empty())
            .collect();

        let wkt_desc_path = format!("{out_dir}/wkt.desc");

        #[cfg(feature = "protox")]
        {
            use prost::Message as _;
            let descriptor = protox::compile(&proto_files, &[""])
                .unwrap_or_else(|e| panic!("failed to compile WKT protos: {e}"));
            let bytes = descriptor.encode_to_vec();
            std::fs::write(&wkt_desc_path, bytes)
                .unwrap_or_else(|e| panic!("failed to write wkt.desc: {e}"));
        }

        #[cfg(not(feature = "protox"))]
        {
            // Use system protoc.
            let mut cmd = Command::new("protoc");
            cmd.arg(format!("--descriptor_set_out={wkt_desc_path}"));
            cmd.arg("--include_imports");
            for f in &proto_files {
                cmd.arg(f);
            }
            let status = cmd
                .status()
                .unwrap_or_else(|e| panic!("failed to run protoc: {e}"));
            assert!(status.success(), "protoc failed with status {status}");
        }

        // Run reproto --build-schema-db to produce schemas.desc plus the
        // schemas/hopcroft.rkyv and schemas/index.rkyv files copied below.
        // -I takes a directory; the positional arg is the .desc filename relative to it.
        // --build-schema-db writes <stem>.desc and <stem>/hopcroft.rkyv.
        let schemas_desc = format!("{out_dir}/schemas.desc");
        let reproto_bin = std::env::var("REPROTO_BIN").unwrap_or_else(|_| "reproto".to_string());
        let status = Command::new(&reproto_bin)
            .arg(format!("--build-schema-db={schemas_desc}"))
            .arg(format!("-O{out_dir}/reproto-out"))
            .arg(format!("-I{out_dir}"))
            .arg("wkt.desc")
            .status()
            .unwrap_or_else(|e| panic!("failed to run reproto '{reproto_bin}': {e}"));
        assert!(
            status.success(),
            "reproto --build-schema-db failed with status {status}"
        );
        // Copy schemas/hopcroft.rkyv → wkt.rkyv (the path embedded via include_bytes!).
        std::fs::copy(format!("{out_dir}/schemas/hopcroft.rkyv"), &wkt_rkyv_dst)
            .unwrap_or_else(|e| panic!("failed to copy hopcroft.rkyv: {e}"));
        // Copy schemas/index.rkyv → wkt_index.rkyv (the path embedded via include_bytes!).
        std::fs::copy(format!("{out_dir}/schemas/index.rkyv"), &wkt_index_dst)
            .unwrap_or_else(|e| panic!("failed to copy index.rkyv: {e}"));
    }
}

/// Build-script entry point.
///
/// Produces the descriptor sets in `OUT_DIR` (compiled with `protox`, or
/// copied from prebuilt files when that feature is disabled), builds the WKT
/// graph when the `wkt-db` feature is enabled, and finally tells Cargo which
/// files should trigger a re-run of this script.
///
/// # Panics
///
/// Panics if `OUT_DIR` or `CARGO_MANIFEST_DIR` is not set, or if any of the
/// generation steps fails.
fn main() {
    let out_dir = std::env::var("OUT_DIR").expect("OUT_DIR not set");
    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set");

    #[cfg(feature = "protox")]
    {
        // descriptor.pb is compiled from the full WKT SOURCES list so that
        // the embedded descriptor pool covers exactly the same types as
        // WKT_GRAPH. Single source of truth: prototext/wkt/SOURCES (spec 0096).
        let sources_list = std::path::Path::new(&manifest_dir).join("wkt/SOURCES");
        let sources_text =
            std::fs::read_to_string(&sources_list).expect("failed to read wkt/SOURCES");
        let mut wkt_protos: Vec<&str> = Vec::new();
        for line in sources_text.lines() {
            let entry = line.trim();
            // Blank lines in SOURCES carry no proto path.
            if !entry.is_empty() {
                wkt_protos.push(entry);
            }
        }
        compile(&wkt_protos, &[""], &out_dir, "descriptor.pb");

        // The fixture schemas share one include directory and differ only in
        // their input / output file names.
        let schemas_dir = format!("{manifest_dir}/fixtures/schemas");
        let fixture_includes = [schemas_dir.as_str()];
        for (proto, pb) in [
            ("knife.proto", "knife.pb"),
            ("enum_collision.proto", "enum_collision.pb"),
        ] {
            compile(&[proto], &fixture_includes, &out_dir, pb);
        }
    }

    #[cfg(not(feature = "protox"))]
    copy_prebuilt(&out_dir, &manifest_dir);

    #[cfg(feature = "wkt-db")]
    build_wkt_graph(&out_dir, &manifest_dir);

    // Emitted last, only once every generation step above has succeeded.
    for directive in [
        "cargo:rerun-if-changed=build.rs",
        "cargo:rerun-if-changed=wkt/SOURCES",
        "cargo:rerun-if-changed=fixtures/schemas/knife.proto",
        "cargo:rerun-if-changed=fixtures/schemas/enum_collision.proto",
    ] {
        println!("{directive}");
    }
}