aprender-mcp 0.31.2

Model Context Protocol (MCP) server for aprender — exposes apr CLI as MCP tools
Documentation
// FALSIFY-MCP-008 build-time schema codegen (PMAT-514 extension).
//
// Reads `contracts/apr-mcp-tool-schemas-v1.yaml` and emits a Rust module
// containing, for each tool:
//   * `pub const <TOOL>_SCHEMA: &str`      — JSON Schema body for `inputSchema`
//     (serialized by serde_json::to_string; `properties` are pre-sorted by
//     argument name, other keys follow serde_json::Map iteration order).
//   * `pub const <TOOL>_DESCRIPTION: &str` — tool-level human-readable
//     description, backslash-escaped for embedding in a normal `"..."`
//     Rust string literal.
//
// The generated file is `$OUT_DIR/schemas.rs` and is `include!`d from
// `src/lib.rs` inside a `pub mod schemas { ... }`. Tools consume the
// constants via `serde_json::from_str` (schema) and direct `.to_string()`
// (description), making the YAML contract the single source of truth.
//
// Before the PMAT-514 extension the description was hand-mirrored in the
// Rust source file and drifted silently twice in a 24h window (apr.serve in
// commit 715781df5, apr.run in 91a613968). Codegen closes that class at
// compile time; the test-layer `tool_descriptions_match_yaml_contract`
// remains as a defence-in-depth safety net.
//
// NO bare unwrap() in build.rs — every failure path uses `expect(...)` or
// `unwrap_or_else(|e| panic!(...))` with a context message so a broken
// contract surfaces as a clear cargo error.

use serde::Deserialize;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};

/// Root of `contracts/apr-mcp-tool-schemas-v1.yaml`.
///
/// Only the top-level `tools` key is modelled here; it is the sole input to
/// the code generator.
#[derive(Debug, Deserialize)]
struct ContractRoot {
    /// Tool entries, kept in the order the YAML sequence lists them.
    tools: Vec<ToolEntry>,
}

/// One tool declared in the contract.
///
/// Each entry yields a `<TOOL>_SCHEMA` and a `<TOOL>_DESCRIPTION` constant
/// in the generated module.
#[derive(Debug, Deserialize)]
struct ToolEntry {
    /// Tool name as written in the YAML (e.g. `apr.version`).
    name: String,
    /// Tool-level description, emitted as the `<TOOL>_DESCRIPTION` constant.
    description: String,
    /// Declared arguments; an absent key deserializes to an empty list.
    #[serde(default)]
    args: Vec<ArgEntry>,
    /// Names emitted in the schema's `required` array; an absent key
    /// deserializes to an empty list.
    #[serde(default)]
    required: Vec<String>,
}

/// One argument of a tool, rendered as an entry under the schema's
/// `properties` object.
#[derive(Debug, Deserialize)]
struct ArgEntry {
    /// Argument name; becomes the property key.
    name: String,
    /// Value of the YAML `type` key (renamed because `type` is a Rust
    /// keyword); copied verbatim into the property's `"type"`.
    #[serde(rename = "type")]
    arg_type: String,
    /// Argument description; copied verbatim into the property's
    /// `"description"`.
    description: String,
    /// Per-argument `required` flag from the YAML. Parsed but never read:
    /// the authoritative source is `ToolEntry.required`, hence the
    /// `dead_code` allowance.
    #[serde(default)]
    #[allow(dead_code)]
    required: bool,
}

/// Build-script entry point.
///
/// Steps, in order:
/// 1. locate the YAML contract and register it (and `build.rs` itself) with
///    cargo's `rerun-if-changed` tracking;
/// 2. read and deserialize the contract;
/// 3. render the Rust module and write it to `$OUT_DIR/schemas.rs`.
///
/// # Panics
///
/// Panics with a `FALSIFY-MCP-008:`-prefixed message if the contract cannot
/// be read or parsed, if `OUT_DIR` is not set, or if the output file cannot
/// be written.
fn main() {
    let contract_path = locate_contract();
    println!("cargo:rerun-if-changed={}", contract_path.display());
    println!("cargo:rerun-if-changed=build.rs");

    let yaml = match std::fs::read_to_string(&contract_path) {
        Ok(text) => text,
        Err(e) => panic!(
            "FALSIFY-MCP-008: failed to read {}: {e}",
            contract_path.display()
        ),
    };

    let parsed: ContractRoot = match serde_yaml::from_str(&yaml) {
        Ok(root) => root,
        Err(e) => panic!(
            "FALSIFY-MCP-008: failed to parse {} as YAML: {e}",
            contract_path.display()
        ),
    };

    // Cargo always sets OUT_DIR for build scripts; its absence means the
    // script is being run outside cargo.
    let out_path = std::env::var_os("OUT_DIR")
        .map(PathBuf::from)
        .expect("FALSIFY-MCP-008: OUT_DIR not set by cargo; cannot emit generated schemas")
        .join("schemas.rs");

    let generated = render_module(&parsed.tools);
    if let Err(e) = std::fs::write(&out_path, generated) {
        panic!(
            "FALSIFY-MCP-008: failed to write {}: {e}",
            out_path.display()
        );
    }
}

/// Build the path to this crate's own copy of
/// `apr-mcp-tool-schemas-v1.yaml`.
///
/// The contract is always looked up at
/// `$CARGO_MANIFEST_DIR/contracts/apr-mcp-tool-schemas-v1.yaml`. Keeping the
/// YAML inside the crate (shipped via `Cargo.toml` `include`) is what lets
/// `cargo install aprender` work: the workspace-root
/// `contracts/apr-mcp-tool-schemas-v1.yaml` is not part of the published
/// package, and pointing at it made this build.rs panic at install time
/// (v0.31.1 bug). A drift-guard test in `tests/falsify_mcp_008.rs` asserts
/// the in-crate copy stays byte-identical to the workspace-root copy in-tree.
///
/// The path is only constructed here; existence is checked when `main`
/// reads it.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is unset or not valid Unicode.
fn locate_contract() -> PathBuf {
    let crate_root = std::env::var("CARGO_MANIFEST_DIR")
        .expect("FALSIFY-MCP-008: CARGO_MANIFEST_DIR unset in build.rs");
    let relative: PathBuf = ["contracts", "apr-mcp-tool-schemas-v1.yaml"]
        .iter()
        .collect();
    Path::new(&crate_root).join(relative)
}

/// Render the full text of the generated `schemas.rs`.
///
/// Layout of the output:
/// 1. a fixed `@generated` comment header;
/// 2. `TOOL_NAMES`, listing every tool name in the order given by `tools`;
/// 3. for each tool, a `<TOOL>_SCHEMA` constant followed by a
///    `<TOOL>_DESCRIPTION` constant, each preceded by a one-line doc comment.
///
/// All string payloads go through `escape_rust_str` before being placed
/// inside `"..."` literals.
fn render_module(tools: &[ToolEntry]) -> String {
    let mut module = String::from(concat!(
        "// @generated by crates/aprender-mcp/build.rs from\n",
        "// contracts/apr-mcp-tool-schemas-v1.yaml. DO NOT EDIT BY HAND.\n",
        "//\n",
        "// Each constant is the JSON Schema body (as a JSON string) for the\n",
        "// corresponding tool's MCP `inputSchema`. Consume via serde_json::from_str.\n\n",
    ));

    // The name list lets downstream tests iterate over the tools without
    // parsing the YAML again.
    module.push_str("/// All tool names in the contract, in declaration order.\n");
    module.push_str("pub const TOOL_NAMES: &[&str] = &[\n");
    module.extend(
        tools
            .iter()
            .map(|entry| format!("    \"{}\",\n", escape_rust_str(&entry.name))),
    );
    module.push_str("];\n\n");

    for entry in tools {
        let name = &entry.name;

        let schema_ident = tool_schema_const_name(name);
        let schema_literal = escape_rust_str(&render_schema_json(entry));
        module += &format!("/// JSON Schema body for `{name}`.\n");
        module += &format!("pub const {schema_ident}: &str = \"{schema_literal}\";\n\n");

        let description_ident = tool_description_const_name(name);
        let description_literal = escape_rust_str(&entry.description);
        module += &format!("/// Tool-level description for `{name}` (PMAT-514).\n");
        module += &format!("pub const {description_ident}: &str = \"{description_literal}\";\n\n");
    }

    module
}

/// Produce the JSON text of one tool's `inputSchema`.
///
/// Shape (matches `InputSchema` Serialize output with no trailing fields when
/// properties/required are empty):
/// ```json
/// {"type":"object","properties":{...},"required":[...]}
/// ```
///
/// `"properties"` is omitted when the tool declares no args and `"required"`
/// is omitted when the required list is empty. Each property carries exactly
/// a `"type"` and a `"description"`.
///
/// # Panics
///
/// Panics if `serde_json` fails to serialize the assembled value.
fn render_schema_json(tool: &ToolEntry) -> String {
    use serde_json::{Map, Value};

    let mut root = Map::new();
    root.insert("type".to_owned(), Value::from("object"));

    if !tool.args.is_empty() {
        // Index args by name first so properties come out in one canonical
        // (sorted) order regardless of how the YAML lists them. The live
        // `InputSchema` uses a HashMap with non-deterministic iteration, and
        // the test layer canonicalizes that side as well. If a name repeats,
        // the later arg replaces the earlier one.
        let mut by_name: BTreeMap<&str, &ArgEntry> = BTreeMap::new();
        for arg in &tool.args {
            by_name.insert(arg.name.as_str(), arg);
        }

        let properties: Map<String, Value> = by_name
            .into_iter()
            .map(|(name, arg)| {
                let mut body = Map::new();
                body.insert("type".to_owned(), Value::from(arg.arg_type.as_str()));
                body.insert(
                    "description".to_owned(),
                    Value::from(arg.description.as_str()),
                );
                (name.to_owned(), Value::Object(body))
            })
            .collect();
        root.insert("properties".to_owned(), Value::Object(properties));
    }

    if !tool.required.is_empty() {
        let required: Vec<Value> = tool
            .required
            .iter()
            .map(|name| Value::from(name.as_str()))
            .collect();
        root.insert("required".to_owned(), Value::Array(required));
    }

    serde_json::to_string(&Value::Object(root))
        .expect("FALSIFY-MCP-008: failed to serialize schema Map to JSON")
}

/// Derive the schema constant identifier for a tool name, e.g.
/// `apr.version` → `APR_VERSION_SCHEMA`.
///
/// Every `.` and `-` becomes `_`, the result is upper-cased, and `_SCHEMA`
/// is appended. Other characters pass through untouched.
fn tool_schema_const_name(tool_name: &str) -> String {
    let ident: String = tool_name
        .chars()
        .map(|ch| if matches!(ch, '.' | '-') { '_' } else { ch })
        .collect();
    format!("{}_SCHEMA", ident.to_uppercase())
}

/// Derive the description constant identifier for a tool name, e.g.
/// `apr.version` → `APR_VERSION_DESCRIPTION`.
///
/// Every `.` and `-` becomes `_`, the result is upper-cased, and
/// `_DESCRIPTION` is appended. Other characters pass through untouched.
fn tool_description_const_name(tool_name: &str) -> String {
    let underscored = tool_name
        .split(|ch: char| ch == '.' || ch == '-')
        .collect::<Vec<_>>()
        .join("_");
    let mut ident = underscored.to_uppercase();
    ident += "_DESCRIPTION";
    ident
}

/// Escape `s` so it can be placed between the quotes of a normal `"..."`
/// Rust string literal.
///
/// `render_module` passes tool names, serialized schema JSON, and tool
/// descriptions through this function. Backslash, double quote, newline,
/// carriage return, and tab are rewritten to their backslash escapes; every
/// other character is copied unchanged.
fn escape_rust_str(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut literal, ch| {
            let replacement = match ch {
                '\\' => Some(r"\\"),
                '"' => Some(r#"\""#),
                '\n' => Some(r"\n"),
                '\r' => Some(r"\r"),
                '\t' => Some(r"\t"),
                _ => None,
            };
            match replacement {
                Some(sequence) => literal.push_str(sequence),
                None => literal.push(ch),
            }
            literal
        })
}