pleme-doc-gen 0.1.45

Rust replacement for the M0 Python _gen-patterns.py + _gen-docs.py scripts in pleme-io/actions. Walks every action.yml + emits substrate's patterns-full.nix + per-action README.md + root catalog. Per the NO-SHELL prime directive.
//! eat — unified absorb + reverse + render + restore pipeline.
//!
//! Per the operator's "verified tested caixas fully released and
//! sailed out" directive: this verb is the substrate's eat-and-prepare
//! stage. One call takes a path and produces:
//!
//!   - <out>/<name>.caixa.lisp        — typed manifest from reverse
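//!   - (no separate <name>.files.json sidecar is written any more: the
//!     typed file-content manifest from file_capture is embedded in
//!     the .caixa.lisp above as `:files` / `:symlinks` / `:binaries`
//!     slots)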
//!   - <out>/<name>-rendered/         — working copy: captured source
//!                                       overlaid with substrate's CI
//!                                       scaffolding (.github/workflows,
//!                                       .pleme-io-release.toml,
//!                                       nix/modules/, flake.nix)
//!
//! Eaten caixas are pipeline-ready by construction: every prerequisite
//! the auto-release workflow needs is in place. Operators chain `eat`
//! into `verify-tests` (locally green CI) and then `ship` (push to
//! pleme-io GH repo; CI takes over from there).
//!
//! Composition contract: eat is ONLY substrate composition. No new
//! parsing, no new emission — just orchestrates the existing primitives
//! into the typed lifecycle stage.

use anyhow::{anyhow, Result};
use std::path::{Path, PathBuf};

use crate::file_capture::CapturedFile;
use crate::sexp_ast::{Forms, SExp};

/// Inject typed `:files [{...}]` AND `:symlinks [{:path :target} …]`
/// slots into a Forms tree. Mutates the first form's SExp::List (the
/// `(defcaixa …)` call), appending the keyword + vector pairs before
/// the closing paren. Per the operator's "caixas are typed lisp"
/// directive — files + symlinks live INSIDE the lisp source.
pub fn inject_files_into_forms(forms: Forms, files: &[CapturedFile]) -> Forms {
    inject_files_and_symlinks_into_forms(forms, files, &[])
}

/// Inject typed `:files [{...}]` and `:symlinks [{:path :target} …]`
/// slots into a Forms tree.
///
/// Forwards to [`inject_all_captures_into_forms`] with an empty
/// binaries slice, so no `:binaries` slot is emitted. A slot is only
/// appended for a non-empty slice; when both slices are empty the
/// tree is returned unchanged.
pub fn inject_files_and_symlinks_into_forms(
    forms: Forms,
    files: &[CapturedFile],
    symlinks: &[crate::file_capture::CapturedSymlink],
) -> Forms {
    let no_binaries: &[crate::file_capture::CapturedBinary] = &[];
    inject_all_captures_into_forms(forms, files, symlinks, no_binaries)
}

/// Full typed-capture injector: `:files` + `:symlinks` + `:binaries`.
///
/// For every top-level `SExp::List` form, appends one keyword + vector
/// pair per NON-EMPTY capture slice, always in the order `:files`,
/// `:symlinks`, `:binaries`. Each vector element is a map:
///
///   - `:files`    — `:path`, `:sha256`, `:size`, `:body`
///   - `:symlinks` — `:path`, `:target`
///   - `:binaries` — `:path`, `:sha256`, `:size`, `:base64`
///
/// `:size` is emitted as a symbol holding the decimal rendering of the
/// size; every other value is emitted as a string. Top-level forms
/// that are not lists pass through untouched, and when all three
/// slices are empty the input tree is returned as-is.
///
/// Per the original notes, `binaries` is only populated when the
/// `--include-binaries` flag is set; otherwise it stays empty.
///
/// NOTE(review): callers appear to pass a single `(defcaixa …)` form.
/// If several top-level list forms are present, EACH of them receives
/// the slots — confirm that is intended.
pub fn inject_all_captures_into_forms(
    forms: Forms,
    files: &[CapturedFile],
    symlinks: &[crate::file_capture::CapturedSymlink],
    binaries: &[crate::file_capture::CapturedBinary],
) -> Forms {
    let nothing_captured = files.is_empty() && symlinks.is_empty() && binaries.is_empty();
    if nothing_captured {
        return forms;
    }

    let Forms(top_level) = forms;
    let mut rewritten: Vec<SExp> = Vec::with_capacity(top_level.len());

    for form in top_level {
        // Only list forms can carry keyword slots; anything else is
        // forwarded verbatim.
        let mut inner = match form {
            SExp::List(inner) => inner,
            passthrough => {
                rewritten.push(passthrough);
                continue;
            }
        };

        if !files.is_empty() {
            let mut entries: Vec<SExp> = Vec::with_capacity(files.len());
            for file in files {
                entries.push(SExp::Map(vec![
                    (SExp::kw("path"), SExp::str(&file.path)),
                    (SExp::kw("sha256"), SExp::str(&file.sha256)),
                    (SExp::kw("size"), SExp::sym(file.size.to_string())),
                    (SExp::kw("body"), SExp::str(&file.body)),
                ]));
            }
            inner.push(SExp::kw("files"));
            inner.push(SExp::Vector(entries));
        }

        if !symlinks.is_empty() {
            let mut entries: Vec<SExp> = Vec::with_capacity(symlinks.len());
            for link in symlinks {
                entries.push(SExp::Map(vec![
                    (SExp::kw("path"), SExp::str(&link.path)),
                    (SExp::kw("target"), SExp::str(&link.target)),
                ]));
            }
            inner.push(SExp::kw("symlinks"));
            inner.push(SExp::Vector(entries));
        }

        if !binaries.is_empty() {
            let mut entries: Vec<SExp> = Vec::with_capacity(binaries.len());
            for bin in binaries {
                entries.push(SExp::Map(vec![
                    (SExp::kw("path"), SExp::str(&bin.path)),
                    (SExp::kw("sha256"), SExp::str(&bin.sha256)),
                    (SExp::kw("size"), SExp::sym(bin.size.to_string())),
                    (SExp::kw("base64"), SExp::str(&bin.base64)),
                ]));
            }
            inner.push(SExp::kw("binaries"));
            inner.push(SExp::Vector(entries));
        }

        rewritten.push(SExp::List(inner));
    }

    Forms(rewritten)
}

/// Summary of one `eat` run: where the inputs came from, where each
/// output was written, and how much was captured / rendered / restored.
#[derive(Debug, Clone)]
pub struct EatReport {
    /// The source path that was handed to `eat`.
    pub source_path: PathBuf,
    /// Detected ecosystem, stringified. `eat` always fills this with
    /// `Some(..)`; `to_json` omits the key when it is `None`.
    pub ecosystem: Option<String>,
    /// Name reported by ecosystem detection; used as the stem of the
    /// output file and directory names.
    pub caixa_name: String,
    /// `<out>/<name>.caixa.lisp` — the canonical typed manifest.
    pub caixa_lisp_path: PathBuf,
    /// Retained for backward-compat; `eat` sets it to the same path as
    /// `caixa_lisp_path`, since the captures are embedded there.
    pub files_manifest_path: PathBuf,
    /// `<out>/<name>-rendered` — the rendered working copy.
    pub rendered_path: PathBuf,
    /// Number of entries in the capture's file list.
    pub captured_file_count: usize,
    /// Total byte count as reported by the capture.
    pub captured_bytes: usize,
    /// Number of items returned by the scaffold render.
    pub rendered_artifact_count: usize,
    /// Number of items returned by the restore step.
    pub restored_file_count: usize,
}

impl EatReport {
    /// Render this report as a JSON object string.
    ///
    /// Keys are inserted in a fixed order: `source`, `ecosystem` (only
    /// when present), `caixa-name`, `caixa-lisp`, `files-manifest`,
    /// `rendered`, then the four counters. Paths are converted with
    /// `to_string_lossy`, so non-UTF-8 path bytes are replaced rather
    /// than causing an error.
    pub fn to_json(&self) -> String {
        use crate::json_ast::Value;

        /// Lossy, owned string form of a path.
        fn lossy(p: &Path) -> String {
            p.to_string_lossy().into_owned()
        }

        let mut doc = Value::obj();

        doc.insert("source", Value::s(lossy(&self.source_path)));
        match &self.ecosystem {
            Some(eco) => {
                doc.insert("ecosystem", Value::s(eco));
            }
            None => {}
        }
        doc.insert("caixa-name", Value::s(&self.caixa_name));
        doc.insert("caixa-lisp", Value::s(lossy(&self.caixa_lisp_path)));
        doc.insert("files-manifest", Value::s(lossy(&self.files_manifest_path)));
        doc.insert("rendered", Value::s(lossy(&self.rendered_path)));

        // Counters share one shape: usize widened/cast to the JSON
        // integer type.
        let counters = [
            ("captured-files", self.captured_file_count),
            ("captured-bytes", self.captured_bytes),
            ("rendered-artifacts", self.rendered_artifact_count),
            ("restored-files", self.restored_file_count),
        ];
        for (key, count) in counters {
            doc.insert(key, Value::i(count as i64));
        }

        crate::json_ast::render(&doc)
    }
}

/// Top-level eat operation. Composes existing substrate primitives:
///   1. discover::detect    — identify ecosystem
///   2. reverse::reverse_from_path → typed (defcaixa …)
///   3. file_capture::capture → typed CapturedFile list
///   4. caixa::render → manifest scaffold (workflows, nix modules, etc)
///   5. file_capture::restore → original source overlaid on the scaffold
///
/// Step ordering: substrate's CI files (auto-release.yml etc) are
/// emitted FIRST, then captured files overlay them — but the
/// auto-release.yml renderer uses force=false, so any captured
/// auto-release.yml from the original repo wins. This is intentional
/// — when consuming a pleme-io repo that already has the canonical
/// pipeline, we keep the canonical pipeline; when consuming a foreign
/// repo, the substrate's pipeline wins (no upstream auto-release.yml).
pub fn eat(
    source: &Path,
    out: &Path,
    capture_cfg: &crate::file_capture::CaptureConfig,
) -> Result<EatReport> {
    use crate::ast::Render;
    std::fs::create_dir_all(out)?;

    // Step 1 — discover
    let detected = crate::discover::detect(source)
        .ok_or_else(|| anyhow!("no ecosystem detected at {}", source.display()))?;
    let caixa_name = detected.name.clone();
    let ecosystem = detected.ecosystem.to_string();

    // Step 2 — reverse (typed manifest Forms only, no files yet)
    let forms = crate::reverse::reverse_from_path(source)?;

    // Step 3 — capture files
    let cap = crate::file_capture::capture(source, capture_cfg)?;

    // Step 4 — inject :files + :symlinks + :binaries into the lisp
    // Forms and write the CANONICAL caixa.lisp. caixa::render reads
    // all three back + restores byte-identical files / symlinks /
    // base64-decoded binaries.
    let forms_with_files = inject_all_captures_into_forms(
        forms, &cap.files, &cap.symlinks, &cap.binaries);
    let caixa_lisp_path = out.join(format!("{caixa_name}.caixa.lisp"));
    let lisp_src = forms_with_files.render();
    std::fs::write(&caixa_lisp_path, &lisp_src)?;
    // files_manifest_path retained in EatReport for backward-compat —
    // the canonical .caixa.lisp now contains the same information
    // typed. Points operators to the .caixa.lisp itself.
    let files_manifest_path = caixa_lisp_path.clone();

    // Step 4 — render manifest scaffold
    let rendered_path = out.join(format!("{caixa_name}-rendered"));
    std::fs::create_dir_all(&rendered_path)?;
    let rendered = crate::caixa::render(&lisp_src, &rendered_path, true)?;

    // Step 5 — restore captured files (overlay). The render's
    // force=true above means substrate-scaffolded files were written;
    // restore now writes original files at their relative paths,
    // overwriting substrate stubs where conflict (operator's actual
    // source wins over scaffolded smoke tests). Substrate files NOT
    // in the captured set (auto-release.yml, .pleme-io-release.toml,
    // nix/modules/*) are preserved.
    let restored = crate::file_capture::restore(&rendered_path, &cap.files)?;

    // Step 6 — embed the canonical .caixa.lisp INSIDE the rendered
    // dir so it ships with the repo. The typed source travels with
    // the artifact; downstream caixa-deps-resolve can find it at
    // the cloned repo's root.
    let in_repo_lisp = rendered_path.join(format!("{caixa_name}.caixa.lisp"));
    std::fs::write(&in_repo_lisp, &lisp_src)?;

    Ok(EatReport {
        source_path: source.to_path_buf(),
        ecosystem: Some(ecosystem),
        caixa_name,
        caixa_lisp_path,
        files_manifest_path,
        rendered_path,
        captured_file_count: cap.files.len(),
        captured_bytes: cap.total_bytes,
        rendered_artifact_count: rendered.len(),
        restored_file_count: restored.len(),
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Build a throw-away source tree from `(relative path, body)`
    /// pairs, creating parent directories as needed.
    fn mk_repo(files: &[(&str, &str)]) -> tempdir::TempDir {
        let root = tempdir::TempDir::new("eat-src").unwrap();
        files.iter().for_each(|(rel, body)| {
            let target = root.path().join(rel);
            if let Some(dir) = target.parent() {
                fs::create_dir_all(dir).unwrap();
            }
            fs::write(&target, body).unwrap();
        });
        root
    }

    /// Run `eat` on `src` into a fresh output dir with the default
    /// capture config. The output dir handle is returned alongside the
    /// result so it outlives the assertions on the produced paths.
    fn eat_with_defaults(src: &tempdir::TempDir) -> (tempdir::TempDir, anyhow::Result<EatReport>) {
        let out = tempdir::TempDir::new("eat-out").unwrap();
        let cfg = crate::file_capture::CaptureConfig::default();
        let outcome = eat(src.path(), out.path(), &cfg);
        (out, outcome)
    }

    #[test]
    fn eat_rust_crate_produces_caixa_files_manifest_rendered() {
        let cargo_toml =
            "[package]\nname = \"my-crate\"\nversion = \"1.0.0\"\nlicense = \"MIT\"\ndescription = \"x\"\n";
        let lib_rs = "// original code\n#[test] fn original_test() {}";
        let src = mk_repo(&[("Cargo.toml", cargo_toml), ("src/lib.rs", lib_rs)]);

        let (_out, outcome) = eat_with_defaults(&src);
        let report = outcome.unwrap();

        // Every reported output location exists on disk.
        assert!(report.caixa_lisp_path.is_file());
        assert!(report.files_manifest_path.is_file());
        assert!(report.rendered_path.is_dir());

        // Both source files were captured.
        assert_eq!(report.captured_file_count, 2);

        // The substrate's CI scaffolding landed in the rendered dir.
        let workflow = report.rendered_path.join(".github/workflows/auto-release.yml");
        assert!(workflow.is_file());

        // The ORIGINAL src/lib.rs is what ends up in the rendered dir.
        let restored_lib = fs::read_to_string(report.rendered_path.join("src/lib.rs")).unwrap();
        assert!(restored_lib.contains("original code"),
            "expected restored original; got: {restored_lib}");

        // The ORIGINAL Cargo.toml (with the my-crate metadata) too.
        let restored_cargo = fs::read_to_string(report.rendered_path.join("Cargo.toml")).unwrap();
        assert!(restored_cargo.contains("name = \"my-crate\""));
    }

    #[test]
    fn eat_embeds_files_in_caixa_lisp() {
        // Typed-source directive: the :files [{...}] slot is part of
        // the .caixa.lisp rather than a JSON sidecar, and
        // files_manifest_path refers to that same .caixa.lisp.
        let src = mk_repo(&[
            ("Cargo.toml", "[package]\nname = \"x\"\n"),
            ("README.md", "# Title"),
        ]);

        let (_out, outcome) = eat_with_defaults(&src);
        let report = outcome.unwrap();

        assert_eq!(report.files_manifest_path, report.caixa_lisp_path,
            "manifest path should point at the canonical .caixa.lisp");

        let lisp = fs::read_to_string(&report.caixa_lisp_path).unwrap();
        assert!(lisp.contains(":files"),
            "expected :files slot in lisp source; got:\n{lisp}");
        for needle in ["README.md", "Cargo.toml", ":sha256"] {
            assert!(lisp.contains(needle));
        }
    }

    #[test]
    fn eat_fails_on_undetectable_dir() {
        let src = mk_repo(&[("just-data.txt", "no manifest here")]);

        let (_out, outcome) = eat_with_defaults(&src);
        let err = outcome.unwrap_err();

        assert!(err.to_string().contains("no ecosystem detected"));
    }
}