pleme-doc-gen 0.1.41

Rust replacement for the M0 Python _gen-patterns.py + _gen-docs.py scripts in pleme-io/actions. Walks every action.yml + emits substrate's patterns-full.nix + per-action README.md + root catalog. Per the NO-SHELL prime directive.
//! consume-monorepo — typed mass-absorption primitive.
//!
//! Walks subdirectories of an arbitrary monorepo path, dispatches
//! discover + reverse per subdir, optionally renders the resulting
//! (defcaixa …) sources to typed scaffolds. Collapses the operator's
//! 2-shell-loop dogfood pattern into ONE substrate call.
//!
//! Operator-facing path:
//!
//!   pleme-doc-gen consume-monorepo \
//!     --path ~/code/github/pleme-io/actions \
//!     --out  ./caixas/
//!
//! Substrate behaviour:
//!   1. For each direct subdir of <path> (skipping hidden + caller-listed):
//!      a. discover::detect → ecosystem keyword
//!      b. reverse::reverse_from_path → typed (defcaixa …) Forms
//!      c. ast::Render::render → written to <out>/<subdir-name>.caixa.lisp
//!   2. If --render-too: caixa::render each .caixa.lisp into
//!      <out>/<subdir-name>-rendered/
//!   3. Emit typed JSON ConsumeReport summarising successes + failures
//!
//! Per the ★★ CLOSED-LOOP MASS-SYNTHESIS directive Rule 2 (closed-
//! loop primitive composition at the CLI layer): the operator wraps
//! N existing primitives (discover, reverse, caixa::render) into one
//! typed verb via this module; the substrate stays composable rather
//! than monolithic.

use anyhow::Result;
use std::path::{Path, PathBuf};

/// Per-subdir outcome of the consume pass.
///
/// `consume()` pushes one of these onto `ConsumeReport::outcomes` for
/// every subdirectory it visits, whether that subdirectory ended up
/// consumed, skipped, or failed.
#[derive(Debug, Clone)]
pub struct SubdirOutcome {
    /// Directory name of the subdir (its final path component).
    pub name: String,
    /// String form of the ecosystem reported by `discover::detect`;
    /// `None` when detection found nothing (the subdir was skipped).
    pub ecosystem: Option<String>,
    /// Path of the written `<out>/<name>.caixa.lisp` file. Set only
    /// once that file has been written successfully.
    pub caixa_path: Option<PathBuf>,
    /// Path of the `<out>/<name>-rendered` scaffold directory. Set
    /// only when rendering was requested and `caixa::render` succeeded.
    pub rendered_path: Option<PathBuf>,
    /// Number of files returned by `caixa::render`; stays 0 when no
    /// render happened or the render failed.
    pub artifact_count: usize,
    /// Human-readable reason for a skip or failure. Holds
    /// "no ecosystem detected" for skipped subdirs, a message prefixed
    /// with the failing step (`reverse:`, `write caixa:`,
    /// `mkdir rendered:`, `render:`) for failed ones, and a `measure:`
    /// message when only the fidelity measurement failed — in that
    /// last case the subdir is still counted as consumed.
    pub error: Option<String>,
    /// Fidelity report from comparing the original subdir against the
    /// freshly-rendered scaffold. Populated only when consume() is
    /// called with `measure: true` AND the subdir was successfully
    /// rendered. Per-field perfect/lossy/gap status drives the
    /// substrate's typed-quality compass.
    pub fidelity: Option<crate::fidelity::FidelityReport>,
}

/// Aggregate report of the consume pass.
///
/// Built by `consume()` and rendered to JSON by `to_json()`. The three
/// counters (`consumed`, `skipped`, `failed`) partition the visited
/// subdirectories; `outcomes` carries the per-subdir detail.
#[derive(Debug, Default, Clone)]
pub struct ConsumeReport {
    /// The monorepo path that was handed to `consume()`.
    pub root: PathBuf,
    /// Subdirs that made it through every requested step. A subdir
    /// whose only problem was a failed fidelity measurement still
    /// counts here.
    pub consumed: usize,
    /// Subdirs in which no ecosystem was detected. Hidden and
    /// skip-listed directories are filtered out before the walk and
    /// are NOT counted here.
    pub skipped: usize,
    /// Subdirs where reverse, the caixa write, creating the rendered
    /// directory, or the render itself returned an error.
    pub failed: usize,
    /// One entry per visited subdir, in sorted path order.
    pub outcomes: Vec<SubdirOutcome>,
    /// Aggregate fidelity stats across every measured subdir. Zero
    /// when consume() ran without `measure: true`.
    /// Sum of `perfect_count` over all measured subdirs.
    pub fidelity_perfect_total: usize,
    /// Sum of `lossy_count` over all measured subdirs.
    pub fidelity_lossy_total: usize,
    /// Sum of `gap_count` over all measured subdirs.
    pub fidelity_gap_total: usize,
    /// Number of subdirs for which a fidelity measurement succeeded.
    pub fidelity_measured: usize,
}

impl ConsumeReport {
    /// Share of perfectly round-tripped fields over all measured
    /// subdirs, scaled to permille (0–1000) and rounded to the nearest
    /// integer.
    ///
    /// Returns `None` if no subdir was measured. If subdirs were
    /// measured but no fields were compared at all, the score is
    /// reported as a full 1000.
    pub fn aggregate_score_permille(&self) -> Option<i64> {
        // Nothing measured → there is no score to report.
        if self.fidelity_measured == 0 {
            return None;
        }

        let perfect = self.fidelity_perfect_total;
        let compared = perfect + self.fidelity_lossy_total + self.fidelity_gap_total;

        // Measured, but zero fields compared: treat as lossless
        // rather than dividing by zero.
        if compared == 0 {
            return Some(1000);
        }

        let ratio = perfect as f64 / compared as f64;
        let permille = (ratio * 1000.0).round() as i64;
        Some(permille)
    }
}

impl ConsumeReport {
    /// Serialise the report to a JSON string.
    ///
    /// The document is assembled as a `json_ast::Value` tree and then
    /// handed to `json_ast::render`, so no JSON syntax is ever
    /// string-formatted by hand. Aggregate fidelity keys appear only
    /// when at least one subdir was measured; per-outcome optional
    /// keys appear only when the corresponding field is populated.
    pub fn to_json(&self) -> String {
        use crate::json_ast::Value;

        let mut doc = Value::obj();
        doc.insert("root", Value::s(self.root.to_string_lossy().into_owned()));
        for (key, count) in [
            ("consumed", self.consumed),
            ("skipped", self.skipped),
            ("failed", self.failed),
        ] {
            doc.insert(key, Value::i(count as i64));
        }

        // Aggregate fidelity section — omitted when nothing was measured.
        if self.fidelity_measured > 0 {
            for (key, count) in [
                ("fidelity-measured", self.fidelity_measured),
                ("fidelity-perfect", self.fidelity_perfect_total),
                ("fidelity-lossy", self.fidelity_lossy_total),
                ("fidelity-gap", self.fidelity_gap_total),
            ] {
                doc.insert(key, Value::i(count as i64));
            }
            if let Some(score) = self.aggregate_score_permille() {
                doc.insert("fidelity-score-permille", Value::i(score));
            }
        }

        // One JSON object per visited subdir, in report order.
        let mut rows = Vec::with_capacity(self.outcomes.len());
        for outcome in &self.outcomes {
            let mut row = Value::obj();
            row.insert("name", Value::s(&outcome.name));
            if let Some(ecosystem) = outcome.ecosystem.as_ref() {
                row.insert("ecosystem", Value::s(ecosystem));
            }
            if let Some(caixa) = outcome.caixa_path.as_ref() {
                row.insert("caixa", Value::s(caixa.to_string_lossy().into_owned()));
            }
            if let Some(rendered) = outcome.rendered_path.as_ref() {
                row.insert("rendered", Value::s(rendered.to_string_lossy().into_owned()));
            }
            if outcome.artifact_count > 0 {
                row.insert("artifacts", Value::i(outcome.artifact_count as i64));
            }
            if let Some(fid) = outcome.fidelity.as_ref() {
                row.insert("fidelity-perfect", Value::i(fid.perfect_count as i64));
                row.insert("fidelity-lossy", Value::i(fid.lossy_count as i64));
                row.insert("fidelity-gap", Value::i(fid.gap_count as i64));
                let permille = (fid.score() * 1000.0).round() as i64;
                row.insert("fidelity-score-permille", Value::i(permille));
            }
            if let Some(message) = outcome.error.as_ref() {
                row.insert("error", Value::s(message));
            }
            rows.push(row);
        }
        doc.insert("outcomes", Value::Array(rows));

        crate::json_ast::render(&doc)
    }
}

/// Walk subdirs of `path`; consume each one. Returns the typed
/// ConsumeReport regardless of per-subdir failures.
///
/// `skip` is a list of subdir names to ignore (e.g. "_tlisp-stdlib"
/// in pleme-io/actions). Hidden dirs (starting with '.') are always
/// skipped.
pub fn consume(
    path: &Path,
    out: &Path,
    skip: &[String],
    render_too: bool,
    measure: bool,
) -> Result<ConsumeReport> {
    use crate::ast::Render;
    std::fs::create_dir_all(out)?;
    let mut report = ConsumeReport {
        root: path.to_path_buf(),
        ..Default::default()
    };
    let entries = std::fs::read_dir(path)?;
    let mut subdirs: Vec<PathBuf> = entries.filter_map(|e| e.ok())
        .filter(|e| e.file_type().map(|t| t.is_dir()).unwrap_or(false))
        .map(|e| e.path())
        .filter(|p| {
            let name = p.file_name().and_then(|n| n.to_str()).unwrap_or("");
            !name.starts_with('.') && !skip.iter().any(|s| s == name)
        })
        .collect();
    subdirs.sort();

    for subdir in &subdirs {
        let name = subdir.file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("")
            .to_string();

        // Step 1 — discover. Subdirs with no recognized manifest get
        // counted as skipped, not failed.
        let detected = crate::discover::detect(subdir);
        let ecosystem = detected.as_ref().map(|d| d.ecosystem.to_string());
        if detected.is_none() {
            report.skipped += 1;
            report.outcomes.push(SubdirOutcome {
                name, ecosystem: None, caixa_path: None, rendered_path: None,
                artifact_count: 0, error: Some("no ecosystem detected".into()),
                fidelity: None,
            });
            continue;
        }

        // Step 2 — reverse + write the .caixa.lisp.
        let mut outcome = SubdirOutcome {
            name: name.clone(), ecosystem, caixa_path: None,
            rendered_path: None, artifact_count: 0, error: None,
            fidelity: None,
        };
        let forms = match crate::reverse::reverse_from_path(subdir) {
            Ok(f) => f,
            Err(e) => {
                outcome.error = Some(format!("reverse: {e}"));
                report.failed += 1;
                report.outcomes.push(outcome);
                continue;
            }
        };
        let mut caixa_path = out.to_path_buf();
        caixa_path.push(format!("{name}.caixa.lisp"));
        if let Err(e) = std::fs::write(&caixa_path, forms.render()) {
            outcome.error = Some(format!("write caixa: {e}"));
            report.failed += 1;
            report.outcomes.push(outcome);
            continue;
        }
        outcome.caixa_path = Some(caixa_path.clone());

        // Step 3 (optional) — render the .caixa.lisp to a scaffold.
        if render_too {
            let rendered_dir = out.join(format!("{name}-rendered"));
            if let Err(e) = std::fs::create_dir_all(&rendered_dir) {
                outcome.error = Some(format!("mkdir rendered: {e}"));
                report.failed += 1;
                report.outcomes.push(outcome);
                continue;
            }
            let src = forms.render();
            match crate::caixa::render(&src, &rendered_dir, true) {
                Ok(files) => {
                    outcome.rendered_path = Some(rendered_dir.clone());
                    outcome.artifact_count = files.len();
                    // Measure fidelity if requested. Only possible
                    // when render succeeded — the rendered dir is the
                    // comparison target.
                    if measure {
                        match crate::fidelity::measure(subdir, &rendered_dir) {
                            Ok(f) => {
                                report.fidelity_perfect_total += f.perfect_count;
                                report.fidelity_lossy_total += f.lossy_count;
                                report.fidelity_gap_total += f.gap_count;
                                report.fidelity_measured += 1;
                                outcome.fidelity = Some(f);
                            }
                            Err(e) => {
                                // Non-fatal — record the measure error
                                // in the outcome's error slot but keep
                                // counting the subdir as consumed.
                                outcome.error = Some(format!("measure: {e}"));
                            }
                        }
                    }
                }
                Err(e) => {
                    outcome.error = Some(format!("render: {e}"));
                    report.failed += 1;
                    report.outcomes.push(outcome);
                    continue;
                }
            }
        }

        report.consumed += 1;
        report.outcomes.push(outcome);
    }

    Ok(report)
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Build a throwaway monorepo in a temp dir from `(relative path,
    /// file body)` pairs, creating parent directories as needed. The
    /// returned `TempDir` must be kept alive for as long as the tree
    /// is in use.
    fn mk_monorepo(files: &[(&str, &str)]) -> tempdir::TempDir {
        let tmp = tempdir::TempDir::new("mono").expect("tempdir");
        for (path, body) in files {
            let p = tmp.path().join(path);
            if let Some(parent) = p.parent() { fs::create_dir_all(parent).unwrap(); }
            fs::write(&p, body).unwrap();
        }
        tmp
    }

    // A single Rust crate subdir is consumed and its .caixa.lisp lands
    // directly under the output directory.
    #[test]
    fn consume_one_rust_subdir_emits_caixa_file() {
        let mono = mk_monorepo(&[
            ("crate-a/Cargo.toml", "[package]\nname = \"crate-a\"\nversion = \"0.1.0\"\n"),
        ]);
        let out = tempdir::TempDir::new("out").unwrap();
        let report = consume(mono.path(), out.path(), &[], false, false).unwrap();
        assert_eq!(report.consumed, 1);
        assert_eq!(report.skipped, 0);
        assert_eq!(report.failed, 0);
        assert!(out.path().join("crate-a.caixa.lisp").is_file());
    }

    // A subdir without a recognized manifest is counted as skipped and
    // produces no .caixa.lisp.
    #[test]
    fn consume_skips_subdir_without_manifest() {
        let mono = mk_monorepo(&[
            ("crate-a/Cargo.toml", "[package]\nname = \"crate-a\"\n"),
            ("docs/README.md", "no manifest\n"),
        ]);
        let out = tempdir::TempDir::new("out").unwrap();
        let report = consume(mono.path(), out.path(), &[], false, false).unwrap();
        assert_eq!(report.consumed, 1);
        assert_eq!(report.skipped, 1);
        assert!(out.path().join("crate-a.caixa.lisp").is_file());
        assert!(!out.path().join("docs.caixa.lisp").is_file());
    }

    // Names in the caller-supplied skip list are never visited.
    #[test]
    fn consume_honors_skip_list() {
        let mono = mk_monorepo(&[
            ("a/Cargo.toml", "[package]\nname = \"a\"\n"),
            ("b/Cargo.toml", "[package]\nname = \"b\"\n"),
        ]);
        let out = tempdir::TempDir::new("out").unwrap();
        let skip = vec!["b".to_string()];
        let report = consume(mono.path(), out.path(), &skip, false, false).unwrap();
        assert_eq!(report.consumed, 1);
        assert!(out.path().join("a.caixa.lisp").is_file());
        assert!(!out.path().join("b.caixa.lisp").is_file());
    }

    // With render_too, a `<name>-rendered` scaffold directory is
    // produced alongside the .caixa.lisp.
    #[test]
    fn consume_with_render_too_emits_artifact_dirs() {
        let mono = mk_monorepo(&[
            ("crate-a/Cargo.toml", "[package]\nname = \"crate-a\"\n"),
        ]);
        let out = tempdir::TempDir::new("out").unwrap();
        let report = consume(mono.path(), out.path(), &[], true, false).unwrap();
        assert_eq!(report.consumed, 1);
        let outcome = &report.outcomes[0];
        assert!(outcome.rendered_path.is_some());
        // The message states the same strict bound the assertion checks.
        assert!(outcome.artifact_count > 5,
            "expected > 5 artifacts, got {}", outcome.artifact_count);
        let rendered = out.path().join("crate-a-rendered");
        assert!(rendered.join("Cargo.toml").is_file());
    }

    #[test]
    fn consume_with_measure_populates_aggregate_fidelity() {
        // Two perfect Rust crates → aggregate fidelity = 1000/1000.
        // Rust now compares 6 fields incl. keywords + categories (NA-
        // safe: both empty strings round-trip as Perfect).
        let mono = mk_monorepo(&[
            ("a/Cargo.toml",
             "[package]\nname = \"a\"\nversion = \"1.0\"\ndescription = \"d\"\nlicense = \"MIT\"\n"),
            ("b/Cargo.toml",
             "[package]\nname = \"b\"\nversion = \"2.0\"\ndescription = \"d\"\nlicense = \"MIT\"\n"),
        ]);
        let out = tempdir::TempDir::new("out").unwrap();
        let report = consume(mono.path(), out.path(), &[], true, true).unwrap();
        assert_eq!(report.consumed, 2);
        assert_eq!(report.fidelity_measured, 2);
        assert_eq!(report.fidelity_gap_total, 0);
        assert_eq!(report.fidelity_lossy_total, 0);
        assert_eq!(report.aggregate_score_permille(), Some(1000));
        // Per-subdir fidelity is recorded.
        for o in &report.outcomes {
            let f = o.fidelity.as_ref()
                .unwrap_or_else(|| panic!("{} missing fidelity", o.name));
            assert!(f.perfect_count >= 4, "{}: per-subdir perfect ≥4", o.name);
        }
        let json = report.to_json();
        assert!(json.contains("\"fidelity-measured\": 2"));
        assert!(json.contains("\"fidelity-score-permille\": 1000"));
    }

    // to_json on a hand-built report: counters and per-outcome fields
    // show up as typed JSON values.
    #[test]
    fn report_json_renders_typed_consumed_count() {
        let r = ConsumeReport {
            root: PathBuf::from("/x"),
            consumed: 5, skipped: 1, failed: 2,
            outcomes: vec![SubdirOutcome {
                name: "a".into(), ecosystem: Some("rust-single-crate".into()),
                caixa_path: Some(PathBuf::from("/x/a.caixa.lisp")),
                rendered_path: None, artifact_count: 0, error: None,
                fidelity: None,
            }],
            fidelity_perfect_total: 0,
            fidelity_lossy_total: 0,
            fidelity_gap_total: 0,
            fidelity_measured: 0,
        };
        let j = r.to_json();
        assert!(j.contains("\"consumed\": 5"));
        assert!(j.contains("\"skipped\": 1"));
        assert!(j.contains("\"failed\": 2"));
        assert!(j.contains("\"name\": \"a\""));
        assert!(j.contains("\"ecosystem\": \"rust-single-crate\""));
    }
}