pleme-doc-gen 0.1.40

Rust replacement for the M0 Python _gen-patterns.py + _gen-docs.py scripts in pleme-io/actions. Walks every action.yml + emits substrate's patterns-full.nix + per-action README.md + root catalog. Per the NO-SHELL prime directive.
//! consume-gh-org — bulk absorb a GitHub org's repos as typed caixas.
//!
//! Lists an org's repos via `gh repo list` (archived repos are skipped
//! unless requested), shallow-clones each into a work directory, runs
//! `discover + reverse` on each clone, and emits one .caixa.lisp per
//! recognized repo plus an aggregate OrgReport. Optionally also renders
//! each .caixa.lisp into a typed scaffold dir.
//!
//! The most automated consumption-feat the substrate ships:
//!
//!   pleme-doc-gen consume-gh-org \
//!     --org pleme-io --limit 100 \
//!     --out ./typed-pleme-io
//!
//! One CLI call → N typed .caixa.lisp sources covering every repo
//! in the org. Composes the existing reverse + caixa::render primitives
//! over a network-fetched corpus.
//!
//! Per the ★★ CLOSED-LOOP MASS-SYNTHESIS directive Rule 2 (closed-
//! loop primitive composition at the CLI layer): the operator wraps
//! gh-CLI clone + consume into ONE substrate verb; agents can drive
//! org-scale absorption deterministically.

use anyhow::{anyhow, Result};
use std::path::{Path, PathBuf};

/// Per-repo outcome of the org-consume pass.
///
/// `consume_org()` pushes exactly one of these per listed repo,
/// whether the repo was consumed, skipped or failed.
#[derive(Debug, Clone)]
pub struct RepoOutcome {
    /// Repo slug in `<org>/<name>` form, as produced by `list_org_repos()`.
    pub slug: String,
    /// Ecosystem reported by `discover::detect`; None when nothing
    /// was detected (or the clone failed before detection ran).
    pub ecosystem: Option<String>,
    /// Where the repo's .caixa.lisp was written; None until that
    /// write has succeeded.
    pub caixa_path: Option<PathBuf>,
    /// Scaffold directory the .caixa.lisp was rendered into. Set only
    /// when rendering was requested and succeeded.
    pub rendered_path: Option<PathBuf>,
    /// Number of files returned by the render step; 0 when no render ran.
    pub artifact_count: usize,
    /// Reason the repo was skipped or failed, prefixed with the step
    /// name (`clone:`, `reverse:`, `write caixa:`, `mkdir rendered:`,
    /// `render:`). Also carries a `measure:` message when fidelity
    /// measurement errored, which does not fail the repo.
    pub error: Option<String>,
    /// Per-repo fidelity (original-vs-rendered manifest fields). Set
    /// only when consume_org() ran with `measure: true` AND the repo
    /// was successfully cloned + rendered.
    pub fidelity: Option<crate::fidelity::FidelityReport>,
    /// Per-repo render-health (manifest + workflow + test presence).
    /// Set only when consume_org() ran with `verify_rendered: true`
    /// and the scaffold was rendered successfully.
    pub render_health: Option<crate::render_health::RenderHealth>,
}

/// Aggregate result of one `consume_org()` pass: counters, the
/// per-repo outcomes, and optional fidelity / render-health totals.
#[derive(Debug, Default, Clone)]
pub struct OrgReport {
    /// Org name the pass was run against.
    pub org: String,
    /// Number of repo slugs returned by the listing step.
    pub listed: usize,
    /// Repos that completed every requested step.
    pub consumed: usize,
    /// Repos for which no ecosystem was detected.
    pub skipped: usize,
    /// Repos that failed at clone, reverse, write, mkdir or render.
    pub failed: usize,
    /// One entry per listed repo, in listing order.
    pub outcomes: Vec<RepoOutcome>,
    /// Aggregate fidelity stats across every measured repo. Zero when
    /// consume_org() ran without `measure: true`.
    pub fidelity_perfect_total: usize,
    /// Sum of per-repo lossy field counts.
    pub fidelity_lossy_total: usize,
    /// Sum of per-repo gap field counts.
    pub fidelity_gap_total: usize,
    /// Number of repos whose fidelity measurement succeeded.
    pub fidelity_measured: usize,
    /// Per-ecosystem breakdown — how many of each ecosystem were
    /// measured, and how many perfect fields each contributed.
    /// Enables operators to spot which extractor needs work.
    pub fidelity_by_ecosystem: std::collections::BTreeMap<String, EcosystemFidelity>,
    /// Aggregate render-health: count of pipeline-ready scaffolds.
    pub health_pipeline_ready: usize,
    /// Number of rendered scaffolds that were health-checked.
    pub health_measured: usize,
}

/// Fidelity totals for a single ecosystem; one row of
/// `OrgReport::fidelity_by_ecosystem`.
#[derive(Debug, Default, Clone)]
pub struct EcosystemFidelity {
    /// Number of measured repos attributed to this ecosystem.
    pub measured: usize,
    /// Sum of those repos' perfect field counts.
    pub perfect: usize,
    /// Sum of those repos' lossy field counts.
    pub lossy: usize,
    /// Sum of those repos' gap field counts.
    pub gap: usize,
}

impl OrgReport {
    /// Share of perfect fields among all measured fields, expressed
    /// in permille (0-1000) and rounded to the nearest integer.
    ///
    /// Returns None when no repo was measured. When repos were
    /// measured but contributed no fields at all, the score is 1000.
    pub fn aggregate_score_permille(&self) -> Option<i64> {
        if self.fidelity_measured == 0 {
            return None;
        }
        let perfect = self.fidelity_perfect_total;
        let field_count = perfect + self.fidelity_lossy_total + self.fidelity_gap_total;
        let permille = match field_count {
            // Nothing to lose means nothing was lost.
            0 => 1000,
            n => ((perfect as f64 / n as f64) * 1000.0).round() as i64,
        };
        Some(permille)
    }
}

impl OrgReport {
    /// Render the report as JSON text via `crate::json_ast`.
    ///
    /// Always emitted: `org`, `listed`, `consumed`, `skipped`,
    /// `failed` and the `outcomes` array. The `fidelity-*` keys
    /// (including the `fidelity-by-ecosystem` breakdown) appear only
    /// when at least one repo was fidelity-measured; the `health-*`
    /// keys only when at least one scaffold was health-checked.
    /// Per-outcome optional keys are omitted when unset.
    pub fn to_json(&self) -> String {
        use crate::json_ast::Value;
        let mut root = Value::obj();
        root.insert("org", Value::s(&self.org));
        root.insert("listed", Value::i(self.listed as i64));
        root.insert("consumed", Value::i(self.consumed as i64));
        root.insert("skipped", Value::i(self.skipped as i64));
        root.insert("failed", Value::i(self.failed as i64));
        if self.fidelity_measured > 0 {
            root.insert("fidelity-measured", Value::i(self.fidelity_measured as i64));
            root.insert("fidelity-perfect", Value::i(self.fidelity_perfect_total as i64));
            root.insert("fidelity-lossy", Value::i(self.fidelity_lossy_total as i64));
            root.insert("fidelity-gap", Value::i(self.fidelity_gap_total as i64));
            if let Some(s) = self.aggregate_score_permille() {
                root.insert("fidelity-score-permille", Value::i(s));
            }
        }
        if self.health_measured > 0 {
            root.insert("health-measured", Value::i(self.health_measured as i64));
            root.insert("health-pipeline-ready", Value::i(self.health_pipeline_ready as i64));
            let score = ((self.health_pipeline_ready as f64
                / self.health_measured as f64) * 1000.0).round() as i64;
            root.insert("health-score-permille", Value::i(score));
        }
        // Per-ecosystem breakdown — surfaces which extractors are
        // strongest + which need work. Gated on fidelity (not health)
        // measurement: the rows are fidelity data, so they must be
        // emitted for a measure-only run and omitted for a health-only
        // run. Kept after the health keys so key order is unchanged
        // when both were measured.
        if self.fidelity_measured > 0 {
            let mut by_eco = Value::obj();
            for (eco, f) in &self.fidelity_by_ecosystem {
                let mut row = Value::obj();
                row.insert("measured", Value::i(f.measured as i64));
                row.insert("perfect", Value::i(f.perfect as i64));
                row.insert("lossy", Value::i(f.lossy as i64));
                row.insert("gap", Value::i(f.gap as i64));
                let total = f.perfect + f.lossy + f.gap;
                // No fields at all counts as a perfect score, matching
                // aggregate_score_permille().
                let score = if total == 0 { 1000_i64 }
                            else { ((f.perfect as f64 / total as f64) * 1000.0).round() as i64 };
                row.insert("score-permille", Value::i(score));
                by_eco.insert(eco, row);
            }
            root.insert("fidelity-by-ecosystem", by_eco);
        }
        let outcomes: Vec<Value> = self.outcomes.iter().map(|o| {
            let mut row = Value::obj();
            row.insert("slug", Value::s(&o.slug));
            if let Some(eco) = &o.ecosystem { row.insert("ecosystem", Value::s(eco)); }
            if let Some(p) = &o.caixa_path {
                row.insert("caixa", Value::s(p.to_string_lossy().into_owned()));
            }
            if let Some(p) = &o.rendered_path {
                row.insert("rendered", Value::s(p.to_string_lossy().into_owned()));
            }
            if o.artifact_count > 0 {
                row.insert("artifacts", Value::i(o.artifact_count as i64));
            }
            if let Some(f) = &o.fidelity {
                row.insert("fidelity-perfect", Value::i(f.perfect_count as i64));
                row.insert("fidelity-lossy", Value::i(f.lossy_count as i64));
                row.insert("fidelity-gap", Value::i(f.gap_count as i64));
                row.insert("fidelity-score-permille",
                    Value::i((f.score() * 1000.0).round() as i64));
            }
            if let Some(h) = &o.render_health {
                row.insert("health-manifest", Value::b(h.has_manifest));
                row.insert("health-auto-release", Value::b(h.has_auto_release));
                row.insert("health-workflow-count", Value::i(h.workflow_count as i64));
                row.insert("health-test-count", Value::i(h.test_count as i64));
                row.insert("health-pipeline-ready", Value::b(h.is_pipeline_ready()));
                row.insert("health-score-permille", Value::i(h.score_permille()));
            }
            if let Some(e) = &o.error { row.insert("error", Value::s(e)); }
            row
        }).collect();
        root.insert("outcomes", Value::Array(outcomes));
        crate::json_ast::render(&root)
    }
}

/// List the org's repos via
/// `gh repo list <org> --json name,isArchived --jq <filter> --limit N`.
///
/// Returns one `<org>/<name>` slug per non-blank line of `gh` output.
/// Archived repos are filtered out by the jq expression unless
/// `include_archived` is true.
///
/// # Errors
///
/// Fails when `org` is blank, when `gh` cannot be spawned, or when it
/// exits non-zero (its stderr is included in the message).
pub fn list_org_repos(org: &str, limit: usize, include_archived: bool) -> Result<Vec<String>> {
    // Without an owner argument `gh repo list` has nothing naming the
    // org to list, and every slug would come back as "/<name>".
    // Refuse up front instead of silently dropping the argument.
    if org.trim().is_empty() {
        return Err(anyhow!("gh repo list: org must not be empty"));
    }
    let jq_filter = if include_archived {
        ".[] | .name"
    } else {
        ".[] | select(.isArchived == false) | .name"
    };
    let limit_s = limit.to_string();
    let out = std::process::Command::new("gh")
        .args([
            "repo", "list", org,
            "--json", "name,isArchived",
            "--jq", jq_filter,
            "--limit", limit_s.as_str(),
        ])
        .output()
        .map_err(|e| anyhow!("gh repo list failed: {e}"))?;
    if !out.status.success() {
        let stderr = String::from_utf8_lossy(&out.stderr);
        return Err(anyhow!("gh repo list non-zero: {stderr}"));
    }
    let text = String::from_utf8_lossy(&out.stdout);
    let names: Vec<String> = text
        .lines()
        .map(str::trim)
        .filter(|l| !l.is_empty())
        .map(|l| format!("{org}/{l}"))
        .collect();
    Ok(names)
}

/// Shallow-clone `slug` into `work_dir/<repo>`. Returns the clone path.
///
/// `<repo>` is the part of `slug` after the last `/`. If that
/// directory already exists it is reused as-is, so re-runs do not
/// clone again.
///
/// # Errors
///
/// Fails when `git` cannot be spawned or exits non-zero.
fn shallow_clone(slug: &str, work_dir: &Path) -> Result<PathBuf> {
    let repo_name = slug.rsplit('/').next().unwrap_or(slug);
    let target = work_dir.join(repo_name);
    if target.is_dir() {
        return Ok(target); // already cloned (re-run)
    }
    let url = format!("https://github.com/{slug}.git");
    let st = std::process::Command::new("git")
        // `--` ends option parsing so a repo name starting with `-`
        // can never be read as a git flag.
        .args(["clone", "--depth", "1", "--quiet", "--"])
        .arg(&url)
        // Pass the path as an OsStr: no UTF-8 round-trip, so a
        // non-UTF-8 work dir cannot panic here.
        .arg(&target)
        .status()
        .map_err(|e| anyhow!("git clone {slug}: {e}"))?;
    if !st.success() {
        return Err(anyhow!("git clone {slug} returned non-zero"));
    }
    Ok(target)
}

/// Top-level consume-gh-org operation. Returns OrgReport even when
/// individual repos fail.
///
/// `work_dir` is where shallow clones land; `out` is where the
/// .caixa.lisp + (optionally) rendered scaffolds go.
pub fn consume_org(
    org: &str,
    out: &Path,
    work_dir: &Path,
    limit: usize,
    render_too: bool,
    include_archived: bool,
    measure: bool,
    verify_rendered: bool,
) -> Result<OrgReport> {
    use crate::ast::Render;
    std::fs::create_dir_all(out)?;
    std::fs::create_dir_all(work_dir)?;

    let slugs = list_org_repos(org, limit, include_archived)?;
    let mut report = OrgReport { org: org.to_string(), listed: slugs.len(),
        ..Default::default() };

    for slug in &slugs {
        let mut outcome = RepoOutcome {
            slug: slug.clone(), ecosystem: None, caixa_path: None,
            rendered_path: None, artifact_count: 0, error: None,
            fidelity: None, render_health: None,
        };

        // Step 1 — shallow clone.
        let clone_path = match shallow_clone(slug, work_dir) {
            Ok(p) => p,
            Err(e) => {
                outcome.error = Some(format!("clone: {e}"));
                report.failed += 1;
                report.outcomes.push(outcome);
                continue;
            }
        };

        // Step 2 — discover.
        let detected = crate::discover::detect(&clone_path);
        outcome.ecosystem = detected.as_ref().map(|d| d.ecosystem.to_string());
        if detected.is_none() {
            outcome.error = Some("no ecosystem detected".into());
            report.skipped += 1;
            report.outcomes.push(outcome);
            continue;
        }

        // Step 3 — reverse + write .caixa.lisp.
        let forms = match crate::reverse::reverse_from_path(&clone_path) {
            Ok(f) => f,
            Err(e) => {
                outcome.error = Some(format!("reverse: {e}"));
                report.failed += 1;
                report.outcomes.push(outcome);
                continue;
            }
        };
        let repo_name = slug.rsplit('/').next().unwrap_or(slug);
        let caixa_path = out.join(format!("{repo_name}.caixa.lisp"));
        if let Err(e) = std::fs::write(&caixa_path, forms.render()) {
            outcome.error = Some(format!("write caixa: {e}"));
            report.failed += 1;
            report.outcomes.push(outcome);
            continue;
        }
        outcome.caixa_path = Some(caixa_path.clone());

        // Step 4 (optional) — render to a typed scaffold dir.
        if render_too {
            let rendered = out.join(format!("{repo_name}-rendered"));
            if let Err(e) = std::fs::create_dir_all(&rendered) {
                outcome.error = Some(format!("mkdir rendered: {e}"));
                report.failed += 1;
                report.outcomes.push(outcome);
                continue;
            }
            let src = forms.render();
            match crate::caixa::render(&src, &rendered, true) {
                Ok(files) => {
                    outcome.rendered_path = Some(rendered.clone());
                    outcome.artifact_count = files.len();
                    // Measure fidelity if requested. Only fires when
                    // render succeeds; per-repo failures are recorded
                    // as non-fatal so the org pass continues.
                    if measure {
                        match crate::fidelity::measure(&clone_path, &rendered) {
                            Ok(f) => {
                                report.fidelity_perfect_total += f.perfect_count;
                                report.fidelity_lossy_total += f.lossy_count;
                                report.fidelity_gap_total += f.gap_count;
                                report.fidelity_measured += 1;
                                if let Some(eco) = &f.ecosystem {
                                    let row = report.fidelity_by_ecosystem
                                        .entry(eco.clone()).or_default();
                                    row.measured += 1;
                                    row.perfect += f.perfect_count;
                                    row.lossy += f.lossy_count;
                                    row.gap += f.gap_count;
                                }
                                outcome.fidelity = Some(f);
                            }
                            Err(e) => {
                                outcome.error = Some(format!("measure: {e}"));
                            }
                        }
                    }
                    if verify_rendered {
                        if let Some(eco) = outcome.ecosystem.as_deref() {
                            let h = crate::render_health::check(&rendered, eco);
                            if h.is_pipeline_ready() {
                                report.health_pipeline_ready += 1;
                            }
                            report.health_measured += 1;
                            outcome.render_health = Some(h);
                        }
                    }
                }
                Err(e) => {
                    outcome.error = Some(format!("render: {e}"));
                    report.failed += 1;
                    report.outcomes.push(outcome);
                    continue;
                }
            }
        }

        report.consumed += 1;
        report.outcomes.push(outcome);
    }

    Ok(report)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Report carrying only the aggregate fidelity counters; every
    /// other field keeps its default.
    fn fidelity_only(measured: usize, perfect: usize, lossy: usize, gap: usize) -> OrgReport {
        OrgReport {
            fidelity_measured: measured,
            fidelity_perfect_total: perfect,
            fidelity_lossy_total: lossy,
            fidelity_gap_total: gap,
            ..Default::default()
        }
    }

    #[test]
    fn report_json_renders_typed_consumed_count() {
        let repo_a = RepoOutcome {
            slug: "test-org/repo-a".into(),
            ecosystem: Some("rust-single-crate".into()),
            caixa_path: Some(PathBuf::from("/x/repo-a.caixa.lisp")),
            rendered_path: None,
            artifact_count: 0,
            error: None,
            fidelity: None,
            render_health: None,
        };
        let report = OrgReport {
            org: "test-org".into(),
            listed: 5,
            consumed: 3,
            skipped: 1,
            failed: 1,
            outcomes: vec![repo_a],
            ..Default::default()
        };

        let json = report.to_json();
        let expected_fragments = [
            "\"org\": \"test-org\"",
            "\"listed\": 5",
            "\"consumed\": 3",
            "\"slug\": \"test-org/repo-a\"",
        ];
        for fragment in expected_fragments {
            assert!(json.contains(fragment));
        }
    }

    #[test]
    fn aggregate_score_permille_handles_no_measurements() {
        assert_eq!(OrgReport::default().aggregate_score_permille(), None);
    }

    #[test]
    fn aggregate_score_permille_is_perfect_when_all_perfect() {
        let report = fidelity_only(3, 12, 0, 0);
        assert_eq!(report.aggregate_score_permille(), Some(1000));
    }

    #[test]
    fn aggregate_score_permille_handles_partial_perfection() {
        // 6 perfect out of 6 + 1 + 1 = 8 fields is 0.75, i.e. 750 permille.
        let report = fidelity_only(2, 6, 1, 1);
        assert_eq!(report.aggregate_score_permille(), Some(750));
    }
}