pleme-doc-gen 0.1.45

Rust replacement for the M0 Python _gen-patterns.py + _gen-docs.py scripts in pleme-io/actions. Walks every action.yml + emits substrate's patterns-full.nix + per-action README.md + root catalog. Per the NO-SHELL prime directive.
//! Typed GitHub-API client surface.
//!
//! Per the ★★ PRIME DIRECTIVE — trait-per-concern + ruthless
//! standardization. Every gh-API consumer in pleme-doc-gen flows
//! through ONE trait so:
//!   - tests can swap in a MockClient (no network, deterministic)
//!   - future consumers (PR creation, issue ops, file fetches for
//!     deeper discover signals) reuse the same surface
//!   - the gh CLI shell-out is contained to one impl
//!
//! Today `list_root_filenames` + `fetch_file_text` are the required
//! methods (powering discover::detect_github_url); `search_repos` is
//! optional and defaults to returning `None`. The trait will grow as
//! future consumers add new typed methods.

use std::path::Path;

/// The slice of the GitHub API that pleme-doc-gen consumes, expressed
/// as a trait so the backing implementation can be swapped.
///
/// Every method reports failure as `None` rather than an error, which
/// lets callers degrade cleanly when `gh` isn't installed or the
/// repository can't be reached.
pub trait GithubClient {
    /// Names of the entries directly under the root of `<slug>`;
    /// subdirectories are not descended into.
    ///
    /// Mirrors `gh api repos/<slug>/contents --jq .[].name`.
    fn list_root_filenames(&self, slug: &str) -> Option<Vec<String>>;

    /// Decoded text of the file at `<path>` inside `<slug>`.
    ///
    /// Mirrors `gh api repos/<slug>/contents/<path> --jq .content`
    /// followed by a base64 decode.
    fn fetch_file_text(&self, slug: &str, path: &str) -> Option<String>;

    /// `owner/repo` slugs matching a repository search, capped at
    /// `limit` entries.
    ///
    /// Mirrors
    /// `gh search repos "<query>" --json fullName --jq .[].fullName --limit N`.
    /// The provided body always answers `None`, so implementors that
    /// have no search capability need not override it.
    fn search_repos(&self, query: &str, limit: usize) -> Option<Vec<String>> {
        // Both arguments are intentionally unused in the default body.
        let _ = query;
        let _ = limit;
        None
    }
}

/// Real (non-mock) client: every method spawns one `gh` process and
/// parses what it prints on stdout.
pub struct GhCliClient;

/// Runs `gh` with `args` and hands back its stdout as text.
///
/// Yields `None` when the process cannot be spawned, exits with a
/// non-success status, or prints bytes that are not valid UTF-8.
fn gh_stdout(args: &[&str]) -> Option<String> {
    let output = std::process::Command::new("gh").args(args).output().ok()?;
    if output.status.success() {
        String::from_utf8(output.stdout).ok()
    } else {
        None
    }
}

impl GithubClient for GhCliClient {
    /// One filename per stdout line of
    /// `gh api repos/<slug>/contents --jq .[].name`.
    fn list_root_filenames(&self, slug: &str) -> Option<Vec<String>> {
        let endpoint = format!("repos/{}/contents", slug);
        let listing = gh_stdout(&["api", endpoint.as_str(), "--jq", ".[].name"])?;
        Some(listing.lines().map(str::to_owned).collect())
    }

    /// Reads the `.content` field for `<path>`, drops all whitespace
    /// from it, then base64-decodes the remainder.
    fn fetch_file_text(&self, slug: &str, path: &str) -> Option<String> {
        let endpoint = format!("repos/{}/contents/{}", slug, path);
        let encoded = gh_stdout(&["api", endpoint.as_str(), "--jq", ".content"])?;
        // Whitespace is removed here because the decoder rejects any
        // byte outside the base64 alphabet.
        let compact: String = encoded.split_whitespace().collect();
        base64_decode_lite(&compact)
    }

    /// One slug per non-empty stdout line of `gh search repos`.
    fn search_repos(&self, query: &str, limit: usize) -> Option<Vec<String>> {
        let cap = limit.to_string();
        let listing = gh_stdout(&[
            "search", "repos", query,
            "--json", "fullName",
            "--jq", ".[].fullName",
            "--limit", cap.as_str(),
        ])?;
        let slugs = listing
            .lines()
            .filter(|line| !line.is_empty())
            .map(str::to_owned)
            .collect();
        Some(slugs)
    }
}

/// In-memory stand-in that answers from canned data. Meant for tests
/// and for downstream tools (CLI dry-runs, fixtures) that want
/// deterministic responses.
///
/// Populate it through the public fields or the chained `with_*`
/// builders below.
#[derive(Debug, Default, Clone)]
pub struct MockClient {
    /// Filenames the mocked repository "contains".
    pub files: Vec<String>,
    /// File bodies, keyed by the `(slug, path)` pair they belong to.
    pub contents: std::collections::BTreeMap<(String, String), String>,
    /// Canned `owner/repo` slugs handed out by the search method.
    pub search_results: Vec<String>,
}

impl MockClient {
    /// An empty mock: no files, no contents, no search results.
    pub fn new() -> Self {
        Self {
            files: Vec::new(),
            contents: std::collections::BTreeMap::new(),
            search_results: Vec::new(),
        }
    }

    /// Replaces the whole filename list with `files`.
    pub fn with_files(self, files: impl IntoIterator<Item = impl Into<String>>) -> Self {
        Self {
            files: files.into_iter().map(Into::into).collect(),
            ..self
        }
    }

    /// Registers `content` under `(slug, path)`, overwriting any body
    /// previously stored for that pair.
    pub fn with_file_content(
        mut self, slug: impl Into<String>, path: impl Into<String>, content: impl Into<String>
    ) -> Self {
        let key = (slug.into(), path.into());
        self.contents.insert(key, content.into());
        self
    }

    /// Replaces the whole search-result list with `results`.
    pub fn with_search_results(self, results: impl IntoIterator<Item = impl Into<String>>) -> Self {
        Self {
            search_results: results.into_iter().map(Into::into).collect(),
            ..self
        }
    }
}

/// Answers every trait call from the canned fields.
impl GithubClient for MockClient {
    /// Always `Some`: a copy of `files`, whatever slug is asked for.
    fn list_root_filenames(&self, _slug: &str) -> Option<Vec<String>> {
        Some(self.files.to_vec())
    }

    /// The body stored under exactly `(slug, path)`, or `None` when no
    /// such pair was registered.
    fn fetch_file_text(&self, slug: &str, path: &str) -> Option<String> {
        let key = (slug.to_owned(), path.to_owned());
        self.contents.get(&key).cloned()
    }

    /// Always `Some`: the first `limit` canned slugs (all of them when
    /// fewer are stored). The query text is ignored.
    fn search_repos(&self, _query: &str, limit: usize) -> Option<Vec<String>> {
        let count = limit.min(self.search_results.len());
        Some(self.search_results[..count].to_vec())
    }
}

/// Minimal base64 decoder — gh api returns small manifest files
/// base64-encoded; avoiding a base64 crate dep keeps deps lean.
///
/// Accepts the standard alphabet (`A-Z a-z 0-9 + /`) with or without
/// trailing `=` padding. The input must already be free of whitespace;
/// callers strip line breaks before decoding.
///
/// Returns `None` when:
///   - any byte is outside the alphabet (this includes `=` anywhere
///     other than the tail, and whitespace),
///   - the unpadded length leaves a single dangling symbol, which
///     carries only 6 bits and therefore cannot encode a byte,
///   - the decoded bytes are not valid UTF-8.
///
/// An empty input decodes to an empty string.
pub fn base64_decode_lite(s: &str) -> Option<String> {
    /// Maps one alphabet byte to its 6-bit value.
    fn sextet(b: u8) -> Option<u32> {
        match b {
            b'A'..=b'Z' => Some(u32::from(b - b'A')),
            b'a'..=b'z' => Some(u32::from(b - b'a') + 26),
            b'0'..=b'9' => Some(u32::from(b - b'0') + 52),
            b'+' => Some(62),
            b'/' => Some(63),
            _ => None,
        }
    }

    // Padding is only meaningful at the tail; an interior `=` is left
    // in place and rejected by `sextet` below.
    let payload = s.trim_end_matches('=').as_bytes();
    if payload.len() % 4 == 1 {
        return None;
    }

    let mut out: Vec<u8> = Vec::with_capacity(payload.len() * 3 / 4);
    for chunk in payload.chunks(4) {
        // Pack up to four 6-bit symbols into the top of a 24-bit group.
        let mut group: u32 = 0;
        for (i, &b) in chunk.iter().enumerate() {
            group |= sextet(b)? << (18 - 6 * i);
        }
        // 2 symbols -> 1 byte, 3 symbols -> 2 bytes, 4 symbols -> 3 bytes.
        out.push(((group >> 16) & 0xFF) as u8);
        if chunk.len() > 2 {
            out.push(((group >> 8) & 0xFF) as u8);
        }
        if chunk.len() > 3 {
            out.push((group & 0xFF) as u8);
        }
    }
    String::from_utf8(out).ok()
}

// Keeps the `use std::path::Path` import at the top of this module
// referenced, so it does not raise an unused-import warning when this
// module isn't pulled into a binary path. Per the original note, Path
// is reserved for future trait methods.
// `dead_code` is allowed because this marker is not called anywhere
// in this file.
#[allow(dead_code)]
fn _path_marker(_p: &Path) {}

#[cfg(test)]
mod tests {
    use super::*;

    /// The mock search honours `limit` and never returns more than it holds.
    #[test]
    fn mock_search_returns_canned_slugs_up_to_limit() {
        let client = MockClient::new().with_search_results(["a/x", "b/y", "c/z", "d/w"]);

        let capped = client.search_repos("q", 2).unwrap();
        assert_eq!(capped, vec!["a/x", "b/y"]);

        let uncapped = client.search_repos("q", 10).unwrap();
        assert_eq!(uncapped.len(), 4);
    }

    /// The mock hands back its filename list for any slug.
    #[test]
    fn mock_returns_canned_filenames() {
        let client = MockClient::new().with_files(["Cargo.toml", "src"]);
        let listed = client.list_root_filenames("any/repo").unwrap();
        assert_eq!(listed, vec!["Cargo.toml", "src"]);
    }

    /// A registered `(slug, path)` pair yields its stored body.
    #[test]
    fn mock_returns_canned_content() {
        let client = MockClient::new().with_file_content("o/r", "Cargo.toml", "[workspace]\n");
        let text = client.fetch_file_text("o/r", "Cargo.toml").unwrap();
        assert_eq!(text, "[workspace]\n");
    }

    /// An unregistered pair yields `None`.
    #[test]
    fn mock_returns_none_for_unknown_content() {
        let client = MockClient::new();
        let missing = client.fetch_file_text("o/r", "Cargo.toml");
        assert!(missing.is_none());
    }

    /// Padded input decodes to the expected short string.
    #[test]
    fn base64_lite_round_trips_short_text() {
        let decoded = base64_decode_lite("SGVsbG8=");
        assert_eq!(decoded.as_deref(), Some("Hello"));
    }

    /// Unpadded input: the base64 form of "[workspace]\n".
    #[test]
    fn base64_lite_decodes_workspace_marker() {
        let decoded = base64_decode_lite("W3dvcmtzcGFjZV0K").unwrap();
        assert!(decoded.contains("[workspace]"));
    }
}