harn-cli 0.7.1

CLI for the Harn programming language — run, test, REPL, format, and lint
//! `harn dump-highlight-keywords` — regenerate `docs/theme/harn-keywords.js`.
//!
//! The mdBook documentation site (`docs/`) uses a custom highlight.js language
//! definition to render ```` ```harn ```` code blocks. To keep the highlighter
//! in sync with the actual language and stdlib without hand-maintaining a
//! duplicate keyword list, this command emits a small JS file that the
//! highlight.js module consumes at runtime.
//!
//! Sources of truth:
//!
//! - `harn_lexer::KEYWORDS` — language keywords.
//! - `harn_vm::stdlib::stdlib_builtin_names()` — globally-available stdlib
//!   builtins (all three tiers are registered unconditionally on a Harn VM,
//!   so everything this function returns is reachable without an `import`).
//!
//! With `--check`, the command diffs the generated content against the file
//! on disk and exits non-zero if they differ (same idiom as `cargo fmt
//! --check`). CI runs this to fail any PR that changes a keyword or a builtin
//! name without regenerating.

use std::collections::BTreeSet;
use std::fs;
use std::path::Path;
use std::process;

use harn_lexer::KEYWORDS;
use harn_vm::stdlib::stdlib_builtin_names;

/// Literals that render as `hljs-literal` rather than `hljs-keyword`.
///
/// `generate_file` removes these names from the emitted `keyword` field and
/// writes them, space-separated and in this order, as the `literal` field.
///
/// Hand-maintained against `KEYWORDS`; update both this list and the test
/// below if a new literal keyword is added.
const LITERALS: &[&str] = &["true", "false", "nil"];

/// Entry point for `harn dump-highlight-keywords`.
///
/// * `output_path` — location of the generated JS file.
/// * `check_only` — when `true`, nothing is written: the file on disk is
///   compared with freshly generated content and the process exits with
///   status 1 if it cannot be read or differs. When `false`, missing parent
///   directories are created, the file is (over)written, and a confirmation
///   line is printed to stdout.
///
/// Every failure is reported on stderr and terminates the process with exit
/// status 1; the function only returns on success.
pub(crate) fn run(output_path: &str, check_only: bool) {
    let fresh = generate_file();
    let target = Path::new(output_path);

    if check_only {
        // Verification mode: an unreadable file counts as a failure too.
        let committed = fs::read_to_string(target).unwrap_or_else(|e| {
            eprintln!("error: cannot read {}: {e}", target.display());
            eprintln!("hint: run `make gen-highlight` to regenerate.");
            process::exit(1)
        });
        if committed != fresh {
            eprintln!(
                "error: {} is stale relative to the lexer/stdlib.",
                target.display()
            );
            eprintln!("hint: run `make gen-highlight` to regenerate.");
            process::exit(1);
        }
    } else {
        // Write mode: ensure the destination directory exists first.
        if let Some(dir) = target.parent() {
            fs::create_dir_all(dir).unwrap_or_else(|e| {
                eprintln!("error: cannot create {}: {e}", dir.display());
                process::exit(1)
            });
        }
        fs::write(target, &fresh).unwrap_or_else(|e| {
            eprintln!("error: cannot write {}: {e}", target.display());
            process::exit(1)
        });
        println!("wrote {}", target.display());
    }
}

/// Render the complete contents of the generated keyword file.
///
/// Touches neither the filesystem nor the process state, so it can be called
/// directly from unit tests.
///
/// The three emitted fields are space-separated strings:
/// * `keyword` — `KEYWORDS` in declaration order, minus `LITERALS`.
/// * `literal` — `LITERALS` in declaration order.
/// * `built_in` — stdlib builtin names, sorted and de-duplicated, skipping
///   empty names, `__`-prefixed names, and anything that is also a keyword.
fn generate_file() -> String {
    let keyword = KEYWORDS
        .iter()
        .copied()
        .filter(|kw| !LITERALS.contains(kw))
        .collect::<Vec<&str>>()
        .join(" ");
    let literal = LITERALS.join(" ");

    // highlight.js already treats keywords (literals included) as keywords,
    // so they must not be repeated in the builtin list. Compiler-internal
    // `__*` names are never called directly by users and are left out too.
    let reserved: BTreeSet<&str> = KEYWORDS.iter().copied().collect();
    let names: BTreeSet<String> = stdlib_builtin_names()
        .into_iter()
        .filter(|name| {
            !(name.is_empty() || name.starts_with("__") || reserved.contains(name.as_str()))
        })
        .collect();
    // The ordered set yields the names sorted and without duplicates.
    let built_in = names
        .iter()
        .map(String::as_str)
        .collect::<Vec<_>>()
        .join(" ");

    // `{:?}` wraps each line in double quotes, giving a string literal in
    // the emitted JS.
    format!(
        "// GENERATED by `harn dump-highlight-keywords` — do not edit by hand.\n\
         //\n\
         // Sources of truth:\n\
         //   crates/harn-lexer/src/token.rs  (KEYWORDS)\n\
         //   crates/harn-vm/src/stdlib.rs    (stdlib_builtin_names)\n\
         //\n\
         // Regenerate with: make gen-highlight\n\
         // CI guard:        cargo run -p harn-cli -- dump-highlight-keywords --check\n\
         window.__HARN_KEYWORDS = {{\n\
         \x20\x20keyword: {keyword:?},\n\
         \x20\x20literal: {literal:?},\n\
         \x20\x20built_in: {built_in:?}\n\
         }};\n"
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Return the text between the double quotes of the `<name>: "..."`
    /// field in generated output.
    ///
    /// Panics if the field is missing or its closing quote cannot be found.
    /// Assumes the field value itself contains no `"`, which holds as long
    /// as keywords and builtin names are plain identifiers.
    fn quoted_field<'a>(out: &'a str, name: &str) -> &'a str {
        let marker = format!("{name}: \"");
        let start = out
            .find(&marker)
            .unwrap_or_else(|| panic!("`{name}` field missing from generated file"))
            + marker.len();
        let len = out[start..]
            .find('"')
            .unwrap_or_else(|| panic!("`{name}` field is not terminated"));
        &out[start..start + len]
    }

    /// Smoke test: a few well-known keywords and the JS global are present.
    #[test]
    fn generated_file_contains_core_keywords() {
        let out = generate_file();
        assert!(out.contains("pipeline"));
        assert!(out.contains("parallel"));
        assert!(out.contains("defer"));
        assert!(out.contains("window.__HARN_KEYWORDS"));
    }

    /// Smoke test: a few well-known stdlib builtins are present.
    #[test]
    fn generated_file_contains_known_builtins() {
        let out = generate_file();
        for name in &["println", "read_file", "llm_call", "http_get"] {
            assert!(
                out.contains(name),
                "expected builtin `{name}` in generated file"
            );
        }
    }

    /// CI backstop so PRs that change a keyword or stdlib builtin name
    /// without regenerating `docs/theme/harn-keywords.js` fail `make test`.
    #[test]
    fn committed_keyword_file_matches_generator() {
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        let path = std::path::Path::new(manifest_dir)
            .join("..")
            .join("..")
            .join("docs")
            .join("theme")
            .join("harn-keywords.js");
        let on_disk = std::fs::read_to_string(&path).unwrap_or_else(|e| {
            panic!(
                "failed to read {}: {e}\n\
                 hint: run `make gen-highlight` to regenerate.",
                path.display()
            )
        });
        let generated = generate_file();
        assert_eq!(
            on_disk, generated,
            "docs/theme/harn-keywords.js is stale relative to the lexer/stdlib.\n\
             Run `make gen-highlight` to regenerate."
        );
    }

    /// Literals must live in the `literal` field only, never in `keyword`.
    #[test]
    fn literals_are_not_also_keywords() {
        let out = generate_file();
        let keywords = quoted_field(&out, "keyword");
        // Guard against a vacuous pass if the field were ever emitted empty.
        assert!(!keywords.is_empty(), "keyword field is unexpectedly empty");
        for lit in LITERALS {
            // Compare whole tokens so a literal in the first or last
            // position (bounded by a quote rather than a space) is caught,
            // and so a keyword that merely contains a literal as a
            // substring is not a false positive.
            assert!(
                !keywords.split_whitespace().any(|kw| kw == *lit),
                "literal `{lit}` leaked into keyword list"
            );
        }
    }
}