harn-cli 0.5.83

CLI for the Harn programming language — run, test, REPL, format, and lint
//! `harn dump-highlight-keywords` — regenerate `docs/theme/harn-keywords.js`.
//!
//! The mdBook documentation site (`docs/`) uses a custom highlight.js language
//! definition to render ```` ```harn ```` code blocks. To keep the highlighter
//! in sync with the actual language and stdlib without hand-maintaining a
//! duplicate keyword list, this command emits a small JS file that the
//! highlight.js module consumes at runtime.
//!
//! Sources of truth:
//!
//! - `harn_lexer::KEYWORDS` — language keywords.
//! - `harn_vm::stdlib::stdlib_builtin_names()` — globally-available stdlib
//!   builtins (all three tiers are registered unconditionally on a Harn VM,
//!   so everything this function returns is reachable without an `import`).
//!
//! With `--check`, the command diffs the generated content against the file
//! on disk and exits non-zero if they differ (same idiom as `cargo fmt
//! --check`). CI runs this to fail any PR that changes a keyword or a builtin
//! name without regenerating.

use std::collections::BTreeSet;
use std::fs;
use std::path::Path;
use std::process;

use harn_lexer::KEYWORDS;
use harn_vm::stdlib::stdlib_builtin_names;

/// Literals that render as `hljs-literal` rather than `hljs-keyword`.
///
/// `generate_file` filters these names out of the emitted `keyword` list and
/// writes them, in the order given here, as the `literal` field instead.
///
/// Kept in sync with `KEYWORDS` by hand — these three are a closed set in
/// Harn and unlikely to change. If a new literal keyword (e.g. `undefined`)
/// is ever added, update both here and the test below.
const LITERALS: &[&str] = &["true", "false", "nil"];

/// Entry point for `harn dump-highlight-keywords`.
///
/// Generates the keyword file contents and then either writes them to
/// `output_path` (creating any missing parent directories first) or, when
/// `check_only` is set, compares them against the file already on disk.
///
/// Every failure — an unreadable or stale file in check mode, a directory or
/// file that cannot be created in write mode — is reported on stderr and
/// terminates the process with exit status 1. A successful check prints
/// nothing; a successful write prints `wrote <path>` to stdout.
pub(crate) fn run(output_path: &str, check_only: bool) {
    let fresh = generate_file();
    let target = Path::new(output_path);

    if !check_only {
        // Write mode: make sure the destination directory exists, then
        // replace the file wholesale.
        if let Some(dir) = target.parent() {
            fs::create_dir_all(dir).unwrap_or_else(|e| {
                eprintln!("error: cannot create {}: {e}", dir.display());
                process::exit(1);
            });
        }
        fs::write(target, &fresh).unwrap_or_else(|e| {
            eprintln!("error: cannot write {}: {e}", target.display());
            process::exit(1);
        });
        println!("wrote {}", target.display());
        return;
    }

    // Check mode: the file on disk must match the generated text exactly.
    let on_disk = fs::read_to_string(target).unwrap_or_else(|e| {
        eprintln!("error: cannot read {}: {e}", target.display());
        eprintln!("hint: run `make gen-highlight` to regenerate.");
        process::exit(1)
    });
    if on_disk != fresh {
        eprintln!(
            "error: {} is stale relative to the lexer/stdlib.",
            target.display()
        );
        eprintln!("hint: run `make gen-highlight` to regenerate.");
        process::exit(1);
    }
}

/// Render the complete contents of the generated keywords file.
///
/// Pure function — performs no I/O, so it can be unit-tested directly.
///
/// The output is a JS assignment to `window.__HARN_KEYWORDS` carrying three
/// space-separated word lists:
///
/// - `keyword`: `KEYWORDS` in declaration order, minus `LITERALS`.
/// - `literal`: `LITERALS` in declaration order.
/// - `built_in`: the names from `stdlib_builtin_names()`, sorted and
///   de-duplicated, excluding `__`-prefixed names, names that are also
///   keywords, and the empty string.
fn generate_file() -> String {
    // Keyword list: preserve declaration order, drop anything that is
    // rendered as a literal instead.
    let keyword_line = KEYWORDS
        .iter()
        .copied()
        .filter(|word| !LITERALS.contains(word))
        .collect::<Vec<&str>>()
        .join(" ");

    let literal_line = LITERALS.join(" ");

    // Builtin list: start from every name `stdlib_builtin_names` reports and
    // drop two groups:
    //   * `__`-prefixed names — compiler-internal helpers that users never
    //     call directly; highlighting them would only cause false positives
    //     on variables sharing the prefix.
    //   * names that are also keywords — highlight.js should treat those as
    //     keywords, not builtins.
    // Collecting into a BTreeSet both sorts the names and removes duplicates.
    let reserved: BTreeSet<&str> = KEYWORDS.iter().copied().collect();
    let mut builtins: BTreeSet<String> = stdlib_builtin_names()
        .into_iter()
        .filter(|name| !(name.starts_with("__") || reserved.contains(name.as_str())))
        .collect();
    // An empty name would show up as a stray separator in the joined list.
    builtins.remove("");
    let builtin_line = builtins.into_iter().collect::<Vec<String>>().join(" ");

    format!(
        "// GENERATED by `harn dump-highlight-keywords` — do not edit by hand.\n\
         //\n\
         // Sources of truth:\n\
         //   crates/harn-lexer/src/token.rs  (KEYWORDS)\n\
         //   crates/harn-vm/src/stdlib.rs    (stdlib_builtin_names)\n\
         //\n\
         // Regenerate with: make gen-highlight\n\
         // CI guard:        cargo run -p harn-cli -- dump-highlight-keywords --check\n\
         window.__HARN_KEYWORDS = {{\n\
         \x20\x20keyword: {keyword:?},\n\
         \x20\x20literal: {literal:?},\n\
         \x20\x20built_in: {built_in:?}\n\
         }};\n",
        keyword = keyword_line,
        literal = literal_line,
        built_in = builtin_line,
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Return the contents of the double-quoted string assigned to `field`
    /// in the generated JS (for example, the word list after `keyword: "`).
    ///
    /// Panics if the field or its closing quote cannot be found. The value is
    /// assumed to contain no escaped quotes, which holds as long as every
    /// emitted name is a plain identifier.
    fn quoted_field<'a>(out: &'a str, field: &str) -> &'a str {
        let opener = format!("{field}: \"");
        let start = out
            .find(&opener)
            .unwrap_or_else(|| panic!("missing `{field}` field in generated file"))
            + opener.len();
        let len = out[start..]
            .find('"')
            .unwrap_or_else(|| panic!("unterminated `{field}` field in generated file"));
        &out[start..start + len]
    }

    /// Smoke test: a few core keywords and the global assignment are present.
    #[test]
    fn generated_file_contains_core_keywords() {
        let out = generate_file();
        assert!(out.contains("pipeline"));
        assert!(out.contains("parallel"));
        assert!(out.contains("defer"));
        assert!(out.contains("window.__HARN_KEYWORDS"));
    }

    /// Smoke test: well-known global builtins show up in the output.
    #[test]
    fn generated_file_contains_known_builtins() {
        let out = generate_file();
        // Globally available — must be in built_in.
        for name in &["println", "read_file", "llm_call", "http_get"] {
            assert!(
                out.contains(name),
                "expected builtin `{name}` in generated file"
            );
        }
    }

    /// CI backstop for highlighter drift.
    ///
    /// `--check` mode catches drift when someone runs the binary; this test
    /// catches it under `cargo test --workspace`, so any PR that changes a
    /// keyword or stdlib builtin name without regenerating
    /// `docs/theme/harn-keywords.js` fails `make test`. Belt + suspenders.
    #[test]
    fn committed_keyword_file_matches_generator() {
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        let path = std::path::Path::new(manifest_dir)
            .join("..")
            .join("..")
            .join("docs")
            .join("theme")
            .join("harn-keywords.js");
        let on_disk = std::fs::read_to_string(&path).unwrap_or_else(|e| {
            panic!(
                "failed to read {}: {e}\n\
                 hint: run `make gen-highlight` to regenerate.",
                path.display()
            )
        });
        let generated = generate_file();
        assert_eq!(
            on_disk, generated,
            "docs/theme/harn-keywords.js is stale relative to the lexer/stdlib.\n\
             Run `make gen-highlight` to regenerate."
        );
    }

    /// `true`/`false`/`nil` must be emitted in `literal` and never in
    /// `keyword`.
    ///
    /// The keyword field is extracted exactly and compared token by token,
    /// so a literal in the first or last position of the list is caught too
    /// (a substring search for `" true "` would miss both).
    #[test]
    fn literals_are_not_also_keywords() {
        let out = generate_file();
        let keywords: Vec<&str> = quoted_field(&out, "keyword").split_whitespace().collect();
        let literals: Vec<&str> = quoted_field(&out, "literal").split_whitespace().collect();
        for lit in LITERALS {
            assert!(
                !keywords.contains(lit),
                "literal `{lit}` leaked into keyword list"
            );
            assert!(
                literals.contains(lit),
                "literal `{lit}` missing from literal list"
            );
        }
    }
}