// harn-cli 0.8.141
//
// CLI for the Harn programming language — run, test, REPL, format, and lint.
use std::fs;
use std::path::{Path, PathBuf};

use crate::cli::{ProvidersBuildCapabilitiesArgs, ProvidersBuildConfigArgs};

/// Two-line comment banner that opens the generated provider config snapshot.
///
/// It marks the file as generated and names the fragment directory to edit
/// instead. Each line ends with a newline so fragments can be appended directly.
const PROVIDER_CONFIG_GENERATED_HEADER: &str = concat!(
    "# @generated by `harn providers build-config`; do not edit directly.\n",
    "# Edit crates/harn-vm/src/llm/catalog_sources/**/*.toml instead.\n",
);
/// Two-line comment banner that opens the generated provider capability snapshot.
///
/// It marks the file as generated and names the fragment directory to edit
/// instead. Each line ends with a newline so fragments can be appended directly.
const PROVIDER_CAPABILITIES_GENERATED_HEADER: &str = concat!(
    "# @generated by `harn providers build-capabilities`; do not edit directly.\n",
    "# Edit crates/harn-vm/src/llm/capability_sources/**/*.toml instead.\n",
);

/// Generate the provider config snapshot from `args.source_dir`, then either
/// verify it against `args.output` or write it there.
///
/// When `args.check` is set nothing is written: the function succeeds only if
/// the file at `args.output` can be read and matches the generated text
/// exactly. Otherwise it prints a diagnostic to stderr and returns an error.
///
/// When `args.check` is not set, the parent directory of `args.output` is
/// created if needed and the snapshot is written, followed by a summary line
/// on stdout.
///
/// # Errors
///
/// Returns a message when generation fails, when the check detects drift, or
/// when creating the directory or writing the file fails.
pub(crate) fn run_build_config(args: &ProvidersBuildConfigArgs) -> Result<(), String> {
    let snapshot = generated_provider_config(&args.source_dir)?;
    let output = &args.output;

    if args.check {
        // A read failure is deliberately reported the same way as a content
        // mismatch: both mean the committed snapshot cannot be trusted.
        let up_to_date =
            matches!(fs::read_to_string(output), Ok(existing) if existing == snapshot.body);
        if up_to_date {
            println!("provider config snapshot is up to date");
            return Ok(());
        }
        eprintln!(
            "error: generated provider config is stale or missing: {}",
            output.display()
        );
        return Err("provider config snapshot drifted".to_string());
    }

    if let Some(dir) = output.parent() {
        if let Err(error) = fs::create_dir_all(dir) {
            return Err(format!(
                "failed to create provider config directory {}: {error}",
                dir.display()
            ));
        }
    }
    if let Err(error) = fs::write(output, snapshot.body) {
        return Err(format!("failed to write {}: {error}", output.display()));
    }

    println!(
        "wrote {} from {} TOML fragments",
        output.display(),
        snapshot.fragment_count
    );
    Ok(())
}

/// Generate the provider capability snapshot from `args.source_dir`, then
/// either verify it against `args.output` or write it there.
///
/// When `args.check` is set nothing is written: the function succeeds only if
/// the file at `args.output` can be read and matches the generated text
/// exactly. Otherwise it prints a diagnostic to stderr and returns an error.
///
/// When `args.check` is not set, the parent directory of `args.output` is
/// created if needed and the snapshot is written, followed by a summary line
/// on stdout.
///
/// # Errors
///
/// Returns a message when generation fails, when the check detects drift, or
/// when creating the directory or writing the file fails.
pub(crate) fn run_build_capabilities(args: &ProvidersBuildCapabilitiesArgs) -> Result<(), String> {
    let snapshot = generated_provider_capabilities(&args.source_dir)?;
    let output = &args.output;

    if args.check {
        // A read failure is deliberately reported the same way as a content
        // mismatch: both mean the committed snapshot cannot be trusted.
        let up_to_date =
            matches!(fs::read_to_string(output), Ok(existing) if existing == snapshot.body);
        if up_to_date {
            println!("provider capability snapshot is up to date");
            return Ok(());
        }
        eprintln!(
            "error: generated provider capability snapshot is stale or missing: {}",
            output.display()
        );
        return Err("provider capability snapshot drifted".to_string());
    }

    if let Some(dir) = output.parent() {
        if let Err(error) = fs::create_dir_all(dir) {
            return Err(format!(
                "failed to create provider capability directory {}: {error}",
                dir.display()
            ));
        }
    }
    if let Err(error) = fs::write(output, snapshot.body) {
        return Err(format!("failed to write {}: {error}", output.display()));
    }

    println!(
        "wrote {} from {} TOML fragments",
        output.display(),
        snapshot.fragment_count
    );
    Ok(())
}

/// Outcome of concatenating a directory of TOML fragments into one snapshot.
///
/// Despite the name, this is returned by both `generated_provider_config` and
/// `generated_provider_capabilities`.
struct GeneratedProviderConfig {
    /// Full snapshot text: the generated header followed by every fragment,
    /// each preceded by a `# --- source: <label> ---` marker line.
    body: String,
    /// Number of TOML fragment files that were concatenated into `body`.
    fragment_count: usize,
}

fn generated_provider_config(source_dir: &Path) -> Result<GeneratedProviderConfig, String> {
    let mut fragments = Vec::new();
    collect_toml_fragments(source_dir, &mut fragments)?;
    fragments.sort();
    if fragments.is_empty() {
        return Err(format!(
            "provider catalog source directory has no TOML fragments: {}",
            source_dir.display()
        ));
    }

    let mut body = String::from(PROVIDER_CONFIG_GENERATED_HEADER);
    for path in &fragments {
        let fragment = fs::read_to_string(path)
            .map_err(|error| format!("failed to read {}: {error}", path.display()))?;
        // Reject bare `key = value` lines that appear before the first table
        // header in a model fragment. Fragments are concatenated as raw text, so
        // any such leading key binds at runtime to the PREVIOUS fragment's last
        // model table instead of this fragment's first model — silently
        // mislabeling a model's tier/open_weight/strengths across the fragment
        // boundary. Keep every model key inside an explicit `[models.X]` table so
        // concatenation can never reattach it. Scoped to `60-models/`: other
        // fragment dirs (defaults, routing, aliases) legitimately set root-level
        // `ProvidersConfig` fields like `default_provider`.
        let label = fragment_label(source_dir, path);
        if is_model_fragment(&label) {
            if let Some(error) = leading_bare_key_error(&label, &fragment) {
                return Err(error);
            }
        }
        body.push_str("\n# --- source: ");
        body.push_str(&label);
        body.push_str(" ---\n");
        body.push_str(fragment.trim_end());
        body.push('\n');
    }

    harn_vm::llm_config::parse_config_toml(&body)
        .map_err(|error| format!("generated provider config does not parse: {error}"))?;

    Ok(GeneratedProviderConfig {
        body,
        fragment_count: fragments.len(),
    })
}

fn generated_provider_capabilities(source_dir: &Path) -> Result<GeneratedProviderConfig, String> {
    let mut fragments = Vec::new();
    collect_toml_fragments(source_dir, &mut fragments)?;
    fragments.sort();
    if fragments.is_empty() {
        return Err(format!(
            "provider capability source directory has no TOML fragments: {}",
            source_dir.display()
        ));
    }

    let mut body = String::from(PROVIDER_CAPABILITIES_GENERATED_HEADER);
    for path in &fragments {
        let fragment = fs::read_to_string(path)
            .map_err(|error| format!("failed to read {}: {error}", path.display()))?;
        body.push_str("\n# --- source: ");
        body.push_str(&fragment_label(source_dir, path));
        body.push_str(" ---\n");
        body.push_str(fragment.trim_end());
        body.push('\n');
    }

    let parsed = toml::from_str::<harn_vm::llm::capabilities::CapabilitiesFile>(&body)
        .map_err(|error| format!("generated provider capabilities do not parse: {error}"))?;

    // Opinionated footgun gate: refuse to generate (or pass --check on) a
    // capability matrix that declares a known-footgun provider/model/config
    // combo (e.g. a tool route with reasoning forced off, or an OpenRouter
    // Harmony tool route with no clean-sub-provider pin). Data-driven over the
    // matrix — see harn_vm::llm::capability_audit.
    let audit = harn_vm::llm::capability_audit::audit_capabilities(&parsed);
    if !audit.is_clean() {
        return Err(format!(
            "capability matrix footgun check failed:\n{}",
            audit.render()
        ));
    }

    Ok(GeneratedProviderConfig {
        body,
        fragment_count: fragments.len(),
    })
}

/// Recursively append every file under `dir` whose extension is exactly
/// `toml` to `fragments`.
///
/// Each directory's entries are sorted before they are visited, and
/// subdirectories are descended into in that same order, so the result is
/// deterministic for a given tree. Files with any other extension are ignored.
///
/// # Errors
///
/// Returns a message when `dir`, one of its entries, or any nested directory
/// cannot be read.
fn collect_toml_fragments(dir: &Path, fragments: &mut Vec<PathBuf>) -> Result<(), String> {
    let listing = fs::read_dir(dir).map_err(|error| {
        format!(
            "failed to read provider catalog source dir {}: {error}",
            dir.display()
        )
    })?;

    // Read the whole listing first so an entry error surfaces before any
    // recursion or output happens for this directory.
    let mut children = listing
        .map(|entry| {
            entry.map(|entry| entry.path()).map_err(|error| {
                format!(
                    "failed to read provider catalog source dir entry in {}: {error}",
                    dir.display()
                )
            })
        })
        .collect::<Result<Vec<PathBuf>, String>>()?;
    children.sort();

    for child in children {
        if child.is_dir() {
            collect_toml_fragments(&child, fragments)?;
            continue;
        }
        let is_toml = child.extension().and_then(|extension| extension.to_str()) == Some("toml");
        if is_toml {
            fragments.push(child);
        }
    }
    Ok(())
}

/// Produce the display label for a fragment: `path` relative to `source_dir`,
/// with every backslash replaced by a forward slash.
///
/// When `path` is not located under `source_dir`, the whole path is used.
fn fragment_label(source_dir: &Path, path: &Path) -> String {
    let relative = match path.strip_prefix(source_dir) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}

/// Report whether a fragment label lives under a `60-models` directory.
///
/// In such fragments bare keys are model metadata that must sit inside a
/// `[models.X]` table. Other source dirs (`00-base`, `20-routing`,
/// `40-defaults`, ...) legitimately set root-level `ProvidersConfig` fields
/// like `default_provider`, so the leading-bare-key guard skips them.
///
/// The label matches when any `/`-separated component other than the last is
/// exactly `60-models`. That is the same condition as "starts with
/// `60-models/` or contains `/60-models/`".
fn is_model_fragment(label: &str) -> bool {
    label
        .split('/')
        .rev()
        // The final component is the part after the last slash; a directory
        // name must be followed by a slash, so it can never be that one.
        .skip(1)
        .any(|component| component == "60-models")
}

/// Build an error message if `fragment` has a `key = value` line ahead of its
/// first `[table]` header; return `None` for a clean fragment.
///
/// Such a key has no table of its own inside the fragment. Once fragments are
/// concatenated as raw text it attaches to the last table of the PREVIOUS
/// fragment, which is the leading-key bleed that mislabels a model across the
/// fragment boundary.
///
/// Blank lines and `#` comments are skipped. Scanning stops at the first line
/// that starts with `[`. Only lines containing `=` are rejected; the reported
/// key is the trimmed text before the first `=`, and the line number is
/// 1-based. `label` is only echoed into the message.
fn leading_bare_key_error(label: &str, fragment: &str) -> Option<String> {
    let (index, key) = fragment
        .lines()
        .map(str::trim)
        .enumerate()
        .filter(|(_, text)| !(text.is_empty() || text.starts_with('#')))
        // Everything from the first table header onward belongs to a table.
        .take_while(|(_, text)| !text.starts_with('['))
        // Only assignments are the bleed hazard; other stray lines are skipped.
        .find_map(|(index, text)| {
            text.split_once('=')
                .map(|(key, _)| (index, key.trim()))
        })?;

    let line_no = index + 1;
    Some(format!(
        "provider catalog fragment {label} has a leading bare key `{key}` on line {line_no} \
         before its first [table] header. Move it inside the model's `[models.X]` table: \
         a bare key before the first table binds to the PREVIOUS fragment's last model \
         after concatenation, silently mislabeling that model."
    ))
}

#[cfg(test)]
mod tests {
    use super::leading_bare_key_error;

    /// Fragment label handed to the guard; it is only echoed into error text.
    const LABEL: &str = "60-models/test.toml";

    /// Keys ahead of the first table header are rejected, and the message
    /// names the first offending key together with its 1-based line.
    #[test]
    fn rejects_leading_bare_key_before_first_table() {
        let source = r#"# a comment is fine
tier = "mid"
open_weight = true
[models."x"]
name = "X"
"#;
        let message = leading_bare_key_error(LABEL, source).expect("leading key rejected");
        assert!(
            message.contains("leading bare key `tier`"),
            "got: {message}"
        );
        assert!(
            message.contains("line 2"),
            "should report the line: {message}"
        );
    }

    /// Comments and blank lines ahead of the first table are harmless.
    #[test]
    fn accepts_fragment_with_only_comments_before_first_table() {
        let source = r#"# header comment
# another note

[models."x"]
name = "X"
tier = "mid"
open_weight = true
"#;
        assert_eq!(leading_bare_key_error(LABEL, source), None);
    }

    /// Keys that follow a table header belong to that table and are allowed.
    #[test]
    fn accepts_keys_inside_tables() {
        let source = r#"[models."x"]
name = "X"
tier = "mid"
[models."y"]
name = "Y"
tier = "frontier"
"#;
        assert_eq!(leading_bare_key_error(LABEL, source), None);
    }
}