hrdr 0.2.6 - Docs.rs

//! `hrdr` — herder: an agentic coding harness.
//!
//! Running `hrdr` with no subcommand launches the interactive TUI.
//! `hrdr run <task>` runs a single turn headlessly, streaming to stdout
//! (scriptable, pipeable).
//! `hrdr models` lists available models from the configured endpoint.
//!
//! hrdr talks to any running OpenAI-compatible endpoint; choose one with
//! `--base-url` or a `--provider` preset. It does not manage a model server —
//! start your own (infr, llama.cpp, vLLM, …) or point at a hosted provider.

use std::io::Write;

use anyhow::Result;
use clap::{Parser, Subcommand};
use hrdr_agent::{Agent, AgentConfig, AgentEvent};
use hrdr_llm::Client;

// Top-level command line. Every user-facing flag is `global`, so it is accepted
// both before and after a subcommand. Optional flags left unset stay `None`
// here; `main` then keeps whatever the env vars / config file / built-in
// defaults produced (precedence: CLI flag > env var > config file > default).
//
// NOTE: the `///` comments below are rendered by clap as `--help` text, so
// editing them changes user-visible output.
#[derive(Parser)]
#[command(
    name = "hrdr",
    version,
    about = "hrdr — herder: a fast, agentic coding harness for OpenAI-compatible models.",
    before_help = include_str!("../art.txt"),
)]
struct Cli {
    /// OpenAI-compatible base URL (default: $HRDR_BASE_URL or http://localhost:8080/v1).
    #[arg(long, global = true)]
    base_url: Option<String>,

    /// Model id (default: $HRDR_MODEL).
    #[arg(long, global = true)]
    model: Option<String>,

    /// Provider preset: zen (OpenCode Zen), openai, openrouter, claude, or local
    /// (or a custom `[providers.<name>]` from config). Sets the endpoint and
    /// API-key env.
    #[arg(long, global = true)]
    provider: Option<String>,

    /// Use vim keybindings in the input pane (default: plain claude-style input).
    #[arg(long, global = true)]
    vim: bool,

    /// Path to an hjkl theme TOML for the TUI (default: bundled dark theme).
    #[arg(long, global = true)]
    theme: Option<String>,

    /// Reasoning effort for reasoning models: minimal, low, medium, or high
    /// (sent as `reasoning_effort`; other values are status-bar labels only).
    #[arg(long, global = true)]
    effort: Option<String>,

    /// Auto-compact toggle: any value in 0.0–1.0 enables it, 0 disables (the
    /// trigger point is set by --compaction-reserved).
    #[arg(long, global = true)]
    auto_compact: Option<f64>,

    /// Tokens reserved below the context window before auto-compaction fires
    /// (default 20000); compaction triggers at context_window − this.
    #[arg(long, global = true)]
    compaction_reserved: Option<u32>,

    /// Prune old tool output from the model context before each request (on|off; default on).
    // Kept as a string; `main` converts it with `hrdr_agent::parse_env_bool`.
    #[arg(long = "auto-prune", global = true, value_name = "on|off")]
    auto_prune: Option<String>,

    /// Prompt caching: off, on, or auto (default; on for remote endpoints).
    #[arg(long = "prompt-cache", global = true, value_name = "off|on|auto")]
    prompt_cache: Option<String>,

    /// Don't auto-resume the most recent session for the working directory.
    #[arg(long = "no-auto-resume", global = true)]
    no_auto_resume: bool,

    /// Don't ring the terminal bell when a turn finishes.
    #[arg(long = "no-bell", global = true)]
    no_bell: bool,

    /// Icon set for the TUI: nerd (default), unicode, or ascii.
    #[arg(long, global = true)]
    icons: Option<String>,

    /// Per-message timestamp style: none, relative (default), or exact.
    #[arg(long, global = true)]
    timestamps: Option<String>,

    /// Status-bar mode: none, truncate (default), or wrap.
    #[arg(long, global = true)]
    statusbar: Option<String>,

    /// File checkpointing: on, off, or auto (default; off inside a git repo).
    #[arg(long, global = true)]
    checkpoints: Option<String>,

    /// Turns a completed TODO item stays visible before it's pruned (default 5).
    #[arg(long, global = true)]
    todo_ttl: Option<u64>,

    /// Show the model's `<think>` reasoning: on/off/1/0 (default on).
    // Kept as a string; `main` converts it with `hrdr_agent::parse_env_bool`.
    #[arg(long = "show-thinking", global = true, value_name = "on|off")]
    show_thinking: Option<String>,

    /// Print shell completions to stdout and exit
    // Hidden packaging helper: handled (and the process exits) at the top of `main`.
    #[arg(long, value_enum, value_name = "SHELL", hide = true)]
    completions: Option<CompletionShell>,

    /// Print the man page (troff) to stdout and exit
    // Hidden packaging helper: handled (and the process exits) at the top of `main`.
    #[arg(long, hide = true)]
    man: bool,

    // `None` means no subcommand was given, which launches the interactive TUI.
    #[command(subcommand)]
    command: Option<Command>,
}

/// Shells `--completions` can generate for: clap_complete's five core shells
/// plus nushell (separate generator crate). Mirrors gpur's packaging helpers.
// Parsed from the hidden `--completions <SHELL>` flag through the derived
// `clap::ValueEnum`; the actual script is produced by `CompletionShell::generate`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
enum CompletionShell {
    Bash,
    Zsh,
    Fish,
    // Spelled `Powershell` here; `generate` maps it to clap_complete's
    // `Shell::PowerShell`.
    Powershell,
    Elvish,
    // Not part of `clap_complete::Shell`; generated via `clap_complete_nushell`.
    Nushell,
}

impl CompletionShell {
    /// Writes the completion script for this shell to stdout, registering the
    /// binary under the name `hrdr`.
    ///
    /// `cmd` is the clap command tree to generate completions for.
    fn generate(self, cmd: &mut clap::Command) {
        use clap_complete::Shell;

        const BIN_NAME: &str = "hrdr";
        let mut sink = std::io::stdout();

        // The five core shells share one generator type, so resolve the variant
        // first and emit once. Nushell uses a different generator type and is
        // handled (and returned from) inside the match.
        let shell = match self {
            Self::Bash => Shell::Bash,
            Self::Zsh => Shell::Zsh,
            Self::Fish => Shell::Fish,
            Self::Powershell => Shell::PowerShell,
            Self::Elvish => Shell::Elvish,
            Self::Nushell => {
                clap_complete::generate(clap_complete_nushell::Nushell, cmd, BIN_NAME, &mut sink);
                return;
            }
        };
        clap_complete::generate(shell, cmd, BIN_NAME, &mut sink);
    }
}

// Subcommands of `hrdr`. `Cli::command` is optional: when no subcommand is
// given, `main` launches the interactive TUI instead.
//
// NOTE: the `///` comments below are rendered by clap as help text.
#[derive(Subcommand)]
enum Command {
    /// Run a single task to completion headlessly, streaming output to stdout.
    Run {
        /// Emit newline-delimited JSON events on stdout (for scripting/CI).
        #[arg(long)]
        json: bool,
        /// Suppress the tool/usage chrome on stderr; print only the reply text.
        #[arg(long)]
        quiet: bool,
        /// Override the tool-round budget for this run.
        // Written to `config.max_steps` in `main` when present.
        #[arg(long, value_name = "N")]
        max_steps: Option<usize>,
        /// The task prompt (all trailing words are joined).
        // `main` joins the words with single spaces before calling `run_headless`.
        #[arg(trailing_var_arg = true, required = true)]
        prompt: Vec<String>,
    },
    /// List available models from the configured endpoint.
    Models,
}

#[tokio::main]
async fn main() -> Result<()> {
    tracing_subscriber::fmt()
        .with_env_filter(
            tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "warn".into()),
        )
        .with_writer(std::io::stderr)
        .init();

    let cli = Cli::parse();

    // Packaging helpers (hidden): emit completions / man page and exit.
    if let Some(shell) = cli.completions {
        use clap::CommandFactory;
        shell.generate(&mut Cli::command());
        return Ok(());
    }
    if cli.man {
        use clap::CommandFactory;
        clap_mangen::Man::new(Cli::command()).render(&mut std::io::stdout())?;
        return Ok(());
    }

    // Precedence: CLI flag > env var > config file > built-in default. Display
    // knobs live in UiConfig (hrdr-app); model/endpoint/loop knobs in
    // AgentConfig (hrdr-agent) — both read the same config.toml + HRDR_* vars.
    let mut config = AgentConfig::load();
    let mut ui = hrdr_app::UiConfig::load();

    // Apply a provider preset (CLI > config/env) before explicit CLI overrides.
    // Custom `[providers.<name>]` from config shadow the built-ins.
    let mut remote_provider = false;
    let provider_name = cli.provider.clone().or_else(|| config.provider.clone());
    if let Some(name) = &provider_name {
        let p = config.resolve_provider(name).ok_or_else(|| {
            anyhow::anyhow!(
                "unknown provider '{name}' (built-ins: zen, openai, openrouter, claude, local; \
                 or define [providers.{name}] in config)"
            )
        })?;
        // Provider sets the endpoint unless an explicit --base-url / $HRDR_BASE_URL wins.
        let base_overridden = cli.base_url.is_some() || std::env::var_os("HRDR_BASE_URL").is_some();
        if !base_overridden {
            config.base_url = p.base_url.clone();
        }
        // Key precedence: inline > key_env var > credential saved by `/login`.
        if let Some(key) = hrdr_agent::resolve_api_key(name, &p) {
            config.api_key = Some(key);
        } else if p.remote && config.api_key.is_none() {
            let env = p.key_env.as_deref().unwrap_or("HRDR_API_KEY");
            eprintln!("hrdr: provider '{name}' needs an API key — set ${env}, or run /login");
        }
        // Provider's default model, unless the user set one explicitly.
        let model_overridden = cli.model.is_some() || std::env::var_os("HRDR_MODEL").is_some();
        if !model_overridden && let Some(m) = p.model.clone() {
            config.model = m;
        }
        if config.context_window.is_none() {
            config.context_window = p.context_window;
        }
        config.headers = p.headers.into_iter().collect();
        config.api_version = p.api_version;
        remote_provider = p.remote;
    }

    if let Some(u) = cli.base_url {
        config.base_url = u;
    }
    if let Some(m) = cli.model {
        config.model = m;
    }
    if cli.vim {
        ui.vim_mode = true;
    }
    if let Some(t) = cli.theme {
        ui.theme = Some(t);
    }
    if let Some(e) = cli.effort {
        config.effort = Some(e);
    }
    if let Some(r) = cli.auto_compact {
        config.auto_compact = r;
    }
    if let Some(n) = cli.compaction_reserved {
        config.compaction_reserved = n;
    }
    if let Some(v) = cli
        .auto_prune
        .as_deref()
        .and_then(hrdr_agent::parse_env_bool)
    {
        config.auto_prune = v;
    }
    if cli.no_auto_resume {
        ui.auto_resume = false;
    }
    if cli.no_bell {
        ui.bell = false;
    }
    if let Some(i) = cli.icons {
        ui.icons = Some(i);
    }
    if let Some(t) = cli.timestamps {
        ui.timestamps = Some(t);
    }
    if let Some(s) = cli.statusbar {
        ui.statusbar = Some(s);
    }
    if let Some(c) = cli.checkpoints {
        config.checkpoints = Some(c);
    }
    if let Some(p) = cli.prompt_cache {
        config.prompt_cache = Some(p);
    }
    if let Some(n) = cli.todo_ttl {
        ui.todo_ttl = n;
    }
    if let Some(v) = cli
        .show_thinking
        .as_deref()
        .and_then(hrdr_agent::parse_env_bool)
    {
        ui.show_thinking = v;
    }

    if remote_provider && config.model == "default" {
        eprintln!(
            "hrdr: set a model with --model (run `hrdr models` to list this provider's models)"
        );
    }

    // Resolve the context window (drives the status bar's "X of Y" + the
    // auto-compaction threshold). Precedence: explicit config/provider wins;
    // else ask the server (many OpenAI-compatible servers advertise it — vLLM's
    // `max_model_len`, llama.cpp's `/props` n_ctx, …). Left unknown for an
    // endpoint that advertises nothing.
    if config.context_window.is_none() {
        let probe = hrdr_llm::Client::new(
            config.base_url.clone(),
            config.api_key.clone(),
            config.model.clone(),
        );
        config.context_window = probe.context_window().await;
    }

    match cli.command {
        Some(Command::Run {
            json,
            quiet,
            max_steps,
            prompt,
        }) => {
            if let Some(n) = max_steps {
                config.max_steps = n;
            }
            run_headless(config, prompt.join(" "), json, quiet).await
        }
        Some(Command::Models) => list_models(config).await,
        None => hrdr_tui::run(config, ui).await,
    }
}

/// Headless single-turn run. Default: reply text on stdout, tool/usage chrome
/// on stderr. `--json`: newline-delimited JSON events on stdout (scripting).
/// `--quiet`: text only. Exit code 0 on a completed turn, 1 on error.
///
/// Writes to stdout are best-effort: a closed pipe (`hrdr run … | head`) must
/// not panic in the middle of a turn, so write errors are ignored just like
/// the flush errors are.
///
/// # Errors
/// Returns the error from constructing the agent or from the turn itself. In
/// `--json` mode a final `{"type": "error", …}` event is emitted first.
async fn run_headless(config: AgentConfig, prompt: String, json: bool, quiet: bool) -> Result<()> {
    let mut agent = Agent::new(config)?;
    // Connect any configured MCP servers before the turn (their tools join the
    // set); surface the per-server status on stderr unless quiet.
    for notice in agent.connect_mcp().await {
        if !quiet {
            eprintln!("\x1b[90m[{notice}]\x1b[0m");
        }
    }
    // Headless runs have no interactive steering.
    let result = agent
        .run(prompt, hrdr_agent::steering_queue(), |ev| {
            if json {
                // `writeln!` instead of `println!`: the latter panics when
                // stdout has been closed by the reader.
                let mut out = std::io::stdout();
                let _ = writeln!(out, "{}", event_json(&ev));
                let _ = out.flush();
                return;
            }
            match ev {
                AgentEvent::Text(t) => {
                    // Flush per chunk so streamed text appears immediately.
                    let mut out = std::io::stdout();
                    let _ = write!(out, "{t}");
                    let _ = out.flush();
                }
                // Reasoning is not shown in the plain-text headless output.
                AgentEvent::Reasoning(_) => {}
                AgentEvent::ToolStart { name, args, .. } if !quiet => {
                    eprintln!(
                        "\x1b[33m⚙ {name}\x1b[0m {}",
                        hrdr_tools::truncate_inline(&args, 120)
                    );
                }
                AgentEvent::ToolOutput { chunk, .. } if !quiet => {
                    eprint!("\x1b[90m{chunk}\x1b[0m");
                    let _ = std::io::stderr().flush();
                }
                AgentEvent::Notice(text) if !quiet => eprintln!("\x1b[90m[{text}]\x1b[0m"),
                AgentEvent::ToolEnd { name, ok, .. } if !quiet => {
                    let mark = if ok {
                        "\x1b[32m✓\x1b[0m"
                    } else {
                        "\x1b[31m✗\x1b[0m"
                    };
                    eprintln!("{mark} {name}");
                }
                AgentEvent::Usage {
                    prompt_tokens,
                    completion_tokens,
                    cached_prompt_tokens,
                    reasoning_tokens,
                } if !quiet => {
                    let cached = cached_prompt_tokens
                        .map(|c| format!(" ({c} cached)"))
                        .unwrap_or_default();
                    let reasoning = reasoning_tokens
                        .map(|r| format!(" · reasoning {r}"))
                        .unwrap_or_default();
                    eprintln!(
                        "\x1b[90m[usage] ctx {prompt_tokens}{cached} · out {completion_tokens}{reasoning}\x1b[0m"
                    );
                }
                // Terminate the streamed reply with a newline (also in quiet mode).
                AgentEvent::TurnDone => {
                    let _ = writeln!(std::io::stdout());
                }
                // Chrome events under --quiet, and anything else, print nothing.
                _ => {}
            }
        })
        .await;
    if let Err(e) = result {
        if json {
            // Keep the stdout stream self-describing for scripts; the error is
            // still returned so the process exits non-zero.
            let _ = writeln!(
                std::io::stdout(),
                "{}",
                serde_json::json!({"type": "error", "message": e.to_string()})
            );
        }
        return Err(e);
    }
    Ok(())
}

/// Serialises one [`AgentEvent`] to a compact, single-line JSON object — the
/// wire format of `hrdr run --json`.
///
/// Every object carries a `"type"` discriminator (`text`, `reasoning`,
/// `tool_start`, `tool_output`, `tool_end`, `notice`, `steer`, `usage`,
/// `done`) followed by that event's fields.
fn event_json(ev: &AgentEvent) -> String {
    use serde_json::{Value, json};

    let payload: Value = match ev {
        // Turn lifecycle.
        AgentEvent::TurnDone => json!({"type": "done"}),
        AgentEvent::Usage {
            prompt_tokens: prompt,
            completion_tokens: completion,
            cached_prompt_tokens: cached,
            reasoning_tokens: reasoning,
        } => json!({
            "type": "usage",
            "prompt_tokens": prompt,
            "completion_tokens": completion,
            "cached_prompt_tokens": cached,
            "reasoning_tokens": reasoning,
        }),

        // Plain text-carrying events.
        AgentEvent::Text(body) => json!({"type": "text", "text": body}),
        AgentEvent::Reasoning(body) => json!({"type": "reasoning", "text": body}),
        AgentEvent::Notice(body) => json!({"type": "notice", "text": body}),
        AgentEvent::Steered(body) => json!({"type": "steer", "text": body}),

        // Tool call lifecycle, correlated by `id`.
        AgentEvent::ToolStart {
            id: call_id,
            name: tool,
            args: arguments,
        } => json!({"type": "tool_start", "id": call_id, "name": tool, "args": arguments}),
        AgentEvent::ToolOutput {
            id: call_id,
            chunk: piece,
        } => json!({"type": "tool_output", "id": call_id, "chunk": piece}),
        AgentEvent::ToolEnd {
            id: call_id,
            name: tool,
            result: outcome,
            ok: succeeded,
        } => json!({
            "type": "tool_end",
            "id": call_id,
            "name": tool,
            "ok": succeeded,
            "result": outcome
        }),
    };
    payload.to_string()
}

/// Print available model ids, one per line.
async fn list_models(config: AgentConfig) -> Result<()> {
    let client = Client::new(config.base_url, config.api_key, config.model);
    let models = client.list_models().await?;
    for m in models {
        println!("{m}");
    }
    Ok(())
}