claudette 0.4.1

//! Top-level entry points — single-shot and REPL.

use std::io::{self, Write};
use std::path::PathBuf;
use std::sync::{Arc, Mutex};

use crate::{
    compact_session, estimate_session_tokens, CompactionConfig, ConversationRuntime,
    PermissionMode, PermissionPolicy, PermissionPromptDecision, PermissionPrompter,
    PermissionRequest, Session, TurnSummary,
};
use anyhow::{Context, Result};

use crate::api::{stdout_text_callback, telegram_text_callback, OllamaApiClient};
use crate::commands::{dispatch_slash_command, parse_slash_command, ReplState, SlashOutcome};
use crate::executor::SecretaryToolExecutor;
use crate::memory::try_load_memory;
use crate::model_config;
use crate::prompt::secretary_system_prompt_with_memory;
use crate::theme;
use crate::tool_groups::{ToolGroup, ToolRegistry};

// Brain default now lives in `model_config::ModelConfig::from_preset`. The
// Auto preset (qwen3.5:4b brain + qwen3.5:9b fallback, shipped Sprint 14)
// replaces the `DEFAULT_MODEL = "qwen3:8b"` constant that used to live
// here — callers should use `current_model()` or `model_config::active()`.

/// Estimated-tokens threshold at which the REPL fires its own compaction
/// pass (heuristic summarisation of the oldest messages).
///
/// **Why the metric changed (2026-04-09):** previously we used
/// the runtime's built-in trigger which fires on
/// `cumulative_input_tokens`. That metric grows monotonically — with Ollama
/// sending the entire history every turn, cumulative input crosses any
/// fixed threshold within ~3 turns and then NEVER falls back below it,
/// because the usage tracker doesn't subtract removed-message tokens after
/// a compact. Result: every subsequent turn fired auto-compaction even
/// though the session itself was small. A real transcript on 2026-04-09
/// caught this — six consecutive turns each removing 5 messages.
///
/// The fix: bypass the runtime's trigger (its threshold is set to
/// `u32::MAX` in [`build_runtime_with_brain`], which [`build_runtime`] and
/// [`build_runtime_streaming`] both funnel through) and roll our own in
/// [`maybe_compact_session`], using `estimate_session_tokens(session)` —
/// a metric that's actually bounded by the current session size and
/// drops back below the threshold after a successful compact.
///
/// Default `1_000_000` makes auto-compact effectively a no-op for typical
/// local-brain setups (16K–128K context). The gate stays in place so a
/// pathologically long session still trips it, but day-to-day work won't
/// see compaction noise. Users on tight context windows who *want* the old
/// safety net can set `CLAUDETTE_COMPACT_THRESHOLD=12000` (or whatever
/// fraction of their `num_ctx` they prefer); the override is read by
/// [`compact_threshold`].
pub const DEFAULT_COMPACT_THRESHOLD: usize = 1_000_000;

/// Resolve the compaction threshold the REPL is currently using.
///
/// Reads the `CLAUDETTE_COMPACT_THRESHOLD` env var on every call and falls
/// back to [`DEFAULT_COMPACT_THRESHOLD`] when the variable is unset, is not
/// valid Unicode, or does not parse as a `usize`. Public so the
/// `get_capabilities` tool and the `/status` slash command can report the
/// same value the REPL is actually checking against.
///
/// Surrounding whitespace in the value is ignored: `usize::from_str`
/// rejects it outright, so without the trim a value such as `"12000 "`
/// (trailing space from a shell `set`, or a `\r` from a CRLF `.env` file)
/// would be silently discarded in favour of the default.
#[must_use]
pub fn compact_threshold() -> usize {
    std::env::var("CLAUDETTE_COMPACT_THRESHOLD")
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(DEFAULT_COMPACT_THRESHOLD)
}

/// Soft (early) compaction threshold. Returns `None` when unset — the
/// default — preserving the existing one-tier behaviour where the only
/// gate is `compact_threshold()` at 1M.
///
/// When the env var is set to a positive number AND the session has grown
/// above it but is still under the hard threshold, [`maybe_compact_session`]
/// runs a *soft* compact: same machinery as the hard path but preserves
/// 12 recent messages instead of 4, so summarisation kicks in earlier with
/// less context loss. Useful for long real-world sessions on 35B+ brains
/// where one transcript was paying ~573K input tokens per turn.
///
/// Returns `None` as well when `CLAUDETTE_SOFT_COMPACT_THRESHOLD` is not
/// valid Unicode, does not parse as a `usize`, or is `0`. Surrounding
/// whitespace is trimmed before parsing: `usize::from_str` treats it as an
/// error, so an otherwise valid value like `" 12000 "` would previously
/// have disabled the soft tier without any warning.
///
/// Tracks P3 in the 2026-05-04 optimization queue.
#[must_use]
pub fn soft_compact_threshold() -> Option<usize> {
    let raw = std::env::var("CLAUDETTE_SOFT_COMPACT_THRESHOLD").ok()?;
    raw.trim()
        .parse::<usize>()
        .ok()
        // Zero means "not configured", matching the "positive number" contract.
        .filter(|n| *n > 0)
}

/// Recent-message preservation count for the hard (1M default) compaction
/// path. Aggressive — keeps just enough context for the model to continue
/// the immediate conversation.
///
/// NOTE(review): not referenced in the visible part of this file;
/// presumably consumed by `maybe_compact_session` — confirm.
const HARD_COMPACT_PRESERVE: usize = 4;

/// Recent-message preservation count for the soft (env-var-gated) path.
/// Three times the hard count (`4`): the user opted into early compaction,
/// so trade summary aggressiveness for continuity.
///
/// NOTE(review): the 3x relationship is by convention only — the two
/// constants are independent literals, so keep them in sync by hand.
const SOFT_COMPACT_PRESERVE: usize = 12;

/// Default REPL/TUI max iterations per turn — how many (model → tool → result)
/// cycles a single user prompt is allowed to drive before the runtime aborts
/// with "conversation loop exceeded the maximum number of iterations".
///
/// `40` is generous: it accommodates legitimate long tool chains (multi-step
/// research, build + test + grep + fix) while still capping pathological
/// spirals from small brains. Override via `CLAUDETTE_MAX_ITERATIONS`; the
/// override is resolved by [`max_iterations`], whose result is handed to the
/// runtime in [`build_runtime_with_brain`].
pub const DEFAULT_MAX_ITERATIONS: usize = 40;

/// Resolve the per-turn max-iteration ceiling.
///
/// Honors `CLAUDETTE_MAX_ITERATIONS` when it holds a positive integer;
/// falls back to [`DEFAULT_MAX_ITERATIONS`] when the variable is unset, is
/// not valid Unicode, does not parse as a `usize`, or is `0`.
///
/// The value is trimmed before parsing because `usize::from_str` rejects
/// leading/trailing whitespace — without the trim, `"60 "` would be
/// silently ignored and the default used instead.
#[must_use]
pub fn max_iterations() -> usize {
    std::env::var("CLAUDETTE_MAX_ITERATIONS")
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        // A zero ceiling would forbid every turn; treat it as "not set".
        .filter(|n| *n > 0)
        .unwrap_or(DEFAULT_MAX_ITERATIONS)
}

/// Name of the brain model in the currently active model configuration.
///
/// Sprint 14: this is a thin view over `model_config::active()`, so after a
/// `/preset` or `/brain` slash command mutates the active config, callers
/// such as `/status`, `/capabilities` and the `get_capabilities` tool see
/// the new value on their next call. Per the original author's note, the
/// `CLAUDETTE_MODEL` env var is still honored because `ModelConfig::resolve`
/// merges it into the default Auto preset at first access.
#[must_use]
pub fn current_model() -> String {
    let brain = model_config::active().brain;
    brain.model
}

/// Session-persistence switches supplied by the caller.
///
/// A struct rather than a list of positional `bool`s, so a future field
/// (e.g. `session_path: Option<PathBuf>`) can be added without breaking
/// existing call sites. `Default` yields both flags off.
#[derive(Clone, Debug, Default)]
pub struct SessionOptions {
    /// Load the saved session before the first turn.
    ///
    /// In single-shot mode ([`run_secretary`]) a missing session file is an
    /// error; the REPL ([`run_secretary_repl`]) starts a fresh session
    /// instead.
    pub resume: bool,
    /// Write the session back to disk after a turn.
    ///
    /// REPL mode sets this unconditionally; single-shot only sets it when
    /// `--resume` was passed, so a one-off invocation can't clobber a long
    /// REPL conversation.
    pub autosave: bool,
}

/// Location of the secretary's session file.
///
/// A non-empty `CLAUDETTE_SESSION` env var is taken verbatim as the full
/// path. Otherwise (unset, empty, or not valid Unicode) the path is
/// `last.json` inside [`sessions_dir`]. A single fixed path keeps `--resume`
/// unambiguous; named sessions can come later if useful.
#[must_use]
pub fn default_session_path() -> PathBuf {
    match std::env::var("CLAUDETTE_SESSION") {
        Ok(overridden) if !overridden.is_empty() => PathBuf::from(overridden),
        _ => sessions_dir().join("last.json"),
    }
}

/// Resolve the directory holding all session JSON files. `pub(crate)` so the
/// slash-command dispatcher can list / save / load named sessions under it.
///
/// The home directory is the first of `USERPROFILE`, `HOME` that is set to
/// a non-empty, valid-Unicode value; if neither qualifies, the current
/// directory (`.`) is used. The result is `<home>/.claudette/sessions`.
///
/// Empty values are skipped on purpose: an empty `USERPROFILE` would
/// otherwise win over a perfectly good `HOME` and produce a path relative
/// to the current working directory.
pub(crate) fn sessions_dir() -> PathBuf {
    let home = ["USERPROFILE", "HOME"]
        .iter()
        .filter_map(|key| std::env::var(key).ok())
        .find(|value| !value.is_empty())
        .unwrap_or_else(|| ".".to_string());
    PathBuf::from(home).join(".claudette").join("sessions")
}

/// Load the saved session from [`default_session_path`], if there is one.
///
/// Yields `Ok(Some(session))` on success, `Ok(None)` when no file exists at
/// that path, and `Err` when the file exists but could not be loaded.
pub fn try_load_session() -> Result<Option<Session>> {
    let path = default_session_path();
    try_load_session_at(path.as_path())
}

/// Path-explicit variant of `try_load_session`. Lets tests avoid touching
/// `CLAUDETTE_SESSION`, which is process-global and races between parallel
/// tests.
///
/// Yields `Ok(None)` when `path.exists()` is false; otherwise the result of
/// `Session::load_from_path`, with the path added as error context on
/// failure.
pub fn try_load_session_at(path: &std::path::Path) -> Result<Option<Session>> {
    if path.exists() {
        Session::load_from_path(path)
            .with_context(|| format!("failed to load session from {}", path.display()))
            .map(Some)
    } else {
        Ok(None)
    }
}

/// Write `session` to [`default_session_path`], creating the parent
/// directory first when necessary.
///
/// Best-effort from the caller's point of view: the error is handed back
/// rather than handled here, so the REPL can report it once and carry on.
pub fn save_session(session: &Session) -> Result<()> {
    let target = default_session_path();
    save_session_at(session, target.as_path())
}

/// Same as `save_session` but writes to a caller-supplied path.
pub fn save_session_at(session: &Session, path: &std::path::Path) -> Result<()> {
    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)
            .with_context(|| format!("failed to create {}", parent.display()))?;
    }
    session
        .save_to_path(path)
        .with_context(|| format!("failed to save session to {}", path.display()))?;
    Ok(())
}

/// Drive exactly one user turn through the secretary agent loop and hand
/// back its [`TurnSummary`].
///
/// * `opts.resume` — start from the saved session; it is an error if no
///   session file exists.
/// * `opts.autosave` — write the session back to disk once the turn (and
///   any post-turn compaction) has finished.
///
/// # Errors
///
/// Fails when resuming without a saved session, when the session file
/// cannot be loaded, when the turn itself fails, or when autosave fails.
pub fn run_secretary(user_input: &str, opts: SessionOptions) -> Result<TurnSummary> {
    let session = if !opts.resume {
        Session::default()
    } else {
        match try_load_session()? {
            Some(saved) => saved,
            None => {
                return Err(anyhow::anyhow!(
                    "no saved session at {}",
                    default_session_path().display()
                ));
            }
        }
    };

    let mut runtime = build_runtime(session);

    // Record file paths straight from the raw prompt so they survive even
    // if the brain drops them while building tool-call arguments.
    let prompt_paths = crate::tools::extract_user_prompt_paths(user_input);
    crate::tools::set_current_turn_paths(prompt_paths);

    // Sprint 14: single-shot runs use the fallback wrapper too, so
    // benchmarks can observe Auto-preset escalation. No interactive
    // prompter is supplied in this mode.
    let mut prompter_slot: Option<&mut dyn PermissionPrompter> = None;
    let turn = crate::brain_selector::run_turn_with_fallback(
        &mut runtime,
        user_input,
        &mut prompter_slot,
    );
    let summary = match turn {
        Ok(done) => done,
        Err(e) => return Err(anyhow::anyhow!("secretary turn failed: {e}")),
    };

    // Compact after the turn (same session-size trigger as the REPL) so
    // that whatever gets autosaved below is already trimmed.
    if let Some(removed) = maybe_compact_session(&mut runtime, false) {
        eprintln!("[auto-compacted {removed} older message(s)]");
    }

    if opts.autosave {
        save_session(runtime.session())?;
    }

    Ok(summary)
}

/// Run an interactive REPL against a single long-lived `ConversationRuntime`.
/// Reads lines from stdin, runs each as a turn, prints the assistant's reply.
/// Lines starting with `/` are interpreted as slash commands (see
/// `commands.rs`) and never reach the model. Exits on EOF, a stdin read
/// error, the `/exit` command, or the bare words `exit`/`quit`/`:q` (kept
/// for muscle memory).
///
/// With `opts.resume`, a saved session is loaded when one exists; unlike
/// [`run_secretary`], a missing session file is not an error — the REPL
/// reports it and starts fresh. With `opts.autosave`, the session is saved
/// after every *successful* model turn; failed turns and slash commands do
/// not trigger a save from this function.
///
/// # Errors
///
/// Returns an error if an existing session file cannot be loaded, or if
/// writing/flushing the prompt to stderr fails. Turn failures and save
/// failures are reported on stderr and do not end the loop.
pub fn run_secretary_repl(opts: SessionOptions) -> Result<()> {
    theme::init();

    let session = if opts.resume {
        match try_load_session()? {
            Some(s) => {
                eprintln!(
                    "{} {} {}",
                    theme::SAVE,
                    theme::ok("resumed session"),
                    theme::dim(&format!(
                        "from {} ({} messages)",
                        default_session_path().display(),
                        s.messages.len()
                    ))
                );
                s
            }
            None => {
                // No file on disk: not fatal in REPL mode, just say so.
                eprintln!(
                    "{} {}",
                    theme::dim("○"),
                    theme::dim(&format!(
                        "no saved session at {} — starting fresh",
                        default_session_path().display()
                    ))
                );
                Session::default()
            }
        }
    } else {
        Session::default()
    };

    // Streaming runtime with the stdout callback (`telegram = false`).
    let mut runtime = build_runtime_streaming(session, false);
    let mut state = ReplState::default();
    let mut prompter = CliPrompter;

    // Startup banner — all on stderr.
    eprintln!(
        "{} {} {}",
        theme::ROBOT,
        theme::brand("claudette"),
        theme::dim("— your local secretary")
    );
    eprintln!(
        "{} {}",
        theme::SPARKLES,
        theme::dim("type /help for commands, /exit (or Ctrl-D) to leave")
    );
    eprintln!(
        "{} {}",
        theme::SAVE,
        theme::dim(&format!("session: {}", default_session_path().display()))
    );
    eprintln!();

    loop {
        // Print prompt.
        {
            let stderr = io::stderr();
            let mut err = stderr.lock();
            write!(err, "{} ", theme::accent(theme::PROMPT_ARROW))?;
            err.flush()?;
        }

        // Read one line WITHOUT holding the stdin lock across run_turn.
        // The CliPrompter needs stdin access for [y/N] confirmation
        // prompts, so we must drop the lock before entering the runtime.
        let line = {
            let stdin = io::stdin();
            let mut buf = String::new();
            match stdin.read_line(&mut buf) {
                Ok(0) => {
                    eprintln!();
                    break; // EOF
                }
                Ok(_) => buf,
                Err(e) => {
                    // A read error ends the REPL, but the function still
                    // returns `Ok(())`.
                    eprintln!("stdin error: {e}");
                    break;
                }
            }
        };
        // stdin lock is now dropped — safe for the prompter to read.

        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        if matches!(trimmed, "exit" | "quit" | ":q") {
            break;
        }

        // A recognised slash command is handled entirely by the dispatcher;
        // either outcome skips the model turn for this line.
        if let Some(cmd) = parse_slash_command(trimmed) {
            match dispatch_slash_command(cmd, &mut runtime, &state) {
                SlashOutcome::Continue => continue,
                SlashOutcome::Exit => break,
            }
        }

        // Stash any file paths from the raw line for this turn (same
        // pre-turn step as single-shot mode).
        crate::tools::set_current_turn_paths(crate::tools::extract_user_prompt_paths(trimmed));

        // Vision: if the line contains image-file path tokens (drag-drop
        // typically pastes them via Windows Terminal), attach them and
        // route directly to `run_turn_with_images`, bypassing the brain
        // selector. The fallback logic is for "stuck" detection on text
        // turns and doesn't apply when we're sending an image.
        let extracted = crate::image_attach::extract_image_attachments_from_input(trimmed);
        // Something looked like an image path but nothing could be
        // attached: warn, then fall through to a plain text turn.
        if extracted.extension_matches > 0 && extracted.attached.is_empty() {
            if let Some(reason) = &extracted.first_failure {
                eprintln!(
                    "{} {}",
                    theme::WARN_GLYPH,
                    theme::warn(&format!(
                        "image-path detected but couldn't attach: {reason}"
                    ))
                );
            }
        }

        let turn_result: Result<TurnSummary, String> = if extracted.attached.is_empty() {
            // Sprint 14: route through brain_selector so Auto-preset turns get
            // the 4b → 9b escalation when stuck signals fire. On Fast/Smart
            // (no fallback configured) this collapses to the existing
            // run_turn_with_retry behaviour — no overhead.
            let mut prompter_opt: Option<&mut dyn PermissionPrompter> = Some(&mut prompter);
            crate::brain_selector::run_turn_with_fallback(&mut runtime, trimmed, &mut prompter_opt)
        } else {
            let count = extracted.attached.len();
            eprintln!(
                "{} {}",
                theme::SAVE,
                theme::dim(&format!("📎 attached {count} image(s) — routing to vision"))
            );
            // The runtime takes images as (media type, base64 data) pairs.
            let images: Vec<(String, String)> = extracted
                .attached
                .into_iter()
                .map(|a| (a.media_type, a.data_b64))
                .collect();
            runtime
                .run_turn_with_images(trimmed, images, Some(&mut prompter))
                .map_err(|e| e.to_string())
        };

        match turn_result {
            Ok(summary) => {
                // No post-turn re-print: streaming has already pushed every
                // text delta to stdout via `stdout_text_callback`. The model's
                // text terminator newline is also fired by the callback at
                // end-of-stream, so the status line below lands on its own row.

                state.record_turn(summary.usage.input_tokens, summary.usage.output_tokens);
                eprintln!(
                    "{} {}",
                    theme::BOLT,
                    theme::info(&format!(
                        "turn iter={} in={} out={}",
                        summary.iterations, summary.usage.input_tokens, summary.usage.output_tokens,
                    ))
                );

                // Cross-session recall: index the user input + the assistant
                // text from this turn. Best-effort — a transient Ollama
                // outage or a missing embed model emits a single warn line
                // without breaking the REPL. Disable with
                // `CLAUDETTE_RECALL_DISABLE=1` (e.g., privacy, no Ollama).
                if !recall_disabled() {
                    if let Err(e) = index_turn_for_recall(trimmed, &runtime) {
                        eprintln!(
                            "{} {}",
                            theme::warn(theme::WARN_GLYPH),
                            theme::warn(&format!("recall: {e}"))
                        );
                    }
                }

                // The runtime's built-in trigger is disabled (see the
                // `u32::MAX` threshold in `build_runtime_with_brain`) — we
                // fire our own session-size trigger here instead, AFTER the
                // turn so the model never sees a mid-turn rebuild.
                //
                // NOTE(review): the message below always prints the hard
                // `compact_threshold()`; if `maybe_compact_session` can also
                // fire on the soft threshold, that number may be misleading —
                // confirm against its implementation.
                if let Some(removed) = maybe_compact_session(&mut runtime, false) {
                    eprintln!(
                        "{} {}",
                        theme::SAVE,
                        theme::ok(&format!(
                            "auto-compacted {removed} older message(s) — session was over {}-token threshold",
                            compact_threshold(),
                        ))
                    );
                }

                if opts.autosave {
                    if let Err(e) = save_session(runtime.session()) {
                        // Surface the error but don't drop the REPL — the
                        // session in memory is still valid; only persistence
                        // is broken.
                        eprintln!(
                            "{} {}",
                            theme::warn(theme::WARN_GLYPH),
                            theme::warn(&format!("session save failed: {e:#}"))
                        );
                    }
                }
            }
            Err(e) => {
                // A failed turn is reported and the loop continues; nothing
                // is indexed, compacted, or saved for it.
                eprintln!(
                    "{} {}",
                    theme::error(theme::ERR_GLYPH),
                    theme::error(&format!("turn failed: {e}"))
                );
            }
        }
    }

    Ok(())
}

/// Build a non-streaming `ConversationRuntime` around `session` (fresh or
/// restored), wired with the secretary's model, tools, executor, system
/// prompt and permission policy. Per the original author's note, the
/// memory file `~/.claudette/CLAUDETTE.MD` (if present) is appended to the
/// system prompt as background memory.
///
/// **No streaming callback is installed.** Intended for single-shot mode
/// and tests, where the assistant's text is printed from
/// `summary.assistant_messages` once the turn completes. The REPL should
/// use [`build_runtime_streaming`] instead.
///
/// `pub(crate)` so the slash-command dispatcher can rebuild the runtime
/// in place for `/reload`, which re-reads the memory file without dropping
/// the conversation history.
pub(crate) fn build_runtime(
    session: Session,
) -> ConversationRuntime<OllamaApiClient, SecretaryToolExecutor> {
    let streaming = false;
    let telegram = false;
    build_runtime_inner(session, streaming, telegram)
}

/// Streaming counterpart of [`build_runtime`]: a text callback is installed
/// so deltas are delivered as they arrive. The `telegram` flag is forwarded
/// unchanged; downstream it selects the Telegram callback over the stdout
/// one and is also passed to the system-prompt builder.
///
/// Used by the REPL and by the slash commands that rebuild the runtime in
/// place (`/clear`, `/load`, `/reload`, `/compact`).
pub(crate) fn build_runtime_streaming(
    session: Session,
    telegram: bool,
) -> ConversationRuntime<OllamaApiClient, SecretaryToolExecutor> {
    let streaming = true;
    build_runtime_inner(session, streaming, telegram)
}

/// Shared tail of [`build_runtime`] and [`build_runtime_streaming`]: takes
/// the brain role from the process-global `model_config::active()` snapshot
/// and defers to [`build_runtime_with_brain`].
///
/// Sprint 14: because the snapshot is read on every call, a config change
/// made by `/preset` or `/brain` takes effect the next time a runtime is
/// built (e.g. after `/clear`, `/reload`, or a fallback turn).
fn build_runtime_inner(
    session: Session,
    streaming: bool,
    telegram: bool,
) -> ConversationRuntime<OllamaApiClient, SecretaryToolExecutor> {
    let active_brain = model_config::active().brain;
    build_runtime_with_brain(session, &active_brain, streaming, telegram)
}

/// Sprint 14: build a runtime against an explicitly supplied brain role
/// instead of the globally active one. `brain_selector` uses this to spin
/// up a fallback runtime on a different model (e.g. qwen3.5:9b) around the
/// same session, with the same permission policy and system prompt.
/// `pub(crate)` so it stays internal.
///
/// * `streaming` — install a text callback on the API client.
/// * `telegram` — pick the Telegram callback rather than the stdout one,
///   and pass the flag on to the system-prompt builder.
///
/// The runtime's built-in auto-compaction threshold is set to `u32::MAX`,
/// and the iteration cap comes from [`max_iterations`].
pub(crate) fn build_runtime_with_brain(
    session: Session,
    brain: &crate::model_config::RoleConfig,
    streaming: bool,
    telegram: bool,
) -> ConversationRuntime<OllamaApiClient, SecretaryToolExecutor> {
    // A single shared ToolRegistry backs the `tools` field of every
    // request: the API client and the executor each hold a handle to the
    // same `Arc`, so a change made by the executor (when the model calls
    // `enable_tools`) is visible to the client on the next chat turn.
    //
    // No groups are pre-enabled here for any mode (REPL, single-shot,
    // Telegram). Telegram used to auto-enable five groups, but the schema
    // cost (~2,500 tokens per turn, ~15% of a 16K window) dominated
    // one-word exchanges like "hey". Everything now goes through
    // enable_tools: one extra round-trip for the first tool call in a
    // session, ~2,300 tokens saved per turn.
    let registry = Arc::new(Mutex::new(ToolRegistry::new()));

    let base_client = OllamaApiClient::with_registry(brain.model.clone(), Arc::clone(&registry))
        .with_context(brain.num_ctx)
        .with_max_predict(brain.num_predict);
    let api_client = if streaming {
        let sink = if telegram {
            telegram_text_callback()
        } else {
            stdout_text_callback()
        };
        base_client.with_text_callback(sink)
    } else {
        base_client
    };

    // Keep a registry handle for the unknown-tool hinter before the
    // executor takes ownership of ours. The hinter turns a confabulated
    // *group* name (e.g. `facts`, `markets`) into that group's real tool
    // names, so the brain gets a useful "did you mean?" list rather than
    // an empty array.
    let hinter_registry = Arc::clone(&registry);
    let executor = SecretaryToolExecutor::with_registry(registry);
    let policy = build_permission_policy();
    let memory = try_load_memory();
    let system_prompt = secretary_system_prompt_with_memory(memory.as_deref(), telegram);

    ConversationRuntime::new(session, api_client, executor, policy, system_prompt)
        // Optional-group tools need 3+ iterations (enable_tools → tool call
        // → respond). With the empty-response retry nudge, 8 was too tight
        // for single-shot search/grep/git chains. The shared default
        // (currently 40) and the `CLAUDETTE_MAX_ITERATIONS` knob live in
        // `max_iterations`.
        .with_max_iterations(max_iterations())
        .with_auto_compaction_input_tokens_threshold(u32::MAX)
        .with_unknown_tool_hinter(move |name: &str| match ToolGroup::parse(name) {
            Some(group) => {
                // A poisoned lock means another holder panicked. The hint is
                // a best-effort suggestion, not a correctness path, so carry
                // on with the inner state.
                let guard = hinter_registry
                    .lock()
                    .unwrap_or_else(std::sync::PoisonError::into_inner);
                guard.group_tool_names(group)
            }
            None => Vec::new(),
        })
}

// ────────────────────────────────────────────────────────────────────────────
// Permission system
// ────────────────────────────────────────────────────────────────────────────

/// Build the per-tool permission policy. Active mode is `WorkspaceWrite`:
/// read-only and workspace-write tools pass through silently, but tools
/// tagged `DangerFullAccess` trigger the CLI prompter for `[y/N]`
/// confirmation before executing.
///
/// The policy is a flat list of `(tool name, required mode)` pairs,
/// grouped below by tool family. A fresh policy is built on every call
/// (each runtime construction calls this once).
///
/// NOTE(review): tools not listed here get whatever requirement
/// `PermissionPolicy` applies by default — confirm that default is the safe
/// one before adding a tool without registering it here.
pub(crate) fn build_permission_policy() -> PermissionPolicy {
    use PermissionMode::{DangerFullAccess, ReadOnly, WorkspaceWrite};

    PermissionPolicy::new(WorkspaceWrite)
        // ── Read-only (auto-allowed) ────────────────────────────────
        .with_tool_requirement("get_current_time", ReadOnly)
        .with_tool_requirement("note_list", ReadOnly)
        .with_tool_requirement("note_read", ReadOnly)
        .with_tool_requirement("todo_list", ReadOnly)
        // enable_tools: meta-tool, pure in-memory state change, no IO
        .with_tool_requirement("enable_tools", ReadOnly)
        .with_tool_requirement("read_file", ReadOnly)
        .with_tool_requirement("list_dir", ReadOnly)
        .with_tool_requirement("get_capabilities", ReadOnly)
        // load_workspace_rules: reads ~/.claudette/instructions.md on demand
        // (added in the 2026-05-04 token-trim work to lazy-load what used to
        // auto-attach to the system prompt). Read-only.
        .with_tool_requirement("load_workspace_rules", ReadOnly)
        .with_tool_requirement("glob_search", ReadOnly)
        .with_tool_requirement("grep_search", ReadOnly)
        .with_tool_requirement("git_status", ReadOnly)
        .with_tool_requirement("git_diff", ReadOnly)
        .with_tool_requirement("git_log", ReadOnly)
        .with_tool_requirement("git_branch", ReadOnly)
        // ── Workspace-write (auto-allowed) ──────────────────────────
        .with_tool_requirement("note_create", WorkspaceWrite)
        .with_tool_requirement("note_update", WorkspaceWrite)
        .with_tool_requirement("note_delete", WorkspaceWrite)
        .with_tool_requirement("todo_add", WorkspaceWrite)
        .with_tool_requirement("todo_complete", WorkspaceWrite)
        .with_tool_requirement("todo_uncomplete", WorkspaceWrite)
        .with_tool_requirement("todo_delete", WorkspaceWrite)
        .with_tool_requirement("write_file", WorkspaceWrite)
        .with_tool_requirement("generate_code", WorkspaceWrite)
        .with_tool_requirement("web_search", WorkspaceWrite)
        .with_tool_requirement("web_fetch", WorkspaceWrite)
        .with_tool_requirement("open_in_editor", WorkspaceWrite)
        .with_tool_requirement("reveal_in_explorer", WorkspaceWrite)
        .with_tool_requirement("open_url", WorkspaceWrite)
        .with_tool_requirement("add_numbers", WorkspaceWrite)
        .with_tool_requirement("spawn_agent", WorkspaceWrite)
        // ── Sprint 9 Phase 0a: facts group (read-only REST calls) ───
        .with_tool_requirement("wikipedia_search", ReadOnly)
        .with_tool_requirement("wikipedia_summary", ReadOnly)
        .with_tool_requirement("weather_current", ReadOnly)
        .with_tool_requirement("weather_forecast", ReadOnly)
        // ── Sprint 9 Phase 0a: registry group (read-only) ────────────
        .with_tool_requirement("crate_info", ReadOnly)
        .with_tool_requirement("crate_search", ReadOnly)
        .with_tool_requirement("npm_info", ReadOnly)
        .with_tool_requirement("npm_search", ReadOnly)
        // ── Sprint 9 Phase 0a: github group ──────────────────────────
        // Reads: auto-allowed. Writes: WorkspaceWrite (hit the network
        // on the user's behalf but don't touch the filesystem).
        .with_tool_requirement("gh_list_my_prs", ReadOnly)
        .with_tool_requirement("gh_list_assigned_issues", ReadOnly)
        .with_tool_requirement("gh_get_issue", ReadOnly)
        .with_tool_requirement("gh_search_code", ReadOnly)
        .with_tool_requirement("gh_list_repo_issues", ReadOnly)
        .with_tool_requirement("gh_pr_status", ReadOnly)
        .with_tool_requirement("gh_create_issue", WorkspaceWrite)
        .with_tool_requirement("gh_comment_issue", WorkspaceWrite)
        .with_tool_requirement("gh_fork", WorkspaceWrite)
        .with_tool_requirement("gh_create_pr", WorkspaceWrite)
        // ── Sprint 9 Phase 0b: markets group (all read-only) ─────────
        .with_tool_requirement("tv_get_quote", ReadOnly)
        .with_tool_requirement("tv_technical_rating", ReadOnly)
        .with_tool_requirement("tv_search_symbol", ReadOnly)
        .with_tool_requirement("tv_economic_calendar", ReadOnly)
        .with_tool_requirement("vestige_asa_info", ReadOnly)
        .with_tool_requirement("vestige_search_asa", ReadOnly)
        .with_tool_requirement("vestige_top_movers", ReadOnly)
        // ── Sprint 10: telegram group ────────────────────────────────
        // Reads: auto-allowed. Sends: WorkspaceWrite (posts messages on
        // the user's behalf but doesn't touch the filesystem).
        .with_tool_requirement("tg_get_updates", ReadOnly)
        .with_tool_requirement("tg_send", WorkspaceWrite)
        .with_tool_requirement("tg_send_photo", WorkspaceWrite)
        // ── Life Agent (v0.2.0): calendar group ──────────────────────
        // Reads: auto-allowed. Writes/RSVP: WorkspaceWrite. Delete is
        // irreversible from claudette's side, so DangerFullAccess.
        .with_tool_requirement("calendar_list_events", ReadOnly)
        .with_tool_requirement("calendar_create_event", WorkspaceWrite)
        .with_tool_requirement("calendar_update_event", WorkspaceWrite)
        .with_tool_requirement("calendar_respond_to_event", WorkspaceWrite)
        .with_tool_requirement("calendar_delete_event", DangerFullAccess)
        // ── Life Agent: gmail group (gmail.readonly OAuth scope) ─────
        .with_tool_requirement("gmail_list", ReadOnly)
        .with_tool_requirement("gmail_search", ReadOnly)
        .with_tool_requirement("gmail_read", ReadOnly)
        .with_tool_requirement("gmail_list_labels", ReadOnly)
        // ── Life Agent: schedule group ───────────────────────────────
        .with_tool_requirement("schedule_list", ReadOnly)
        .with_tool_requirement("schedule_once", WorkspaceWrite)
        .with_tool_requirement("schedule_recurring", WorkspaceWrite)
        .with_tool_requirement("schedule_cancel", WorkspaceWrite)
        // ── Recall (cross-session memory): pure search ───────────────
        .with_tool_requirement("recall", ReadOnly)
        // ── Dangerous (ALWAYS prompts for [y/N] confirmation) ────────
        .with_tool_requirement("bash", DangerFullAccess)
        .with_tool_requirement("edit_file", DangerFullAccess)
        .with_tool_requirement("git_add", DangerFullAccess)
        .with_tool_requirement("git_commit", DangerFullAccess)
        .with_tool_requirement("git_push", DangerFullAccess)
        .with_tool_requirement("git_checkout", DangerFullAccess)
        // Brownfield: git_clone writes a fresh tree under the controlled
        // ~/.claudette/missions/ root. Auto-allowed (WorkspaceWrite), so it
        // is the one git write tool that is NOT in the dangerous tier.
        .with_tool_requirement("git_clone", WorkspaceWrite)
        // ── T2 brownfield: mission_* tools ──────────────────────────────
        // mission_status / mission_list / mission_attach only read state
        // (attach loads a marker + flips an in-memory slot; downstream
        // writes still go through their own gates). mission_exit mutates
        // session state with no FS writes. mission_start clones into
        // ~/.claudette/missions/ (WorkspaceWrite, matching git_clone).
        // mission_submit stages/commits/pushes/opens a PR — must be
        // DangerFullAccess to match its worst action (`git push -u`).
        .with_tool_requirement("mission_start", WorkspaceWrite)
        .with_tool_requirement("mission_status", ReadOnly)
        .with_tool_requirement("mission_list", ReadOnly)
        .with_tool_requirement("mission_attach", ReadOnly)
        .with_tool_requirement("mission_exit", WorkspaceWrite)
        .with_tool_requirement("mission_submit", DangerFullAccess)
}

// ────────────────────────────────────────────────────────────────────────────
// Cross-session recall hooks
// ────────────────────────────────────────────────────────────────────────────

/// Reports whether post-turn recall indexing has been switched off.
///
/// The switch is the `CLAUDETTE_RECALL_DISABLE` environment variable and
/// only the exact value `"1"` counts. An unset variable, `"0"`, or any
/// other content (including a value that is not valid Unicode) leaves
/// indexing enabled.
fn recall_disabled() -> bool {
    let flag = std::env::var("CLAUDETTE_RECALL_DISABLE").ok();
    flag.as_deref() == Some("1")
}

/// Feed one finished turn into the cross-session recall store.
///
/// Two entries are written, in this order:
/// 1. `user_input`, trimmed, under [`Role::User`](crate::recall::Role) —
///    skipped when the trimmed text is empty.
/// 2. The text blocks of the most recent assistant message in the
///    runtime's session, joined with `\n`, under `Role::Assistant` —
///    skipped when there is no assistant message or the joined text is
///    blank (for example a message made only of non-text blocks).
///
/// The caller's `user_input` is indexed rather than the newest user
/// message in the session: [`run_turn_with_retry`] may push a synthetic
/// nudge as a user turn, and that nudge is not something the human typed.
///
/// # Errors
///
/// Propagates the first error returned by `global_index`; if the user
/// entry fails, the assistant entry is not attempted.
fn index_turn_for_recall(
    user_input: &str,
    runtime: &ConversationRuntime<OllamaApiClient, SecretaryToolExecutor>,
) -> Result<(), String> {
    use crate::recall::{global_index, Role};
    use crate::ContentBlock;

    let typed = user_input.trim();
    if !typed.is_empty() {
        global_index(Role::User, typed)?;
    }

    // Newest-first scan for the latest assistant message.
    let latest_reply = runtime
        .session()
        .messages
        .iter()
        .rev()
        .find(|m| matches!(m.role, crate::MessageRole::Assistant));
    let Some(reply) = latest_reply else {
        return Ok(());
    };

    // Concatenate the text blocks. A separator is inserted only once the
    // accumulator already holds something, so leading empty blocks do not
    // produce a leading newline.
    let reply_text = reply
        .blocks
        .iter()
        .filter_map(|block| match block {
            ContentBlock::Text { text } => Some(text),
            _ => None,
        })
        .fold(String::new(), |mut joined, piece| {
            if !joined.is_empty() {
                joined.push('\n');
            }
            joined.push_str(piece);
            joined
        });

    if reply_text.trim().is_empty() {
        return Ok(());
    }
    global_index(Role::Assistant, &reply_text)?;
    Ok(())
}

/// Interactive CLI prompter. Prints the tool name and the *full* tool
/// input to stderr, asks `[y/N]`, and reads one line from stdin. Used by
/// the REPL and by spawned agents in normal mode (dangerous tools bubble
/// up to the user). The single-shot path passes `None` (no prompter →
/// dangerous tools denied).
pub struct CliPrompter;

impl PermissionPrompter for CliPrompter {
    /// Ask the human whether `request` may run.
    ///
    /// Returns [`PermissionPromptDecision::Allow`] only when the trimmed
    /// answer is `y` or `yes` (case-insensitive). Any other answer,
    /// including an empty line, yields `Deny` with the reason
    /// `"user denied permission"`. A stdin read error yields `Deny` with
    /// `"could not read user input"`.
    ///
    /// Writes to stderr are best-effort: a failed write is ignored so a
    /// broken stderr cannot turn into a panic mid-turn.
    fn decide(&mut self, request: &PermissionRequest) -> PermissionPromptDecision {
        /// Make control characters in one display line visible.
        ///
        /// `str::lines()` only splits on `\n` / `\r\n`, so a bare `\r`, an
        /// ESC-introduced terminal sequence, or a backspace would otherwise
        /// reach the terminal raw and could overwrite or erase the text the
        /// user is asked to approve. Every control character except tab is
        /// replaced by its `char::escape_default` spelling (`\r`,
        /// `\u{1b}`, …); all other characters pass through unchanged.
        fn make_controls_visible(line: &str) -> String {
            let mut shown = String::with_capacity(line.len());
            for ch in line.chars() {
                if ch.is_control() && ch != '\t' {
                    shown.extend(ch.escape_default());
                } else {
                    shown.push(ch);
                }
            }
            shown
        }

        let stderr = io::stderr();
        let mut err = stderr.lock();
        let _ = writeln!(err);
        // Character count of the raw input (before any display escaping).
        let input_chars = request.input.chars().count();
        let _ = writeln!(
            err,
            "  {} {} wants to run ({} chars):",
            theme::warn(theme::WARN_GLYPH),
            theme::accent(&request.tool_name),
            input_chars
        );
        // Show the full command. The old code truncated at 200 chars, which
        // let an adversary-crafted payload hide past the preview edge while
        // bash ran the complete input. Split on newlines so multi-line
        // commands stay readable; `str::lines()` yields the one line for a
        // single-line input without a trailing newline.
        if request.input.is_empty() {
            let _ = writeln!(err, "    {}", theme::dim("(empty input)"));
        } else {
            for line in request.input.lines() {
                // Same threat model as the truncation fix above: nothing in
                // the payload may be able to repaint the prompt.
                let shown = make_controls_visible(line);
                let _ = writeln!(err, "    {}", theme::dim(shown.as_str()));
            }
        }
        let _ = write!(err, "  Allow? [y/N] ");
        let _ = err.flush();

        let mut buf = String::new();
        match io::stdin().read_line(&mut buf) {
            Ok(_) => {
                let answer = buf.trim();
                if answer.eq_ignore_ascii_case("y") || answer.eq_ignore_ascii_case("yes") {
                    PermissionPromptDecision::Allow
                } else {
                    // Default-deny: anything that is not an explicit yes.
                    PermissionPromptDecision::Deny {
                        reason: "user denied permission".to_string(),
                    }
                }
            }
            Err(_) => PermissionPromptDecision::Deny {
                reason: "could not read user input".to_string(),
            },
        }
    }
}

/// Follow-up prompt sent as the retry turn by [`run_turn_with_retry`]
/// after the first attempt fails with a "no content" error. It points the
/// model at `enable_tools(group)` for tools missing from the current
/// schema, and otherwise asks for a plain-text answer.
const EMPTY_RESPONSE_NUDGE: &str = concat!(
    "Your response was empty. If you need a tool that isn't available, ",
    "call enable_tools(group) to load it first, then call the tool. ",
    "Otherwise, answer the question directly with text.",
);

/// Run one model turn, retrying once if the model answers with nothing.
///
/// Flow:
/// 1. Record the file paths mentioned in `input` for the current turn.
/// 2. Run the turn with the caller's `prompter`. Success is returned
///    as-is.
/// 3. If the error text does not contain `"no content"`, return it
///    unchanged (stringified).
/// 4. Otherwise print a notice to stderr and run a second turn whose
///    input is [`EMPTY_RESPONSE_NUDGE`], with no prompter.
///
/// # Errors
///
/// Returns the stringified error of the first attempt when it is not an
/// empty-response error, or the stringified error of the retry.
pub(crate) fn run_turn_with_retry(
    runtime: &mut ConversationRuntime<OllamaApiClient, SecretaryToolExecutor>,
    input: &str,
    prompter: Option<&mut dyn PermissionPrompter>,
) -> Result<TurnSummary, String> {
    // Stash the paths found in the raw user input before anything runs, so
    // every caller of this function gets that bookkeeping for free.
    let prompt_paths = crate::tools::extract_user_prompt_paths(input);
    crate::tools::set_current_turn_paths(prompt_paths);

    // First attempt: success short-circuits, failure is kept as text.
    let first_failure = match runtime.run_turn(input, prompter) {
        Ok(summary) => return Ok(summary),
        Err(err) => err.to_string(),
    };

    // Only the empty-response failure is worth a second try.
    if !first_failure.contains("no content") {
        return Err(first_failure);
    }

    eprintln!(
        "  {} {}",
        theme::dim("▸"),
        theme::dim("empty response — retrying with enable_tools hint...")
    );

    // The nudge goes in as a fresh user turn. It is passed without a
    // prompter because it is a system-level message, not user input.
    runtime
        .run_turn(EMPTY_RESPONSE_NUDGE, None)
        .map_err(|err| err.to_string())
}

/// Compact the runtime's session in place when it has grown past a
/// compaction threshold.
///
/// The size metric is `estimate_session_tokens` applied to the current
/// session, i.e. it tracks what the session holds now rather than a
/// running total. The tier is chosen by [`pick_compact_preserve`]:
///
/// - at or above [`compact_threshold`]: hard compact, keeping
///   [`HARD_COMPACT_PRESERVE`] recent messages;
/// - at or above [`soft_compact_threshold`] (when one is configured) but
///   below the hard threshold: soft compact, keeping
///   [`SOFT_COMPACT_PRESERVE`] recent messages;
/// - below both: nothing happens.
///
/// When compaction removes at least one message, `*runtime` is replaced
/// by a new runtime built around the compacted session via
/// `build_runtime_streaming(.., telegram)`.
///
/// Returns `Some(removed_message_count)` when messages were removed and
/// `None` when no threshold was crossed or nothing could be removed.
pub(crate) fn maybe_compact_session(
    runtime: &mut ConversationRuntime<OllamaApiClient, SecretaryToolExecutor>,
    telegram: bool,
) -> Option<usize> {
    let session_tokens = estimate_session_tokens(runtime.session());
    let keep_recent =
        pick_compact_preserve(session_tokens, compact_threshold(), soft_compact_threshold())?;

    let config = CompactionConfig {
        preserve_recent_messages: keep_recent,
        // 0 means "force the should_compact gate": the size decision was
        // already made above, so compaction must not be second-guessed.
        max_estimated_tokens: 0,
    };
    let outcome = compact_session(runtime.session(), config);

    let removed = outcome.removed_message_count;
    if removed > 0 {
        // Swap in a runtime that wraps the compacted session.
        *runtime = build_runtime_streaming(outcome.compacted_session, telegram);
        Some(removed)
    } else {
        None
    }
}

/// Decide which compaction tier applies, expressed as the number of
/// recent messages to keep.
///
/// - `estimated >= hard_threshold` → `Some(HARD_COMPACT_PRESERVE)`; the
///   hard tier wins even when the soft threshold is also crossed.
/// - otherwise, a configured `soft_threshold` with `estimated >= soft` →
///   `Some(SOFT_COMPACT_PRESERVE)`.
/// - otherwise → `None` (no compaction).
///
/// Kept free of any runtime access so the tiering policy can be
/// unit-tested without constructing a runtime.
#[must_use]
fn pick_compact_preserve(
    estimated: usize,
    hard_threshold: usize,
    soft_threshold: Option<usize>,
) -> Option<usize> {
    match soft_threshold {
        // The hard tier is checked first, whatever the soft setting is.
        _ if estimated >= hard_threshold => Some(HARD_COMPACT_PRESERVE),
        Some(soft) if estimated >= soft => Some(SOFT_COMPACT_PRESERVE),
        _ => None,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{ContentBlock, ConversationMessage, MessageRole};
    use std::ffi::{OsStr, OsString};
    use std::sync::{Mutex, MutexGuard, PoisonError};

    /// `std::env::set_var` is process-global and races between parallel
    /// tests. Every env-var-touching test serialises on this lock (through
    /// [`EnvScope`]); the rest use explicit paths via `save_session_at` /
    /// `try_load_session_at`.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Exclusive, self-restoring access to the process environment.
    ///
    /// Holds [`ENV_LOCK`] for its whole lifetime and remembers the previous
    /// value of every variable it touches. On drop — including during a
    /// panic unwind from a failed assertion — the variables are put back,
    /// newest change first, and only then is the lock released.
    ///
    /// A poisoned lock is recovered rather than unwrapped: the mutex guards
    /// no data, so poisoning carries no information, and unwrapping would
    /// make one failing env test fail every env test that runs after it.
    struct EnvScope {
        saved: Vec<(&'static str, Option<OsString>)>,
        _lock: MutexGuard<'static, ()>,
    }

    impl EnvScope {
        /// Take the environment lock, tolerating poisoning.
        fn lock() -> Self {
            Self {
                saved: Vec::new(),
                _lock: ENV_LOCK.lock().unwrap_or_else(PoisonError::into_inner),
            }
        }

        /// Snapshot `key` so `Drop` can restore it. `var_os` is used so a
        /// non-UTF-8 previous value is restored rather than lost.
        fn remember(&mut self, key: &'static str) {
            self.saved.push((key, std::env::var_os(key)));
        }

        /// Set `key` to `value` for the lifetime of this scope.
        fn set(&mut self, key: &'static str, value: impl AsRef<OsStr>) {
            self.remember(key);
            std::env::set_var(key, value);
        }

        /// Remove `key` for the lifetime of this scope.
        fn unset(&mut self, key: &'static str) {
            self.remember(key);
            std::env::remove_var(key);
        }
    }

    impl Drop for EnvScope {
        fn drop(&mut self) {
            // Undo in reverse order so a key touched twice ends up at its
            // original value.
            while let Some((key, previous)) = self.saved.pop() {
                match previous {
                    Some(value) => std::env::set_var(key, value),
                    None => std::env::remove_var(key),
                }
            }
        }
    }

    /// Build a unique temp file path for this test invocation. Caller is
    /// responsible for cleaning it up.
    fn temp_session_file(label: &str) -> PathBuf {
        let dir = std::env::temp_dir().join("claudette-test-sessions");
        let _ = std::fs::create_dir_all(&dir);
        dir.join(format!(
            "{label}-{}-{}.json",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .map_or(0, |d| d.as_nanos())
        ))
    }

    /// A user message holding a single text block.
    fn user_text(text: &str) -> ConversationMessage {
        ConversationMessage {
            role: MessageRole::User,
            blocks: vec![ContentBlock::Text {
                text: text.to_string(),
            }],
            usage: None,
        }
    }

    #[test]
    fn default_session_path_honors_env_var() {
        let path = temp_session_file("env-var");
        let mut env = EnvScope::lock();
        env.set("CLAUDETTE_SESSION", &path);

        let resolved = default_session_path();
        assert_eq!(resolved, path);
    }

    #[test]
    fn save_then_load_round_trip() {
        let path = temp_session_file("round-trip");
        let mut session = Session::default();
        session.messages.push(user_text("remember this"));

        save_session_at(&session, &path).expect("save should succeed");
        let load_result = try_load_session_at(&path);
        // Clean up before asserting so a failure does not leak the file.
        let _ = std::fs::remove_file(&path);

        let loaded = load_result
            .expect("load should not error")
            .expect("session should be present");

        assert_eq!(loaded.messages.len(), 1);
        if let ContentBlock::Text { text } = &loaded.messages[0].blocks[0] {
            assert_eq!(text, "remember this");
        } else {
            panic!("expected text block");
        }
    }

    #[test]
    fn try_load_returns_none_when_missing() {
        let path = temp_session_file("missing");
        let _ = std::fs::remove_file(&path); // belt-and-braces
        let result = try_load_session_at(&path).expect("missing file should not error");
        assert!(result.is_none());
    }

    #[test]
    fn compact_threshold_default_when_env_var_unset() {
        let mut env = EnvScope::lock();
        env.unset("CLAUDETTE_COMPACT_THRESHOLD");

        assert_eq!(compact_threshold(), DEFAULT_COMPACT_THRESHOLD);
    }

    #[test]
    fn compact_threshold_honors_env_var() {
        let mut env = EnvScope::lock();
        env.set("CLAUDETTE_COMPACT_THRESHOLD", "12345");

        assert_eq!(compact_threshold(), 12345);
    }

    #[test]
    fn compact_threshold_falls_back_on_garbage() {
        let mut env = EnvScope::lock();
        env.set("CLAUDETTE_COMPACT_THRESHOLD", "not-a-number");

        assert_eq!(compact_threshold(), DEFAULT_COMPACT_THRESHOLD);
    }

    #[test]
    fn maybe_compact_session_no_op_when_under_threshold() {
        let mut env = EnvScope::lock();
        env.set("CLAUDETTE_COMPACT_THRESHOLD", "1000000");
        // `maybe_compact_session` also consults the soft threshold; clear
        // any ambient value so only the hard threshold is in play.
        env.unset("CLAUDETTE_SOFT_COMPACT_THRESHOLD");

        // Build a runtime around a tiny session — well under 1M tokens.
        let mut session = Session::default();
        session.messages.push(user_text("tiny"));
        let messages_before = session.messages.len();
        let mut runtime = build_runtime(session);

        let result = maybe_compact_session(&mut runtime, false);
        assert!(
            result.is_none(),
            "should not compact when session is under threshold"
        );
        assert_eq!(runtime.session().messages.len(), messages_before);
    }

    #[test]
    fn maybe_compact_session_fires_when_over_threshold() {
        let mut env = EnvScope::lock();
        // Threshold of 10 tokens — every realistic session crosses this.
        env.set("CLAUDETTE_COMPACT_THRESHOLD", "10");

        // The session must hold strictly more messages than the compaction
        // pass preserves, or `compact_session` removes nothing; eight
        // messages is assumed to clear that floor.
        let mut session = Session::default();
        for i in 0..8 {
            session.messages.push(user_text(&format!(
                "turn {i} content padded long enough to register"
            )));
        }
        let mut runtime = build_runtime(session);
        let messages_before = runtime.session().messages.len();

        let result = maybe_compact_session(&mut runtime, false);
        let removed = result.expect("expected compaction to fire");
        assert!(removed > 0, "should remove at least one message");
        // After compaction the runtime is rebuilt around the compacted
        // session. The replacement carries the System summary message
        // plus the preserved tail, so total < before.
        assert!(runtime.session().messages.len() < messages_before);
    }

    #[test]
    fn save_creates_parent_directory() {
        // Only the parent directory of the generated temp path is used.
        let nested_dir = temp_session_file("nested")
            .parent()
            .unwrap()
            .join("nested-subdir");
        let path = nested_dir.join("session.json");
        let _ = std::fs::remove_dir_all(&nested_dir);

        let saved = save_session_at(&Session::default(), &path);
        let exists = path.exists();
        // Clean up before asserting so a failure does not leak the dir.
        let _ = std::fs::remove_dir_all(&nested_dir);

        saved.expect("save should create parents");
        assert!(exists);
    }

    // ─── Tiered compaction policy (P3) ──────────────────────────────────────

    #[test]
    fn pick_compact_preserve_returns_none_below_both_thresholds() {
        assert_eq!(
            pick_compact_preserve(50_000, 1_000_000, Some(200_000)),
            None
        );
        assert_eq!(pick_compact_preserve(50_000, 1_000_000, None), None);
    }

    #[test]
    fn pick_compact_preserve_returns_soft_when_only_soft_crossed() {
        assert_eq!(
            pick_compact_preserve(250_000, 1_000_000, Some(200_000)),
            Some(SOFT_COMPACT_PRESERVE)
        );
    }

    #[test]
    fn pick_compact_preserve_returns_hard_when_hard_crossed() {
        assert_eq!(
            pick_compact_preserve(1_500_000, 1_000_000, Some(200_000)),
            Some(HARD_COMPACT_PRESERVE)
        );
    }

    #[test]
    fn pick_compact_preserve_prefers_hard_over_soft_when_both_crossed() {
        // At >= hard, the soft tier is skipped — we want maximally aggressive
        // summarisation when the session is genuinely huge.
        assert_eq!(
            pick_compact_preserve(2_000_000, 1_000_000, Some(200_000)),
            Some(HARD_COMPACT_PRESERVE)
        );
    }

    #[test]
    fn pick_compact_preserve_skips_soft_when_threshold_unset() {
        // No CLAUDETTE_SOFT_COMPACT_THRESHOLD set → only the hard threshold
        // gates compaction, preserving the historical one-tier behaviour.
        assert_eq!(pick_compact_preserve(500_000, 1_000_000, None), None);
    }

    #[test]
    fn soft_compact_threshold_returns_none_when_unset() {
        let mut env = EnvScope::lock();
        env.unset("CLAUDETTE_SOFT_COMPACT_THRESHOLD");

        assert_eq!(soft_compact_threshold(), None);
    }

    #[test]
    fn soft_compact_threshold_returns_some_when_set() {
        let mut env = EnvScope::lock();
        env.set("CLAUDETTE_SOFT_COMPACT_THRESHOLD", "200000");

        assert_eq!(soft_compact_threshold(), Some(200_000));
    }

    #[test]
    fn soft_compact_threshold_treats_zero_as_unset() {
        // 0 is a magic "disabled" value — explicit opt-out via env without
        // having to unset.
        let mut env = EnvScope::lock();
        env.set("CLAUDETTE_SOFT_COMPACT_THRESHOLD", "0");

        assert_eq!(soft_compact_threshold(), None);
    }

    /// Regression test: every tool name advertised in `secretary_tools_json`
    /// must have a matching entry in `build_permission_policy()` so the
    /// unknown-tool short-circuit (added v0.2.3) does not swallow real tools
    /// before they reach the dispatcher.
    ///
    /// This is the bug class that hit v0.3.0–v0.3.1: the v0.2.0 Life Agent
    /// groups (calendar / gmail / schedule) were never registered in the
    /// permission policy, so every call returned `{"error":"unknown tool"}`
    /// and the morning briefing hallucinated to cover. Fixed in v0.3.1, but
    /// the only thing keeping it fixed without this test is hand-discipline.
    /// (Companion to `every_advertised_tool_is_classified` in
    /// `tool_groups.rs`, which catches the analogous schema↔registry gap.)
    ///
    /// The schema must be a non-empty JSON array. Without that check a
    /// change in the schema's shape would leave nothing to iterate and the
    /// test would pass while verifying nothing.
    #[test]
    fn every_advertised_tool_has_permission_requirement() {
        let policy = build_permission_policy();
        let full = crate::tools::secretary_tools_json();
        let tools = full
            .as_array()
            .expect("secretary_tools_json() should be a JSON array of tool definitions");
        assert!(
            !tools.is_empty(),
            "secretary_tools_json() advertised no tools — nothing was checked"
        );

        // Entries without a `/function/name` string are skipped.
        let missing: Vec<&str> = tools
            .iter()
            .filter_map(|tool| {
                tool.pointer("/function/name")
                    .and_then(serde_json::Value::as_str)
            })
            .filter(|&name| !policy.is_known(name))
            .collect();

        assert!(
            missing.is_empty(),
            "tool(s) advertised but not registered in build_permission_policy() — \
             will be swallowed by the unknown-tool short-circuit and never reach \
             the dispatcher: {missing:?}. Add a `.with_tool_requirement(name, ...)` \
             entry."
        );
    }

    /// Regression test: tools that internally invoke other DangerFullAccess
    /// primitives (or take their actions directly) must themselves be
    /// DangerFullAccess so the [y/N] confirmation reaches the user. The
    /// companion test above is name-coverage; this one is tier-correctness.
    /// Without it, downgrading a high-blast-radius tool silently lets a 4b
    /// brain take an irreversible cross-org action.
    #[test]
    fn high_blast_radius_tools_require_danger_tier() {
        let policy = build_permission_policy();
        // (tool_name, why) — each must be DangerFullAccess. Add new entries
        // here whenever a tool gains internal calls into git_push, edit_file,
        // bash, gh_create_pr, or any other already-DangerFullAccess primitive.
        let cases: &[(&str, &str)] =
            &[("mission_submit", "calls git_push + gh_create_pr internally")];
        for (name, why) in cases {
            let actual = policy.required_mode_for(name);
            assert_eq!(
                actual,
                PermissionMode::DangerFullAccess,
                "{name} must be DangerFullAccess: {why}; got {actual:?}"
            );
        }
    }
}