droidsaw 2.0.0 - Docs.rs

//! MCP server transport, feature-gated behind `--features mcp`.
//!
//! Shape 2 of the droidsaw-mcp → droidsaw-core merge. Every tool is a
//! thin delegate to `crate::commands::*` — the same function the CLI
//! dispatcher calls — serialized to the MCP wire instead of stdout.
//! No shadow structs, no duplicated param plumbing: one code path per
//! tool across both transports.
//!
//! `patch` is **not** ported — hermes patch remains outside the
//! umbrella by policy.

use std::sync::{Arc, Mutex};

use crate::mcp::sanitize::{sanitize_to_mcp_error, ErrorCategory};
use rmcp::handler::server::router::tool::ToolRouter;
use rmcp::handler::server::wrapper::Parameters;
use rmcp::model::{
    AnnotateAble, GetPromptRequestParams, GetPromptResult, ListPromptsResult,
    ListResourcesResult, PaginatedRequestParams, RawResource, ReadResourceRequestParams,
    ReadResourceResult, ResourceContents, ServerCapabilities, ServerInfo,
};
use rmcp::{schemars, tool, tool_handler, tool_router, ErrorData as McpError, ServerHandler};
use tokio_util::sync::CancellationToken;

use crate::context::CrossLayerContext;
use crate::mcp::concurrency::{ConcurrencyConfig, ToolClass};

pub mod classify;
pub mod concurrency;
pub mod prompts;
pub mod resources;
pub mod sanitize;
pub mod subprocess;

/// Droidsaw MCP server. Holds one optional `CrossLayerContext`
/// populated by the `load` tool; subsequent tool calls read from
/// it. `Mutex<Option<...>>` matches droidsaw-mcp's existing pattern
/// and works inside `tokio::spawn`-driven rmcp handler dispatch
/// because the `Send` bound is satisfied by the `CrossLayerContext`
/// fields (apk, `HbcOwned` self_cell, `Vec<DexFile>`).
///
/// ## Concurrency discipline (mcp-concurrency-cap)
///
/// Long-running tools (`audit-full`, `decompile-all`, `taint-analyze`)
/// route through `ConcurrencyConfig::acquire` before doing any work.
/// This enforces per-class caps (default: audit-full=1, decompile-all=1,
/// taint=2) and a per-class rate limit (default 8/min). Refused calls
/// return a typed `ConcurrencyExceeded` or `RateLimitExceeded` MCP error
/// rather than silently queuing behind the in-flight call.
///
/// Heavy sync work (`SQLite::open`, `Apk::parse`, subprocess spawns)
/// routes through `tokio::task::spawn_blocking` so it does not block
/// the tokio async executor.
pub struct DroidsawServer {
    /// The currently loaded file context, or `None` until the `load`
    /// tool has populated it. Shared behind `Arc<Mutex<..>>`.
    state: Arc<Mutex<Option<CrossLayerContext>>>,
    /// Most recently produced audit-DB path, used as a session-level
    /// fallback for tools that take an `Option<db_path>`
    /// (`query`, `investigate`, `taint`, `triage`). Written by the
    /// `audit` handler on success; cleared by `load` (a new file
    /// invalidates the prior session's DB). When a caller passes
    /// `db_path` explicitly, the explicit value wins — server-side
    /// state is a fallback for "I just ran audit and want to drill
    /// in", not authoritative.
    current_db: Arc<Mutex<Option<std::path::PathBuf>>>,
    /// Per-class concurrency caps and rate limits; see the
    /// "Concurrency discipline" section above.
    concurrency: Arc<ConcurrencyConfig>,
    /// rmcp router over this server's tools.
    /// NOTE(review): presumably generated by the `#[tool_router]`
    /// macro on the `impl` block — confirm there.
    tool_router: ToolRouter<Self>,
    /// Operator-set list of permitted tool classes. Default is
    /// `[ReadOnly, WritesTempfile]`. Operators expand via the
    /// `--allowed-tool-classes` CLI flag or `MCP_ALLOWED_CLASSES` env
    /// var to permit destructive classes. Dispatch refuses tools whose
    /// class is not in this set with `McpError::invalid_params`
    /// (typed `tool-class-not-allowed`).
    allowed_tool_classes: std::collections::BTreeSet<McpToolClass>,
}

// ── tool-class authorization (mcp-tool-auth-audit minimum subset) ──
//
// MCP tools have widely varying blast radius (pure-read vs. spawns-subprocess
// vs. writes-caller-path). The operator policy is set at startup via
// `--allowed-tool-classes`; runtime dispatch refuses out-of-policy invocations
// without executing.

/// Classification of an MCP tool's blast radius. Used by the dispatch gate
/// to enforce operator policy at the tool boundary.
///
/// Variants serialize under kebab-case names (`#[serde(rename_all)]`), the
/// same spellings that `as_kebab` returns and the `FromStr` impl accepts.
/// `Ord` is derived so values can be stored in the `BTreeSet` that holds
/// the operator allow-list.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash,
    serde::Serialize, serde::Deserialize,
)]
#[serde(rename_all = "kebab-case")]
pub enum McpToolClass {
    /// Pure-read analysis tool. No filesystem writes, no subprocess spawn,
    /// no persistent-state mutation. Default-allowed.
    ReadOnly,
    /// Writes only to droidsaw's tempdir (not caller-controlled).
    /// Default-allowed (load extracts to tempdir).
    WritesTempfile,
    /// Writes to a caller-supplied path. Default-DENIED; operator opts in.
    WritesCallerPath,
    /// Spawns a subprocess (semgrep, trufflehog, etc.). Default-DENIED.
    SpawnsSubprocess,
    /// Mutates persistent state (audit SQLite DB writes, etc.).
    /// Default-DENIED; operator opts in to allow finding triage etc.
    /// Also the fail-closed class `tool_class` assigns to unknown tools.
    ManagesState,
}

impl McpToolClass {
    /// Returns the kebab-case spelling of this class, as used on the CLI
    /// and in serde (de)serialization.
    pub fn as_kebab(self) -> &'static str {
        match self {
            Self::ReadOnly => "read-only",
            Self::WritesTempfile => "writes-tempfile",
            Self::WritesCallerPath => "writes-caller-path",
            Self::SpawnsSubprocess => "spawns-subprocess",
            Self::ManagesState => "manages-state",
        }
    }

    /// Returns the policy applied when the operator configures nothing:
    /// only `ReadOnly` and `WritesTempfile` are permitted. Every
    /// destructive class requires an explicit opt-in.
    pub fn default_allowed() -> std::collections::BTreeSet<Self> {
        std::collections::BTreeSet::from([Self::ReadOnly, Self::WritesTempfile])
    }

    /// Returns every known class in declaration order; used for help text
    /// and for the "valid: ..." list in parse errors.
    pub fn all() -> [Self; 5] {
        [
            Self::ReadOnly,
            Self::WritesTempfile,
            Self::WritesCallerPath,
            Self::SpawnsSubprocess,
            Self::ManagesState,
        ]
    }
}

/// Parses the kebab-case spelling produced by [`McpToolClass::as_kebab`].
/// Surrounding whitespace is ignored; matching is case-sensitive.
impl std::str::FromStr for McpToolClass {
    type Err = String;

    /// Returns the class whose kebab-case name equals the trimmed input, or
    /// an error message naming the rejected value and every valid spelling.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let known = Self::all();
        match known.iter().copied().find(|class| class.as_kebab() == s.trim()) {
            Some(class) => Ok(class),
            None => {
                let other = s.trim();
                let valid: Vec<&str> = known.iter().map(|class| class.as_kebab()).collect();
                Err(format!(
                    "unknown tool class '{other}'; valid: {}",
                    valid.join(", ")
                ))
            }
        }
    }
}

/// Map a tool name to its [`McpToolClass`].
///
/// Names missing from the tables below fail closed to `ManagesState`, the
/// most-restricted class, so a newly added tool is refused under the default
/// policy until someone classifies it here.
///
/// `pub` so the `droidsaw/fuzz` libFuzzer target (`fuzz_mcp_tool_class_dispatch`)
/// can call it without a `DroidsawServer` instance. The function has no
/// side effects and never panics — the properties that fuzz target verifies.
pub fn tool_class(name: &str) -> McpToolClass {
    // Pure-read analysis: parse + query, no writes, no spawn.
    const READ_ONLY: &[&str] = &[
        "manifest",
        "signing",
        "info",
        "query",
        "investigate",
        "taint",
        "strings",
        "xrefs",
        "frida",
        "decompile",
        "diff",
        // Classified ReadOnly under mcp-tool-auth-audit OQ-6:
        "apk_decompile",      // delegates to commands::decompile; no writes, no spawn
        "hbc_info",           // reads HBC metadata; no writes
        "hbc_functions",      // lists HBC functions
        "dex_classes",        // lists DEX classes
        "dex_methods",        // lists DEX methods
        "module_list",        // lists HBC modules
        "native_modules",     // lists native ELF modules
        "disasm",             // disassembles one HBC function
        "npm_packages",       // lists npm package metadata
        "call_graph",         // reads the call graph
        "apk_entries",        // lists APK ZIP entries
        "apk_elf",            // lists ELF files in the APK
        "apk_webview_assets", // reads webview assets in-memory; no writes
        "apk_resources",      // reads resources.arsc
        "apk_sbom",           // SBOM extraction
        "apk_scan_corpus",    // reads APKs from caller paths; no writes (+ per-path gate)
    ];

    // Writes to a caller-supplied path (validated via is_allowed_path
    // before executing).
    const WRITES_CALLER_PATH: &[&str] = &[
        "apk_export",          // writes SQLite to caller-supplied path
        "corpus_ingest",       // creates+populates SQLite at caller-supplied path
        "apk_semgrep_extract", // writes source tree to caller-supplied (or tempdir) path
        "apk_trufflehog",      // writes strings to caller-supplied path (OQ-3: most-restrictive)
    ];

    // Classified as subprocess-spawning:
    // - `audit`: full/semgrep/trufflehog modes spawn subprocesses. Basic mode
    //   does not, but the single-class policy takes the most restrictive
    //   classification across all modes (OQ-1 main override). Operators who
    //   only ever run basic mode still need `spawns-subprocess` in their
    //   allowed-tool-classes; see CHANGELOG.
    // - `apk_yara`: libyara compiles and executes caller-supplied rules
    //   in-process; the blast-radius posture is equivalent to subprocess
    //   execution (OQ-2 main override, conservative classification).
    const SPAWNS_SUBPROCESS: &[&str] = &["audit", "apk_yara"];

    // `load` extracts the APK into droidsaw's own temp directory.
    if name == "load" {
        return McpToolClass::WritesTempfile;
    }
    if READ_ONLY.contains(&name) {
        return McpToolClass::ReadOnly;
    }
    if SPAWNS_SUBPROCESS.contains(&name) {
        return McpToolClass::SpawnsSubprocess;
    }
    if WRITES_CALLER_PATH.contains(&name) {
        return McpToolClass::WritesCallerPath;
    }
    // `triage` UPDATEs rows in the audit SQLite DB.
    if name == "triage" {
        return McpToolClass::ManagesState;
    }

    // Unknown / unclassified tools: fail closed to the most-restricted class.
    // Adding a new tool requires updating the tables above.
    McpToolClass::ManagesState
}

// ── tool-tier exposure (operator-set, separate from tool-class auth) ─
//
// The operator can narrow the visible tool surface to a curated
// "core workflow" subset. The full surface (35+ tools) is great for a
// power user / developer poking at every layer; it's noise for an LLM
// agent doing routine triage. Tiering filters the visible surface so
// the model only sees the tools it actually needs.
//
// Orthogonal to tool-class: class governs what an operator can *do*
// (read vs write vs spawn); tier governs how much surface is *visible*
// to the model. A Basic-tier agent still routes destructive ops
// through the class gate; a Full-tier agent still sees the class
// refusal when it tries one without permission.

/// Visibility tier for an MCP tool. Set by the operator at server
/// startup via `--tool-tier`. Default `Full` preserves the
/// pre-tiering behavior — every tool is visible.
///
/// The `FromStr` impl parses the spellings `basic` and `full`; serde uses
/// the same kebab-case names via `#[serde(rename_all)]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum McpToolTier {
    /// Curated core-workflow surface (12 tools): `load`, `info`,
    /// `manifest`, `signing`, `audit`, `query`, `investigate`,
    /// `taint`, `triage`, `decompile`, `strings`, `xrefs`. Enough
    /// to load an APK, run the detector pipeline, drill into
    /// findings, and triage them.
    Basic,
    /// Full surface — every registered tool. Default.
    Full,
}

/// Parses `basic` or `full`. Surrounding whitespace is ignored; matching is
/// case-sensitive.
impl std::str::FromStr for McpToolTier {
    type Err = String;

    /// Returns the tier named by the trimmed input, or an error message
    /// naming the rejected value and the two valid spellings.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let other = s.trim();
        if other == "basic" {
            return Ok(Self::Basic);
        }
        if other == "full" {
            return Ok(Self::Full);
        }
        Err(format!("unknown tool tier '{other}'; valid: basic, full"))
    }
}

/// The 12 tools that stay visible when the operator sets
/// `--tool-tier=basic`; every other registered tool is hidden.
///
/// The inline comments on each group give the selection rationale.
///
/// Notably EXCLUDED from Basic (still available via Full):
/// `hbc_info`, `hbc_functions`, `dex_classes`, `dex_methods`,
/// `disasm`, `module_list`, `npm_packages`, `call_graph`,
/// `native_modules`, `frida`, `diff`, `apk_decompile`, `apk_export`,
/// `apk_elf`, `apk_entries`, `apk_resources`, `apk_sbom`,
/// `apk_scan_corpus`, `apk_semgrep_extract`, `apk_trufflehog`,
/// `apk_webview_assets`, `apk_yara`, `corpus_ingest`. Most of
/// these are subsumed by `audit` + `decompile` + `query` for
/// routine triage; the rest are specialist tools.
pub const BASIC_TIER_TOOLS: &[&str] = &[
    // Required entry points; must remain visible.
    "load", "info",
    // Security-relevant APK metadata; the answer to "is this APK
    // adversarial?" routinely starts here.
    "manifest", "signing",
    // The detector pipeline (single tool, modular modes).
    "audit",
    // Close the audit-to-decision loop on the persisted DB.
    "query", "investigate", "taint", "triage",
    // The three primitive inspections an agent needs to validate a finding.
    "decompile", "strings", "xrefs",
];

/// Return the visibility tier of the tool called `name`.
///
/// A name listed in [`BASIC_TIER_TOOLS`] is `Basic`; any other name —
/// including one this file has never heard of — is `Full`, which means it
/// is hidden when the operator selects the Basic tier.
fn tool_tier(name: &str) -> McpToolTier {
    let is_basic = BASIC_TIER_TOOLS.iter().any(|&listed| listed == name);
    if is_basic {
        return McpToolTier::Basic;
    }
    McpToolTier::Full
}

// ── param structs ──────────────────────────────────────────────────

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for loading a file — presumably the `load` tool;
// confirm against the handler. `path` has no serde default, so it is required.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct LoadFileParams {
    /// Path to an APK, HBC, or DEX file.
    pub path: String,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): empty parameter object for tools that take no arguments.
// There is no `deny_unknown_fields`, so serde ignores extra keys.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct NoParams {}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): filter/paging parameters for string listing — presumably the
// `strings` tool; confirm against the handler. Every field is optional on the
// wire. `limit` differs from the others: an omitted key becomes `Some(200)`
// via `default_strings_limit`, while an explicit JSON `null` still
// deserializes to `None`.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct StringsParams {
    /// Regex filter applied to each string.
    #[serde(default)]
    pub search: Option<String>,
    /// Minimum string length. Default 4 for `layer:native`; 0 for other layers.
    #[serde(default)]
    pub min_length: Option<usize>,
    /// Cap on returned items (default 200; pass a larger value explicitly).
    #[serde(default = "default_strings_limit")]
    pub limit: Option<usize>,
    /// Layer filter: `"dex"`, `"hbc"`, `"native"`, or omit for all layers.
    /// `"native"` surfaces strings from .rodata + .dynstr of every .so in
    /// the APK. Minimum-length default is 4 when `layer` is `"native"`.
    #[serde(default)]
    pub layer: Option<String>,
}

/// Serde default for `StringsParams::limit`: when the caller omits the key,
/// responses are capped at 200 items.
fn default_strings_limit() -> Option<usize> {
    const DEFAULT_ITEM_CAP: usize = 200;
    Some(DEFAULT_ITEM_CAP)
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): generic single-regex parameter object. `search` is optional;
// an omitted key deserializes to `None`.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct SearchParams {
    /// Regex filter.
    #[serde(default)]
    pub search: Option<String>,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for cross-reference lookup — presumably the
// `xrefs` tool; confirm against the handler. Both fields are optional and
// default to `None` when omitted.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct XrefsParams {
    /// Regex filter over the string being referenced.
    #[serde(default)]
    pub search: Option<String>,
    /// Cap on returned xrefs.
    #[serde(default)]
    pub limit: Option<usize>,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for Frida hook generation — presumably the `frida`
// tool; confirm against the handler. `search` has no serde default, so it is
// required.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct FridaParams {
    /// Regex pattern; functions referencing matching strings get hooks.
    pub search: String,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for a decompile request. All fields are optional
// on the wire: `target` defaults to `None`, `js` and `all` default to `false`.
// Nothing in this struct enforces "`target` or `all`"; that check, if any,
// lives in the handler.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct DecompileParams {
    /// Function ID (Hermes) or class name (DEX). Optional if `all=true`.
    #[serde(default)]
    pub target: Option<String>,
    /// Emit valid JavaScript (Hermes only).
    #[serde(default)]
    pub js: bool,
    /// Decompile every function.
    #[serde(default)]
    pub all: bool,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for manifest parsing — presumably the `manifest`
// tool; confirm against the handler. `strict` defaults to `false` (lenient)
// when omitted.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct ManifestParams {
    /// Opt back into hard-fail-on-unknown-chunk parsing. Default
    /// (`false`) is lenient: unknown AXML chunk types (e.g. the
    /// `0x0104` commercial-obfuscator marker observed on
    /// DexGuard-protected builds) are skipped and reported as
    /// `_meta.warnings` rather than crashing the parse. Set to `true`
    /// only when the caller specifically wants to detect malformed /
    /// non-standard AXML at the entry point.
    #[serde(default)]
    pub strict: bool,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for an audit run — presumably the `audit` tool;
// confirm against the handler. `entropy` falls back to `default_entropy()`
// (4.5) when omitted. `output` carries no `#[serde(default)]`, but serde
// treats a missing `Option` field as `None`, so it is optional too. `mode`
// and `update_db` stay `None` when omitted; the defaults described in their
// doc comments are applied outside this struct.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct AuditFullParams {
    /// Minimum entropy threshold for high-entropy string scan (bits/char).
    #[serde(default = "default_entropy")]
    pub entropy: f32,
    /// Directory to write extracted source into. Defaults to a temp dir.
    pub output: Option<String>,
    /// Detector selector — `"basic" | "full" | "semgrep" | "trufflehog"`.
    /// Default = `"basic"` (parser-side findings + bundled YARA only;
    /// no subprocess spawns; ~10–30 sec wall on most APKs). `"full"`
    /// is the prior all-or-nothing audit (basic + semgrep + trufflehog).
    /// `"semgrep"` and `"trufflehog"` overlay one subprocess each on
    /// top of basic. Parsed by `AuditMode::from_cli_str`; unknown values
    /// surface as a typed error.
    #[serde(default)]
    pub mode: Option<String>,
    /// Whether to upsert into the existing audit DB (default: `true`).
    /// When `false`, the existing rows for this mode are cleared before
    /// the new run inserts. Re-running the same mode with `update_db =
    /// true` does NOT duplicate findings (UPSERT by stable identity:
    /// `(layer, id_tag, source, detail)` tuple via SHA-256). Re-running
    /// a different mode adds rows under a new `mode` tag.
    #[serde(default)]
    pub update_db: Option<bool>,
}

/// Generate investigation leads from audit results.
/// Returns starter queries (self-service SQL) and capped finding prompts
/// (all Critical + top 3 unique High + top 3 unique Medium, deduplicated).
fn generate_investigation_leads(
    obj: &serde_json::Map<String, serde_json::Value>,
    db_path: &str,
) -> serde_json::Value {
    let taint_count = obj.get("taint_flow_count")
        .and_then(|v| v.as_u64())
        .unwrap_or(0);
    let finding_count = obj.get("finding_count")
        .and_then(|v| v.as_u64())
        .unwrap_or(0);
    let semgrep_ran = obj.get("semgrep")
        .and_then(|v| {
            // `serde_json::Value::index` returns `Value::Null` for missing keys
            // or wrong-typed values rather than panicking; `.as_bool()` on `Null`
            // returns `None`, so `.and_then` short-circuits safely.
            #[allow(
                clippy::indexing_slicing,
                reason = "serde_json::Value indexing returns Null on miss, not panic"
            )]
            v["semgrep_scan"]["ran"].as_bool()
        })
        .unwrap_or(false);

    // Views are pre-created in the DB. Agent just does SELECT * FROM <view>.
    let mut views: Vec<serde_json::Value> = vec![
        serde_json::json!({"view": "audit_summary", "description": "One-row summary: counts of findings, taint flows, secrets, semgrep hits, xrefs"}),
        serde_json::json!({"view": "actionable_findings", "description": "Critical/High semantic findings (noise filtered)"}),
        serde_json::json!({"view": "finding_context", "description": "Findings joined with their xrefs — what strings do flagged classes reference?"}),
        serde_json::json!({"view": "finding_urls", "description": "URLs and deep links referenced by classes with findings"}),
    ];

    if taint_count > 0 {
        views.push(serde_json::json!({"view": "taint_critical", "description": "High/Critical taint flows with source→sink types"}));
    }
    if semgrep_ran {
        views.push(serde_json::json!({"view": "semgrep_hotspots", "description": "Classes with most semgrep findings + which rules hit"}));
    }

    // Build finding-scoped prompts: all Critical + top 3 Medium+.
    let mut finding_prompts: Vec<serde_json::Value> = Vec::new();

    // General data prompt — orientation only. Expert review lives in analyze-apk prompt.
    let semgrep_note = if semgrep_ran { ", semgrep results persisted" } else { "" };
    finding_prompts.push(serde_json::json!({
        "scope": "general",
        "prompt": format!("\
{finding_count} findings, {taint_count} taint flows{semgrep_note}. Audit DB at {db_path}.
Orient with SELECT * FROM audit_summary, then SELECT * FROM actionable_findings. Views are pre-built — see the views list. Use investigate, decompile, xrefs, and manifest to investigate. Filter on gauge_class='Semantic' to skip noise."),
    }));

    // Per-finding prompts from the DB.
    // All Critical + top 3 unique High (by id_tag) + top 3 unique Medium.
    // Deduplicates by id_tag to avoid prompt blowup from repeated YARA hits
    // on binary assets.
    if !db_path.is_empty()
        && let Ok(db) = rusqlite::Connection::open_with_flags(
            db_path,
            rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY,
        )
    {
            // Pick the first (lowest rowid) representative per id_tag per severity band.
            let sql = "\
                SELECT rowid, severity, id_tag, detail FROM findings \
                WHERE severity IN ('Critical', 'High', 'Medium') \
                AND gauge_class = 'Semantic' \
                GROUP BY severity, id_tag \
                HAVING rowid = MIN(rowid) \
                ORDER BY CASE severity \
                    WHEN 'Critical' THEN 0 WHEN 'High' THEN 1 WHEN 'Medium' THEN 2 \
                END, rowid";
            if let Ok(mut stmt) = db.prepare(sql)
                && let Ok(mut rows) = stmt.query([])
            {
                let mut high_count = 0usize;
                let mut medium_count = 0usize;
                while let Ok(Some(row)) = rows.next() {
                    let rowid: i64 = row.get(0).unwrap_or(0);
                    let severity: String = row.get(1).unwrap_or_default();
                    let id_tag: String = row.get(2).unwrap_or_default();
                    let detail: String = row.get(3).unwrap_or_default();

                    // All Critical, top 3 unique High, top 3 unique Medium
                    match severity.as_str() {
                        "Critical" => {}
                        "High" => {
                            if high_count >= 3 { continue; }
                            // WHY: display counter capped at 3 by the guard above; saturating_add is exact here.
                            high_count = high_count.saturating_add(1);
                        }
                        "Medium" => {
                            if medium_count >= 3 { continue; }
                            // WHY: display counter capped at 3 by the guard above; saturating_add is exact here.
                            medium_count = medium_count.saturating_add(1);
                        }
                        _ => continue,
                    }

                    // Count how many total findings share this id_tag
                    let siblings = db.query_row(
                        "SELECT COUNT(*) FROM findings WHERE id_tag = ?1 AND severity = ?2",
                        rusqlite::params![&id_tag, &severity],
                        |r| r.get::<_, i64>(0),
                    ).unwrap_or(1);

                    let plural = if siblings > 1 { "s" } else { "" };
                    finding_prompts.push(serde_json::json!({
                        "scope": "finding",
                        "finding_rowid": rowid,
                        "severity": severity,
                        "id_tag": id_tag,
                        "total_with_this_tag": siblings,
                        "prompt": format!("\
Investigate {severity} finding: {id_tag} ({siblings} instance{plural})
Representative: #{rowid} — {detail}

Determine whether this is a true positive or noise (e.g., YARA pattern matching a binary asset vs. actual crypto misuse in code). If real, assess the impact and what an attacker could do with it. Use decompile to read the source, plus xrefs, finding_xrefs, and manifest to build context."),
                    }));
                }
            }
    }

    let mut leads = serde_json::Map::new();
    leads.insert("views".into(), serde_json::json!(views));
    leads.insert("finding_prompts".into(), serde_json::json!(finding_prompts));
    leads.insert("db_path".into(), serde_json::json!(db_path));

    serde_json::Value::Object(leads)
}

/// Serde default for `AuditFullParams::entropy`: the minimum entropy
/// threshold, in bits per character, used when the caller omits the key.
fn default_entropy() -> f32 {
    const DEFAULT_ENTROPY_BITS_PER_CHAR: f32 = 4.5;
    DEFAULT_ENTROPY_BITS_PER_CHAR
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for a SQLite export — presumably the `apk_export`
// tool; confirm against the handler. `output` has no serde default, so it is
// required.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct ExportParams {
    /// Output SQLite file path.
    pub output: String,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for disassembling one Hermes function —
// presumably the `disasm` tool; confirm against the handler. `func_id` is
// required and must fit in a `u32`.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct DisasmParams {
    /// Hermes function id (from the `hbc_functions` tool).
    pub func_id: u32,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for call-graph queries — presumably the
// `call_graph` tool; confirm against the handler. Both fields deserialize to
// `None` when omitted; the "default 50" cap named below is not applied by
// this struct, so it must come from the consumer.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct CallGraphParams {
    /// Regex filter on caller function name.
    #[serde(default)]
    pub search: Option<String>,
    /// Cap on returned edges (default 50).
    #[serde(default)]
    pub limit: Option<usize>,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for DEX class decompilation. Every field is
// optional and deserializes to `None` when omitted. `mode` is a free-form
// string here; validation of "full" / "outline" is not done by this struct.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct DexDecompileParams {
    /// 0-based global class index across all loaded DEX files.
    #[serde(default)]
    pub class_index: Option<usize>,
    /// Regex on class descriptor (JVM format, e.g. `Lcom/example/Foo;`).
    #[serde(default)]
    pub search: Option<String>,
    /// Output mode: "full" (default; complete decompiled source) or
    /// "outline" (class header + method signatures + first ~20 lines
    /// per body, with `// ... N more lines elided` markers). Outline
    /// mode bounds per-class output for triage on long classes
    /// (deeplink routers, generated facade classes).
    #[serde(default)]
    pub mode: Option<String>,
    /// If set, keep only methods whose name matches an entry in this
    /// list. Match is exact on the method name (last identifier
    /// before `(`); overloads with the same name all fire. Class
    /// header + field decls + matching methods survive; non-matching
    /// methods are stripped.
    #[serde(default)]
    pub methods: Option<Vec<String>>,
    /// When `true`, return only matching class descriptors + estimated
    /// sizes — do NOT invoke the decompiler.  Response shape:
    /// `{classes: [{layer, class_index, class_descriptor,
    /// estimated_method_count, estimated_output_tokens}], _meta}`.
    /// Use this to preview how many classes a regex matches and how
    /// large the output would be before committing to a full decompile.
    /// Returns an empty list (not an error) when zero classes match.
    #[serde(default)]
    pub dry_run: Option<bool>,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for diffing against the loaded file — presumably
// the `diff` tool; confirm against the handler. `path` has no serde default,
// so it is required.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct DiffParams {
    /// Path to the new file (APK or HBC). The currently-loaded file is
    /// treated as the baseline.
    pub path: String,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for corpus ingestion — presumably the
// `corpus_ingest` tool; confirm against the handler. `dir` and `output` are
// required; `tag` defaults to `None`; `skip_existing` defaults to `true` via
// `default_true`.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct CorpusIngestParams {
    /// Directory (or single APK path) to ingest; directories are walked
    /// recursively for `.apk` files.
    pub dir: String,
    /// Output SQLite database path.
    pub output: String,
    /// Optional corpus label applied to every ingested row.
    #[serde(default)]
    pub tag: Option<String>,
    /// Skip APKs already present in the database (default true).
    #[serde(default = "default_true")]
    pub skip_existing: bool,
}

/// Serde default helper for boolean fields that are on unless the caller
/// turns them off (used by `CorpusIngestParams::skip_existing`).
fn default_true() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for a YARA scan — presumably the `apk_yara` tool;
// confirm against the handler. Every field is optional on the wire:
// `rules_src`, `rules` and `limit` default to `None`; `target` defaults to
// `"all"` via `default_yara_target`. The precedence of `rules_src` over
// `rules` is documented below but not enforced by this struct.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct YaraParams {
    /// Inline YARA rule source to compile and run. Takes priority over `rules`.
    /// Use this to write rules on the fly without needing a file on disk.
    #[serde(default)]
    pub rules_src: Option<String>,
    /// Path to a `.yar`/`.yara` file or a directory of rule files.
    /// Omit (along with `rules_src`) to use the bundled credential ruleset
    /// (AWS, Google API key, Firebase, Stripe, Slack, GitHub PAT, SendGrid,
    /// Twilio, Mailgun, JWT).
    #[serde(default)]
    pub rules: Option<String>,
    /// Scan scope: manifest, dex, resources, native, assets, or all (default all).
    #[serde(default = "default_yara_target")]
    pub target: String,
    /// Cap on returned matches.
    #[serde(default)]
    pub limit: Option<usize>,
}

/// Serde default for `YaraParams::target`: scan every scope (`"all"`) when
/// the caller omits the key.
fn default_yara_target() -> String {
    String::from("all")
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for extracting a source tree for semgrep —
// presumably the `apk_semgrep_extract` tool; confirm against the handler.
// `output` deserializes to `None` when omitted; the default directory named
// below is chosen by the consumer, not by this struct.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct SemgrepParams {
    /// Output directory for the extracted source tree. Defaults to
    /// `./droidsaw-semgrep-<basename>` if omitted.
    #[serde(default)]
    pub output: Option<String>,
}
// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for dumping strings for trufflehog — presumably
// the `apk_trufflehog` tool; confirm against the handler. `min_length`
// defaults to 8 via `default_trufflehog_min_length`; `search` and `output`
// default to `None`.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct TrufflehogParams {
    /// Minimum string length to include (default 8).
    #[serde(default = "default_trufflehog_min_length")]
    pub min_length: usize,
    /// Optional regex filter applied line-by-line after collection.
    #[serde(default)]
    pub search: Option<String>,
    /// Output file path. If set, strings are written as newline-delimited
    /// text to this path and the tool returns the path + line count.
    /// Pipe the file to `trufflehog filesystem <path>` for full scanning.
    /// If omitted, a temp file is written automatically.
    #[serde(default)]
    pub output: Option<String>,
}

/// Serde default for `TrufflehogParams::min_length`: strings shorter than
/// 8 characters are excluded when the caller omits the key.
fn default_trufflehog_min_length() -> usize {
    const DEFAULT_MIN_LENGTH: usize = 8;
    DEFAULT_MIN_LENGTH
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for scanning several APKs — presumably the
// `apk_scan_corpus` tool; confirm against the handler. `paths` is required
// (an empty array still deserializes); `min_severity` defaults to `"info"`
// via `default_min_severity` and is a free-form string at this layer.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct ScanCorpusParams {
    /// One or more APK paths (directories are recursed for .apk files).
    pub paths: Vec<String>,
    /// Minimum severity threshold: critical, high, medium, low, info.
    #[serde(default = "default_min_severity")]
    pub min_severity: String,
}

/// Serde default for `ScanCorpusParams::min_severity`: `"info"`, the last
/// entry in that field's documented list of thresholds.
fn default_min_severity() -> String {
    String::from("info")
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for listing archive entries — presumably the
// `apk_entries` tool; confirm against the handler. Both fields are optional
// and default to `None` when omitted.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct EntriesParams {
    /// Regex filter applied to entry names.
    #[serde(default)]
    pub search: Option<String>,
    /// Cap on returned entries.
    #[serde(default)]
    pub limit: Option<usize>,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for listing native libraries — presumably the
// `apk_elf` tool; confirm against the handler. `search` is optional and
// defaults to `None` when omitted.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct ElfParams {
    /// Regex filter applied to native library path.
    #[serde(default)]
    pub search: Option<String>,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for resource-table queries — presumably the
// `apk_resources` tool; confirm against the handler. Both fields are optional
// and default to `None` when omitted.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct ResourcesParams {
    /// Regex filter over resource key or string value.
    #[serde(default)]
    pub search: Option<String>,
    /// Cap on returned rows.
    #[serde(default)]
    pub limit: Option<usize>,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for webview asset listing/extraction — presumably
// the `apk_webview_assets` tool; confirm against the handler. Both fields are
// optional and default to `None`; how `search` and `extract` interact when
// both are set is decided by the consumer, not by this struct.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct WebviewAssetsParams {
    /// Regex filter applied to asset path.
    #[serde(default)]
    pub search: Option<String>,
    /// Extract a single asset by exact path; returns content_utf8.
    #[serde(default)]
    pub extract: Option<String>,
}

// Maintainer note (plain `//`, because schemars turns `///` text into schema
// descriptions): parameters for read-only SQL against an audit DB —
// presumably the `query` tool; confirm against the handler. `sql` is
// required; `db_path` defaults to `None`; `limit` defaults to 200 via
// `default_db_query_limit`. The PRAGMA names listed on `sql` mirror
// `PRAGMA_ALLOWLIST` — keep the two in sync. This struct does not validate
// the SQL itself.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct DbQueryParams {
    /// Path to the SQLite database. Optional — when omitted, defaults
    /// to the DB produced by this session's most recent `audit` call.
    /// Pass an explicit path to query a DB from a different session.
    #[serde(default)]
    pub db_path: Option<String>,
    /// SQL to execute. Either a `SELECT` statement, or one of the
    /// curated read-only PRAGMAs:
    /// `PRAGMA table_info(<table>)`, `PRAGMA table_xinfo(<table>)`,
    /// `PRAGMA index_list(<table>)`, `PRAGMA foreign_key_list(<table>)`.
    /// Side-effecting PRAGMAs and
    /// the assignment form (`PRAGMA <name> = <value>`) are rejected.
    pub sql: String,
    /// Cap on returned rows (default 200).
    #[serde(default = "default_db_query_limit")]
    pub limit: usize,
}

/// Serde default for `DbQueryParams::limit`: at most 200 rows are returned
/// when the caller omits the key.
fn default_db_query_limit() -> usize {
    const DEFAULT_ROW_CAP: usize = 200;
    DEFAULT_ROW_CAP
}

/// Read-only PRAGMAs the `query` tool accepts in addition to `SELECT`.
///
/// Each entry is schema introspection as documented at
/// https://sqlite.org/pragma.html and cannot mutate database state; the
/// inline comments say what each one returns.
///
/// Any PRAGMA not listed here is rejected unconditionally — notably the
/// side-effecting ones such as `journal_mode`, `writable_schema`,
/// `defer_foreign_keys`, `secure_delete` and `foreign_keys`. The assignment
/// form (`PRAGMA <name> = <value>`) is rejected even for names on this list.
const PRAGMA_ALLOWLIST: &[&str] = &[
    // Column list of a table.
    "table_info",
    // Column list of a table, including hidden columns.
    "table_xinfo",
    // Indices defined on a table.
    "index_list",
    // Foreign-key constraints declared by a table.
    "foreign_key_list",
];

/// Default per-MCP-call parse budget in bytes (100 MiB).
///
/// MCP `load` is the trust boundary: every server-side parse charges
/// `data.len()` against this budget before starting, so adversarial inputs
/// cannot grow the parser's RSS without bound. Materially below the
/// 4 GiB CLI default (`ParseBudget::default_production`) but large enough
/// for any realistic APK / HBC / DEX that an MCP caller would send.
///
/// The parallel-sibling `parse` / `parse_budgeted` API let MCP `load`
/// silently bypass the budget; the single canonical
/// `parse(..., Option<&mut ParseBudget>)` plus this constant make the
/// MCP trust-boundary choice explicit.
const MCP_LOAD_DEFAULT_BUDGET_BYTES: usize = 100 * 1024 * 1024;

/// Validate a SQL string for the `query` MCP tool. Accepts:
/// - any `SELECT ...` statement (current behavior, case-insensitive prefix)
/// - `PRAGMA <name>(<arg>)` where `<name>` is in [`PRAGMA_ALLOWLIST`] and
///   `<arg>` is non-empty
///
/// Rejects everything else: writes (`INSERT`/`UPDATE`/`DELETE`/`CREATE`/
/// `DROP`/`ATTACH`/etc.), the PRAGMA assignment form
/// (`PRAGMA <name> = <value>`) regardless of whether `<name>` is allowlisted,
/// and any PRAGMA whose name is not on the allowlist.
///
/// Returns `Ok(())` if the SQL is permitted, otherwise a static-string
/// rejection reason suitable for `McpError::invalid_params`.
pub fn is_allowed_query_sql(sql: &str) -> Result<(), &'static str> {
    let trimmed = sql.trim_start();
    let lower = trimmed.to_ascii_lowercase();

    if lower.starts_with("select") {
        return Ok(());
    }

    if let Some(rest) = lower.strip_prefix("pragma") {
        // Require whitespace separator after `pragma` to avoid matching
        // identifiers like `pragmatic_view`.
        let after_kw = match rest.chars().next() {
            Some(c) if c.is_ascii_whitespace() => rest.trim_start(),
            _ => return Err("only SELECT statements and curated read-only PRAGMAs are permitted"),
        };

        // Reject the assignment form unconditionally. SQLite accepts
        // `PRAGMA name = value` and `PRAGMA name(value)` syntax; the
        // assignment form is the side-effecting one for many PRAGMAs.
        // Closing it off entirely keeps the rejection surface tight.
        if after_kw.contains('=') {
            return Err("PRAGMA assignment form (PRAGMA <name> = <value>) is not permitted");
        }

        // Parse `<name>(<arg>)`. Find the opening paren; everything
        // before is the PRAGMA name (after trim). Require a closing
        // paren and a non-empty arg between.
        let lparen = after_kw
            .find('(')
            .ok_or("PRAGMA must use parenthesized form: PRAGMA <name>(<arg>)")?;
        // `str::find` always returns a char-boundary index, so slicing
        // `..lparen` cannot split a multibyte codepoint.
        #[allow(
            clippy::string_slice,
            reason = "lparen is a char-boundary index returned by str::find"
        )]
        let name = after_kw[..lparen].trim();
        if !PRAGMA_ALLOWLIST.contains(&name) {
            return Err(
                "PRAGMA name is not on the allowlist (allowed: table_info, table_xinfo, index_list, foreign_key_list)",
            );
        }

        // Use saturating_add to keep clippy::arithmetic_side_effects
        // happy on the index math; in practice `lparen` is bounded by
        // `after_kw.len()` so the arithmetic never overflows on real
        // input, but the lint forces us to be explicit about it.
        let after_lparen_start = lparen.saturating_add(1);
        let after_lparen = after_kw
            .get(after_lparen_start..)
            .ok_or("PRAGMA must use parenthesized form: PRAGMA <name>(<arg>)")?;
        let rparen = after_lparen
            .rfind(')')
            .ok_or("PRAGMA must use parenthesized form: PRAGMA <name>(<arg>)")?;
        let arg = after_lparen
            .get(..rparen)
            .ok_or("PRAGMA must use parenthesized form: PRAGMA <name>(<arg>)")?
            .trim();
        if arg.is_empty() {
            return Err("PRAGMA argument must be non-empty");
        }

        // Anything after the closing paren must be whitespace or empty
        // (no trailing statement chaining like `PRAGMA table_info(x); DROP ...`).
        let after_rparen_start = rparen.saturating_add(1);
        let trailing = after_lparen.get(after_rparen_start..).unwrap_or("");
        if !trailing.trim().is_empty() {
            return Err("trailing content after PRAGMA <name>(<arg>) is not permitted");
        }

        return Ok(());
    }

    Err("only SELECT statements and curated read-only PRAGMAs are permitted")
}

/// Role of a caller-supplied path at the MCP parameter boundary.
///
/// Each role expresses a contract on the canonicalized result: existence,
/// kind (file vs directory), and (for write targets) parent-existence.
/// The discriminator is also used to select the forbidden-prefix policy:
/// `WriteOutput` applies an extended denylist that covers configuration,
/// library, and system-binary trees in addition to the base read-side
/// restrictions.
///
/// Passed to [`is_allowed_path`], which performs the actual checks. There
/// `WriteOutput` is the only role that canonicalizes the *parent* rather
/// than the path itself, and the only one that consults
/// `DROIDSAW_MCP_ALLOWED_WRITE_PATH` instead of
/// `DROIDSAW_MCP_ALLOWED_READ_PATH`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathRole {
    /// Path must point to an existing regular file readable as a parser
    /// input (APK / AAB / DEX / HBC / ZIP).
    LoadInput,
    /// Path must point to an existing regular file (a SQLite database
    /// produced by a prior `audit` call).
    ///
    /// Checked identically to [`PathRole::LoadInput`] in `is_allowed_path`;
    /// the separate variant records intent at the call site.
    Database,
    /// Path must point to an existing directory (corpus ingest etc.).
    LoadDirectory,
    /// Path must point to an existing regular file OR directory. Used
    /// by `apk_yara`'s `rules` parameter — the YARA command accepts
    /// either a single `.yar` file OR a directory of `.yar` files.
    /// The MCP boundary check doesn't know the operator's intent yet,
    /// so it accepts either kind here and the downstream `commands::yara`
    /// dispatcher branches on `is_dir` / `is_file`.
    LoadInputOrDirectory,
    /// Path is a write target (SQLite, directory tree, or text output —
    /// unified policy today; per-artifact variant split deferred until
    /// policy diverges). The file itself need not exist, but its parent
    /// directory must canonicalize (i.e., already exist and be reachable).
    /// Used for `output` / `--out` parameters that materialize a fresh
    /// artifact alongside existing input.
    ///
    /// WHY: single write policy — no per-artifact denylist divergence today;
    /// extend to `WriteSqlite` / `WriteDirectory` / `WriteText` variants
    /// when policy splits.
    WriteOutput,
}

// Forbidden prefixes that apply to ALL roles (read and write).
// Closing the A4 gap: `/etc/` was previously absent from the read-side
// denylist. It is now universal so a LoadInput caller cannot read
// `/etc/passwd` or `/etc/shadow` under default config.
//
// Mac note: `/etc`, `/var`, and `/tmp` are symlinks to `/private/etc`,
// `/private/var`, and `/private/tmp` respectively on macOS. The
// `is_allowed_path` helper canonicalizes incoming paths before the
// prefix check, so a caller-supplied `/etc/passwd` resolves to
// `/private/etc/passwd` and the bare `/etc/` prefix would miss it.
// We include the `/private/...` aliases so the denylist catches the
// canonicalized form on macOS. On Linux these paths don't exist and
// add no surface.
//
// Every entry ends with `/`. What that means for the bare directory depends
// on the matcher used in `is_allowed_path`: a plain string-prefix test does
// NOT match the directory itself (canonical `/etc` lacks the trailing slash),
// whereas component-wise `Path::starts_with` ignores the trailing separator
// and matches both `/etc` and everything beneath it. In either case sibling
// names such as `/etcetera` are not matched.
//
// Only `/etc` has a `/private/...` alias here; `/proc`, `/sys`, `/dev` and
// `/run/secrets` are listed without one.
const READ_FORBIDDEN: &[&str] = &[
    "/proc/",
    "/sys/",
    "/dev/",
    "/run/secrets/",
    "/etc/",
    "/private/etc/",
];

// Additional forbidden prefixes that apply only to WriteOutput paths.
// Configuration, library, and system-binary trees that are writable on
// many Linux installs but must never be reachable via the MCP boundary.
// `/private/var/` mirrors the macOS-symlink case (see READ_FORBIDDEN note).
//
// These are checked in addition to READ_FORBIDDEN, never instead of it.
//
// NOTE(review): the list covers `/var/lib` and the four system-binary
// directories; `/usr/lib/`, `/lib/`, `/usr/local/bin/` and similar trees are
// not listed even though the comment above mentions library trees — confirm
// whether that omission is intentional before relying on this as a complete
// write denylist.
const WRITE_EXTRA_FORBIDDEN: &[&str] = &[
    "/var/lib/",
    "/private/var/lib/",
    "/usr/bin/",
    "/usr/sbin/",
    "/bin/",
    "/sbin/",
];

/// Validate a caller-supplied filesystem path at the MCP parameter
/// boundary and return its canonicalized form.
///
/// Handles both read and write roles via the `PathRole` discriminator.
/// Write paths (`PathRole::WriteOutput`) receive an extended forbidden-
/// prefix check that adds system-binary and library trees on top of the
/// base read-side restrictions.
///
/// **Rejection reasons** (returned as `McpError::invalid_params`):
/// - Empty string.
/// - Embedded NUL byte (string-layer).
/// - Path that fails to canonicalize (`std::fs::canonicalize` Err — typically
///   missing entry; symlink loops and access-denied also land here).
///   For `WriteOutput`, the parent directory is canonicalized instead
///   (the file itself need not exist yet). If the write target *does*
///   already exist it is canonicalized too, so a symlink leaf is resolved
///   before any policy check; a dangling symlink leaf is rejected.
/// - Canonicalized path contains non-UTF-8 bytes.
/// - Canonicalized path resolves into a restricted system tree.
///   Read roles: `/proc/`, `/sys/`, `/dev/`, `/run/secrets/`, `/etc/`.
///   Write role: all of the above plus `/var/lib/`, `/usr/bin/`,
///   `/usr/sbin/`, `/bin/`, `/sbin/`.
///   The post-canonicalize check catches symlinks into these trees.
///   Matching is component-wise (`Path::starts_with`), so the restricted
///   directory itself (e.g. `/etc`) is covered as well as its contents,
///   while look-alike siblings (e.g. `/etcetera`) are not.
/// - Wrong kind for the role (LoadInput / Database expect a regular file;
///   LoadDirectory expects a directory; LoadInputOrDirectory accepts either).
/// - When `DROIDSAW_MCP_ROOT` env var is set + non-empty, paths resolving
///   outside the canonicalized root are rejected for ALL roles (read and
///   write). One env var sandboxes both directions — operators set it once.
///   Unset = no base-prefix constraint (local-dev back-compat per brief).
///
/// **Operator allow-list.** `DROIDSAW_MCP_ALLOWED_READ_PATH` (read roles) /
/// `DROIDSAW_MCP_ALLOWED_WRITE_PATH` (`WriteOutput`) hold colon-separated
/// directories that are exempt from the denylist. Entries are canonicalized
/// and matched component-wise, so allowing `/data/app` does not also allow
/// `/data/app-other`. The allow-list never overrides `DROIDSAW_MCP_ROOT`.
///
/// **Returns** the canonicalized [`PathBuf`] on success — callers should
/// use this rather than re-constructing from `&params.<field>`, since
/// it has symlinks resolved + `..` segments normalized.
///
/// The check is a point-in-time validation; it does not hold the path open,
/// so it cannot by itself prevent the filesystem from changing afterwards.
///
/// [`PathBuf`]: std::path::PathBuf
pub fn is_allowed_path(p: &str, role: PathRole) -> Result<std::path::PathBuf, McpError> {
    if p.is_empty() {
        return Err(McpError::invalid_params("path is empty", None));
    }
    if p.contains('\0') {
        return Err(McpError::invalid_params("path contains NUL byte", None));
    }

    let is_write = matches!(role, PathRole::WriteOutput);
    let path = std::path::Path::new(p);

    let canon = match role {
        PathRole::WriteOutput => {
            // Write targets may not exist yet; canonicalize the parent
            // and re-attach the file-name component.
            let parent = path.parent().ok_or_else(|| {
                McpError::invalid_params("write path has no parent directory component", None)
            })?;
            let name = path.file_name().ok_or_else(|| {
                McpError::invalid_params("write path has no file-name component", None)
            })?;
            // Empty parent ("foo" as a relative bare filename) canonicalizes
            // to "." — we honor that by treating CWD as the parent.
            let parent_canon = if parent.as_os_str().is_empty() {
                std::fs::canonicalize(".").map_err(|e| {
                    McpError::invalid_params(
                        format!("write path parent (cwd) canonicalization failed: {e}"),
                        None,
                    )
                })?
            } else {
                std::fs::canonicalize(parent).map_err(|e| {
                    McpError::invalid_params(
                        format!("write path parent canonicalization failed: {e}"),
                        None,
                    )
                })?
            };
            let target = parent_canon.join(name);
            // If something already sits at the target, resolve it. Without
            // this a pre-existing symlink leaf would pass every check below
            // under its own (innocent) name while the eventual write follows
            // the link into a restricted tree or out of DROIDSAW_MCP_ROOT.
            // `symlink_metadata` does not follow links, so dangling symlinks
            // are detected here and then rejected by `canonicalize`.
            if std::fs::symlink_metadata(&target).is_ok() {
                std::fs::canonicalize(&target).map_err(|e| {
                    McpError::invalid_params(
                        format!("write path canonicalization failed: {e}"),
                        None,
                    )
                })?
            } else {
                target
            }
        }
        // Spelled out (no `_` arm) so adding a `PathRole` variant forces a
        // decision about how it is canonicalized.
        PathRole::LoadInput
        | PathRole::Database
        | PathRole::LoadDirectory
        | PathRole::LoadInputOrDirectory => std::fs::canonicalize(path).map_err(|e| {
            McpError::invalid_params(
                format!("path canonicalization failed (missing entry or access denied): {e}"),
                None,
            )
        })?,
    };

    // Non-UTF-8 canonical paths are refused outright (unchanged contract:
    // callers receive a path that can always be rendered as `&str`).
    if canon.to_str().is_none() {
        return Err(McpError::invalid_params(
            "canonicalized path contains non-UTF-8 bytes",
            None,
        ));
    }

    // Operator opt-in escape hatch: an explicit allow-prefix exempts the
    // path from the role-specific denylist. Configured via
    // `DROIDSAW_MCP_ALLOWED_READ_PATH` (read roles) and
    // `DROIDSAW_MCP_ALLOWED_WRITE_PATH` (WriteOutput); the CLI flags
    // `--allowed-read-path` and `--allowed-write-path` set these env vars
    // at startup. Multiple paths colon-separated. Canonicalized once per
    // call (acceptable — short config-time list, not request-hot).
    // Entries that fail to canonicalize are ignored.
    //
    // Security note: DROIDSAW_MCP_ROOT below STILL applies even when an
    // explicit allow matches — operators can punch denylist holes but
    // cannot escape the sandbox root.
    let explicit_allow_env = if is_write {
        "DROIDSAW_MCP_ALLOWED_WRITE_PATH"
    } else {
        "DROIDSAW_MCP_ALLOWED_READ_PATH"
    };
    let explicitly_allowed = std::env::var(explicit_allow_env)
        .ok()
        .filter(|list| !list.is_empty())
        .is_some_and(|list| {
            list.split(':')
                .filter(|entry| !entry.is_empty())
                .filter_map(|entry| std::fs::canonicalize(entry).ok())
                // Component-wise: `/data/app` must not match `/data/app-evil`.
                .any(|allowed| canon.starts_with(&allowed))
        });

    // Read roles: READ_FORBIDDEN only. WriteOutput: READ_FORBIDDEN + WRITE_EXTRA_FORBIDDEN.
    // Skipped entirely when the operator explicitly allowed this prefix.
    if !explicitly_allowed {
        let extra: &[&str] = if is_write { WRITE_EXTRA_FORBIDDEN } else { &[] };
        for &prefix in READ_FORBIDDEN.iter().chain(extra) {
            // `Path::starts_with` compares whole components and ignores the
            // trailing `/` in `prefix`, so both the restricted directory
            // itself and anything below it are rejected.
            if canon.starts_with(std::path::Path::new(prefix)) {
                return Err(McpError::invalid_params(
                    format!(
                        "path resolves into restricted system tree {} (canonicalized form withheld)",
                        prefix
                    ),
                    None,
                ));
            }
        }
    }

    // Role-specific kind check.
    match role {
        PathRole::LoadInput | PathRole::Database => {
            if !canon.is_file() {
                return Err(McpError::invalid_params(
                    "path does not refer to a regular file",
                    None,
                ));
            }
        }
        PathRole::LoadDirectory => {
            if !canon.is_dir() {
                return Err(McpError::invalid_params(
                    "path does not refer to a directory",
                    None,
                ));
            }
        }
        PathRole::LoadInputOrDirectory => {
            if !canon.is_file() && !canon.is_dir() {
                return Err(McpError::invalid_params(
                    "path does not refer to a regular file or directory",
                    None,
                ));
            }
        }
        PathRole::WriteOutput => {
            // Parent existence was checked above (canonicalize succeeded);
            // we deliberately do NOT require the target itself to exist,
            // since the caller may be creating it for the first time.
        }
    }

    // Optional base-prefix enforcement via `DROIDSAW_MCP_ROOT` env.
    // Covers BOTH read and write roles — one env var sandboxes both
    // directions. Operators who set it get full MCP coverage.
    // Unset or empty = no constraint (preserves local-dev ergonomics;
    // see brief Behavior contract clause).
    if let Ok(root) = std::env::var("DROIDSAW_MCP_ROOT")
        && !root.is_empty()
    {
        let root_canon = std::fs::canonicalize(&root).map_err(|e| {
            McpError::invalid_params(
                format!("DROIDSAW_MCP_ROOT canonicalization failed: {e}"),
                None,
            )
        })?;
        if !canon.starts_with(&root_canon) {
            return Err(McpError::invalid_params(
                "path is outside the configured DROIDSAW_MCP_ROOT",
                None,
            ));
        }
    }

    Ok(canon)
}

/// Back-compat alias: [`is_allowed_path`] with a read-only role.
///
/// Call sites that only ever pass read roles may continue to use this
/// name; new call sites should prefer `is_allowed_path` directly.
///
/// The `role` argument is forwarded unchanged and is not inspected here, so
/// despite the name nothing in this wrapper rejects `PathRole::WriteOutput`;
/// passing only read roles is a convention kept by the callers. Arguments,
/// return value and errors are exactly those of [`is_allowed_path`].
#[inline]
pub fn is_allowed_load_path(p: &str, role: PathRole) -> Result<std::path::PathBuf, McpError> {
    is_allowed_path(p, role)
}

// Request parameters for a finding-triage tool (handler not visible in this
// chunk). `rowid` and `action` are required keys; `db_path` and `reason`
// deserialize to `None` when omitted.
//
// `action` is a free-form `String` at this layer: the "confirm"/"dismiss"
// vocabulary and the "reason required for dismiss" rule stated in the field
// docs are not enforced by deserialization, so they must be validated by
// whoever consumes this struct (TODO confirm in the handler).
//
// NOTE: the `///` field docs are exported as JSON-schema descriptions by
// `schemars::JsonSchema` and are therefore client-visible.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct TriageParams {
    /// Path to the audit SQLite database. Optional — when omitted,
    /// defaults to the DB produced by this session's most recent
    /// `audit` call.
    #[serde(default)]
    pub db_path: Option<String>,
    /// Finding rowid to triage.
    pub rowid: i64,
    /// Action: "confirm" or "dismiss".
    pub action: String,
    /// Reason (required for dismiss, optional for confirm).
    #[serde(default)]
    pub reason: Option<String>,
}

// Request parameters for a finding-context lookup tool (handler not visible
// in this chunk). Every key is optional at the serde level: omitted options
// become `None` and `decompile` becomes `false`.
//
// `rowid` and `search` are documented as alternatives, but the struct itself
// accepts neither or both — resolving that is left to the consumer
// (TODO confirm how the handler treats those cases).
//
// NOTE: the `///` field docs are exported as JSON-schema descriptions by
// `schemars::JsonSchema` and are therefore client-visible.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct FindingContextParams {
    /// Path to the audit SQLite database returned by `audit`. Optional
    /// — when omitted, defaults to the DB produced by this session's
    /// most recent `audit` call.
    #[serde(default)]
    pub db_path: Option<String>,
    /// Finding rowid to look up (from `db_queries.all_high`).
    #[serde(default)]
    pub rowid: Option<i64>,
    /// FTS5 search term to find a finding by detail text (alternative to rowid).
    #[serde(default)]
    pub search: Option<String>,
    /// Also decompile the top callers (expensive for large HBC bundles).
    #[serde(default)]
    pub decompile: bool,
}

// Request parameters for the `taint` MCP tool. All keys are optional; with
// no filters set the query is unfiltered.
//
// `source_type` / `sink_type` are caller-controlled strings. They are meant
// to reach SQLite only as bound parameters via `build_taint_where_clause`,
// never by interpolation into the SQL text.
//
// NOTE: the `///` field docs are exported as JSON-schema descriptions by
// `schemars::JsonSchema` and are therefore client-visible.
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct TaintFlowsParams {
    /// Path to the audit SQLite database returned by `audit`. Optional
    /// — when omitted, defaults to the DB produced by this session's
    /// most recent `audit` call.
    #[serde(default)]
    pub db_path: Option<String>,
    /// Filter by source type (e.g. IntentExtra, NetworkResponse, SharedPreferencesRead).
    #[serde(default)]
    pub source_type: Option<String>,
    /// Filter by sink type (e.g. SqlExecute, RuntimeExec, WebViewLoad, LogCall).
    #[serde(default)]
    pub sink_type: Option<String>,
}

// ── SQL helpers ────────────────────────────────────────────────────

/// Pick the fixed `WHERE` template and collect the bind values for the
/// `taint` MCP tool's parameterised query.
///
/// SECURITY: user input never becomes part of the SQL text. The template is
/// chosen solely from *which* filters are present; the filter values travel
/// separately in the returned vector and are expected to be bound by the
/// caller (`rusqlite::params!` / `params_from_iter`). This replaces an
/// earlier `format!("source_type = '{s}'")` interpolation and is what keeps
/// `params.source_type` / `params.sink_type` from being an injection vector.
///
/// | `source` | `sink` | template                                      | binds            |
/// |----------|--------|-----------------------------------------------|------------------|
/// | some     | some   | `WHERE source_type = ?1 AND sink_type = ?2`   | `[source, sink]` |
/// | some     | none   | `WHERE source_type = ?1`                      | `[source]`       |
/// | none     | some   | `WHERE sink_type = ?1`                        | `[sink]`         |
/// | none     | none   | `` (empty string)                             | `[]`             |
///
/// Tested by `tests/mcp_taint_sql_injection.rs`.
pub fn build_taint_where_clause<'a>(
    source: Option<&'a str>,
    sink: Option<&'a str>,
) -> (&'static str, Vec<&'a str>) {
    // Placeholder numbering follows bind order: source first (if any), then sink.
    let template = match (source.is_some(), sink.is_some()) {
        (true, true) => "WHERE source_type = ?1 AND sink_type = ?2",
        (true, false) => "WHERE source_type = ?1",
        (false, true) => "WHERE sink_type = ?1",
        (false, false) => "",
    };
    let binds: Vec<&'a str> = source.into_iter().chain(sink).collect();
    (template, binds)
}

// ── free function helpers ───────────────────────────────────────────

/// Blocking-pool-safe version of `DroidsawServer::run_core_audit`.
///
/// Takes Arc-cloned state so it can be moved into `tokio::task::spawn_blocking`
/// without borrowing `&self` (which is not `'static`). Mirrors the logic of
/// `run_core_audit` exactly but uses the Arc directly rather than through
/// `self.with_ctx`.
///
/// Called from the `audit` async handler's `spawn_blocking` closure.
///
/// `abort` is a cooperative-cancellation flag fired by the async layer
/// when the client disconnects (`CancellationToken::cancelled()` → flag
/// set via a watcher task). This function polls it at each phase
/// boundary and threads it into the trufflehog subprocess runner so an
/// in-flight child gets SIGTERM'd within ~250 ms of cancellation.
///
/// ## Shared response shape
///
/// The returned map is the serialized form of
/// [`crate::commands::audit_envelope::AuditEnvelope`].  Both this MCP adapter
/// and [`crate::commands::audit_light_with_mode`] (CLI adapter) MUST produce
/// the same envelope type — the intentional difference is in which optional
/// fields are populated (MCP: `db_path`, `db_queries`, `top_findings`,
/// `timings_ms`; CLI: `findings` inline, `detectors`), not in the struct
/// shape.  Any change to the response layout must go through `AuditEnvelope`
/// so both adapters stay in sync.
fn run_core_audit_blocking(
    state: Arc<Mutex<Option<CrossLayerContext>>>,
    _concurrency: Arc<ConcurrencyConfig>,
    mode: droidsaw_cli_contract::AuditMode,
    entropy: f32,
    update_db: bool,
    abort: crate::mcp::subprocess::AbortFlag,
) -> Result<serde_json::Map<String, serde_json::Value>, McpError> {
    // Phase-boundary cancellation helper. Each call site below uses this
    // before starting a phase; on fire, return a typed Cancelled error
    // so the async layer surfaces it identically to the existing
    // pre-spawn check.
    let check_abort = |phase: &'static str| -> Result<(), McpError> {
        if abort.load(std::sync::atomic::Ordering::Relaxed) {
            return Err(McpError::new(
                rmcp::model::ErrorCode(-32000),
                format!("audit cancelled before {phase}: client disconnected"),
                Some(serde_json::json!({"type": "Cancelled", "phase": phase})),
            ));
        }
        Ok(())
    };
    check_abort("audit-prelude")?;
    let t_core_start = std::time::Instant::now();
    let ts = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();

    // DB path selection.
    let db_path = if update_db {
        let hash = {
            let guard = state.lock().unwrap_or_else(|e| e.into_inner());
            guard.as_ref().map(|c| {
                CrossLayerContext::hash_path(std::path::Path::new(&c.path))
            })
        };
        match hash {
            Some(h) => std::env::temp_dir().join(format!("droidsaw-audit-{h}.db")),
            None => std::env::temp_dir().join(format!("droidsaw-audit-{ts}.db")),
        }
    } else {
        std::env::temp_dir().join(format!("droidsaw-audit-{ts}.db"))
    };

    let run_id = format!("{}-{ts}", mode.as_cli_str());

    let t_findings = std::time::Instant::now();
    let (severity_summary, top_findings, total, findings_persisted, taint_count, finding_xrefs_written, severity_by_gauge, apk_summary) = {
        let guard = state.lock().unwrap_or_else(|e| e.into_inner());
        let ctx = guard
            .as_ref()
            .ok_or_else(|| McpError::invalid_params("no file loaded — call load first", None))?;
        let hash = CrossLayerContext::hash_path(std::path::Path::new(&ctx.path));
        let findings_result = droidsaw_common::diag::with_input_hash(&hash, || -> anyhow::Result<_> {
            let findings = crate::commands::collect_findings(ctx, entropy)?;
            let total = findings.len();
            // Canonical taint-flow accounting — same helper the CLI
            // audit path uses, so the envelope's `taint_flow_count` is
            // identical regardless of transport.
            let taint_count =
                crate::commands::audit_envelope::AuditEnvelope::count_taint_flow_findings(
                    &findings,
                );

            // Compute the post-dedup count in memory — the
            // `findings_signature_hash_uniq` UNIQUE index collapses
            // findings sharing `(layer, id, source, detail)`, so the
            // raw `total` over-reports what actually lands in the
            // `findings` table. Use a BTreeSet on signature_hash to
            // get the count an LLM consumer would observe via
            // `SELECT COUNT(*) FROM findings WHERE run_id = ...`.
            let findings_persisted: usize = findings
                .iter()
                .map(crate::commands::finding_signature_hash)
                .collect::<std::collections::BTreeSet<_>>()
                .len();

            crate::commands::write_findings_db_with_run(
                &findings,
                &db_path,
                Some(&run_id),
                Some(mode.as_cli_str()),
                update_db,
            )?;
            crate::commands::write_taint_flows_db(&findings, &db_path)?;
            crate::commands::write_cross_layer_taint_flows_db(&findings, &db_path)?;

            let mut counts = std::collections::BTreeMap::<String, usize>::new();
            for f in &findings {
                let c = counts.entry(format!("{:?}", f.severity)).or_insert(0);
                // WHY: display severity tally; saturating_add is exact within usize findings count.
                *c = c.saturating_add(1);
            }

            // Gauge-stratified severity histogram + Semantic-first capped
            // projection, both from the same in-memory `findings` slice that
            // fed `counts` (the flat severity_summary), through the canonical
            // helpers shared with the CLI path so the shape cannot drift
            // between transports.
            let severity_by_gauge =
                crate::commands::audit_envelope::AuditEnvelope::stratify_by_gauge(&findings);
            let top = crate::commands::audit_envelope::AuditEnvelope::rank_top_findings(
                &findings,
                crate::commands::audit_envelope::TOP_FINDINGS_CAP,
            );

            let finding_xrefs_written =
                crate::commands::write_finding_xrefs_db(ctx, &findings, &db_path).unwrap_or(0);

            Ok((counts, top, total, findings_persisted, taint_count, finding_xrefs_written, severity_by_gauge))
        })
        .map_err(|e| sanitize_to_mcp_error("audit findings", &e, ErrorCategory::InternalError))?;

        // Derive per-APK shape stats while still holding the ctx lock.
        // Same logic as the CLI path in audit_light_with_mode.
        let apk_summary: Option<crate::commands::audit_envelope::ApkSummary> = {
            let has_hbc = ctx.hbc.is_some();
            let hbc_bytes: u64 = ctx
                .hbc
                .as_ref()
                .map(|h| h.bytes().len())
                .unwrap_or(0)
                .try_into()
                .unwrap_or(u64::MAX);
            let hbc_function_count: u32 = ctx
                .hbc
                .as_ref()
                .map(|h| h.hbc().function_count)
                .unwrap_or(0);
            let dex_methods_total: u64 = ctx.dex.iter().fold(0u64, |acc, df| {
                let per_dex: u64 = df.class_datas.values().fold(0u64, |a, cd| {
                    a.saturating_add(cd.direct_methods.len() as u64)
                        .saturating_add(cd.virtual_methods.len() as u64)
                });
                acc.saturating_add(per_dex)
            });
            let dex_classes_total: u64 = ctx
                .dex
                .iter()
                .fold(0u64, |acc, df| acc.saturating_add(df.class_defs.len() as u64));

            if let Some(apk) = ctx.apk.as_ref() {
                let dex_count = apk.dex.len().try_into().unwrap_or(u32::MAX);
                let dex_total_bytes: u64 = apk
                    .dex
                    .iter()
                    .fold(0u64, |acc, entry| acc.saturating_add(entry.data.len() as u64));
                Some(crate::commands::audit_envelope::ApkSummary {
                    has_hbc,
                    hbc_bytes,
                    hbc_function_count,
                    dex_count,
                    dex_total_bytes,
                    dex_methods_total,
                    dex_classes_total,
                })
            } else if !ctx.dex.is_empty() {
                let dex_count = ctx.dex.len().try_into().unwrap_or(u32::MAX);
                let dex_total_bytes: u64 = ctx
                    .dex_direct_bytes
                    .as_ref()
                    .map(|b| b.len() as u64)
                    .unwrap_or(0);
                Some(crate::commands::audit_envelope::ApkSummary {
                    has_hbc,
                    hbc_bytes,
                    hbc_function_count,
                    dex_count,
                    dex_total_bytes,
                    dex_methods_total,
                    dex_classes_total,
                })
            } else if has_hbc {
                Some(crate::commands::audit_envelope::ApkSummary {
                    has_hbc,
                    hbc_bytes,
                    hbc_function_count,
                    dex_count: 0,
                    dex_total_bytes: 0,
                    dex_methods_total: 0,
                    dex_classes_total: 0,
                })
            } else {
                None
            }
        };

        let (severity_summary, top_findings, total, findings_persisted, taint_count, finding_xrefs_written, severity_by_gauge) = findings_result;
        (severity_summary, top_findings, total, findings_persisted, taint_count, finding_xrefs_written, severity_by_gauge, apk_summary)
    };
    #[allow(
        clippy::as_conversions,
        clippy::cast_possible_truncation,
        reason = "PROOF: Instant::elapsed().as_millis() returns u128 measuring wall-clock since `t_findings`. The (u128 -> u64) truncation only matters once the elapsed exceeds u64::MAX ms (~584 million years). The value surfaces as `collect_findings` in timings_ms — an operator metric."
    )]
    let collect_findings_ms = t_findings.elapsed().as_millis() as u64;

    // Build the shared db_queries value. Kept as a local so we can
    // conditionally extend it with finding_xrefs queries below.
    let mut db_queries_map = serde_json::json!({
        "all_high": "SELECT severity,id_tag,detail FROM findings WHERE severity IN ('Critical','High') ORDER BY severity",
        "semantic_only": "SELECT severity,id_tag,detail FROM findings WHERE gauge_class='Semantic' AND severity IN ('Critical','High') ORDER BY severity",
        "fts_search": "SELECT detail FROM findings_fts WHERE findings_fts MATCH 'secret OR token OR key' ORDER BY rank",
        "by_severity": "SELECT severity, COUNT(*) FROM findings GROUP BY severity ORDER BY COUNT(*) DESC",
        "by_gauge": "SELECT gauge_class, COUNT(*) FROM findings GROUP BY gauge_class",
        "taint_flows": "SELECT source_type,sink_type,func_id,severity,cwe FROM taint_flows ORDER BY CASE severity WHEN 'Critical' THEN 0 WHEN 'High' THEN 1 ELSE 2 END",
        "taint_fts": "SELECT source_type,sink_type FROM taint_flows_fts WHERE taint_flows_fts MATCH 'SqlExecute OR RuntimeExec OR WebView' ORDER BY rank",
    });
    if let Some(q) = db_queries_map.as_object_mut() {
        q.insert(
            "finding_xrefs".into(),
            serde_json::json!("SELECT f.severity, f.id_tag, f.detail, fx.string_value, fx.function_name FROM findings f JOIN finding_xrefs fx ON f.rowid = fx.finding_rowid ORDER BY f.severity, fx.string_value"),
        );
        q.insert(
            "finding_xrefs_fts".into(),
            serde_json::json!("SELECT string_value, function_name FROM finding_xrefs_fts WHERE finding_xrefs_fts MATCH 'token OR oauth OR secret' ORDER BY rank"),
        );
        q.insert(
            "semgrep_high".into(),
            serde_json::json!("SELECT check_id, class_name, severity, message, cwe FROM semgrep_results WHERE severity='ERROR' ORDER BY check_id"),
        );
        q.insert(
            "semgrep_by_class".into(),
            serde_json::json!("SELECT class_name, COUNT(*) as n FROM semgrep_results GROUP BY class_name ORDER BY n DESC LIMIT 20"),
        );
        q.insert(
            "semgrep_taint_join".into(),
            serde_json::json!("SELECT sr.check_id, sr.class_name, sr.message, tf.source_type, tf.sink_type, tf.severity FROM semgrep_results sr JOIN taint_flows tf ON sr.class_name LIKE '%' || REPLACE(REPLACE(SUBSTR(tf.source_type, 1, INSTR(tf.source_type, '→')-1), 'L', ''), '/', '.') || '%' LIMIT 20"),
        );
        q.insert(
            "credentials_verified".into(),
            serde_json::json!("SELECT detector,raw,extra FROM credentials WHERE verified=1"),
        );
        q.insert(
            "credentials_all".into(),
            serde_json::json!("SELECT detector,raw,verified FROM credentials ORDER BY verified DESC LIMIT 20"),
        );
    }

    check_abort("trufflehog-phase")?;

    // Trufflehog gate: skip extraction + subprocess unless the mode
    // says it should run. The extract + invoke + persist chain lives
    // in `crate::trufflehog::run::run_and_persist`; the helper is the
    // shared implementation called by both this MCP handler and the
    // CLI `audit_full_with_mode` / `audit_light_with_mode` paths so
    // the two cannot drift on argv, envelope shape, or
    // `write_credentials_db` semantics.
    let (trufflehog_extract_ms, trufflehog_subprocess_ms, trufflehog_result) =
        if mode.runs_trufflehog() {
            let t_th_start = std::time::Instant::now();
            // NamedTempFile owns the strings-dump path; its `Drop` at the
            // end of this scope unlinks the file. Closes the /tmp leak
            // surface by ensuring temp files are cleaned up automatically.
            // The `_ts` parameter is no longer used for filename uniqueness —
            // NamedTempFile's random suffix provides the concurrent-audit
            // isolation we previously got from the timestamp.
            let strings_file_handle = tempfile::Builder::new()
                .prefix("droidsaw-strings-")
                .suffix(".txt")
                .tempfile()
                .map_err(|e| {
                    McpError::internal_error(
                        format!("failed to create strings tempfile: {e}"),
                        None,
                    )
                })?;
            let strings_file = strings_file_handle.path().to_path_buf();

            let result = {
                let guard = state.lock().unwrap_or_else(|e| e.into_inner());
                let ctx = guard
                    .as_ref()
                    .ok_or_else(|| McpError::invalid_params("no file loaded — call load first", None))?;
                let hash = CrossLayerContext::hash_path(std::path::Path::new(&ctx.path));
                droidsaw_common::diag::with_input_hash(&hash, || {
                    crate::trufflehog::run::run_and_persist(
                        ctx,
                        crate::trufflehog::run::DEFAULT_MIN_LENGTH,
                        &db_path,
                        Some(&abort),
                        Some(&strings_file),
                    )
                })
                .unwrap_or_else(|e| serde_json::json!({"ran": false, "error": e.to_string()}))
            };
            // Keep strings_file_handle alive until here — Drop unlinks.
            drop(strings_file_handle);

            // Helper does extract + invoke as one call. The prior MCP
            // envelope split `trufflehog_extract_ms` /
            // `trufflehog_subprocess_ms`; preserve the two keys for
            // back-compat with downstream parsers but report the
            // total under `_extract_ms` (the larger fraction) and 0
            // under `_subprocess_ms`. Operators read the total.
            #[allow(
                clippy::as_conversions,
                clippy::cast_possible_truncation,
                reason = "PROOF: Instant::elapsed().as_millis() returns u128 measuring wall-clock since `t_th_start`. The (u128 -> u64) truncation only matters once the elapsed exceeds u64::MAX milliseconds (~584 million years). The value is surfaced as `trufflehog_extract_ms` in the envelope's timings_ms — an operator metric, not a security invariant."
            )]
            let total_ms = t_th_start.elapsed().as_millis() as u64;
            (total_ms, 0u64, result)
        } else {
            (
                0u64,
                0u64,
                serde_json::json!({
                    "ran": false,
                    "skipped_by_mode": mode.as_cli_str(),
                }),
            )
        };

    // `severity_summary` entries are usize from the detector pipeline; cast
    // to u64 for the AuditEnvelope type (usize ≤ u64 on all supported targets).
    #[allow(
        clippy::as_conversions,
        reason = "PROOF: usize -> u64 widen of a detector severity-bucket count. usize is ≤ 64 bits on every supported droidsaw target (Linux/macOS x86_64 + aarch64), so the cast is lossless by platform invariant. Counts are bounded by total findings emitted, which is also usize-bounded."
    )]
    let severity_summary_u64: std::collections::BTreeMap<String, u64> = severity_summary
        .into_iter()
        .map(|(k, v)| (k, v as u64))
        .collect();

    let top_findings_len = top_findings.len();
    #[allow(
        clippy::as_conversions,
        clippy::cast_possible_truncation,
        reason = "PROOF: u64 -> usize narrow. The sum is over `severity_summary_u64` values, each of which was originally a usize detector count widened to u64 on the same row above. On 64-bit targets (the supported set), usize = u64, so the narrow is identity. On a hypothetical 32-bit target the sum could exceed usize::MAX only if the detector emitted >4G findings — well beyond memory limits."
    )]
    let top_findings_truncated = top_findings_len
        < severity_summary_u64
            .iter()
            .filter(|(k, _)| matches!(k.as_str(), "Critical" | "High"))
            .map(|(_, v)| *v)
            .sum::<u64>() as usize;

    // Build the canonical AuditEnvelope. Both adapters (CLI and MCP) MUST produce
    // this shape; the intentional divergence is in which optional fields are
    // populated, not the struct layout. MCP populates db_path + db_queries +
    // top_findings + timings_ms; CLI populates findings + detectors.
    //
    // The `as u64` casts in this struct literal all widen detector usize counts
    // (findings_persisted, total, taint_count, finding_xrefs_written,
    // top_findings_len) to the AuditEnvelope's u64 fields. usize <= u64 on
    // every supported 64-bit target, so the widens are lossless. The
    // `as u64` on `t_core_start.elapsed().as_millis()` truncates u128 -> u64
    // (operator timing metric; truncation horizon ~584M years).
    #[allow(
        clippy::as_conversions,
        clippy::cast_possible_truncation,
        reason = "PROOF: AuditEnvelope construction casts — see comment above. usize -> u64 widens are lossless on supported 64-bit targets; the lone u128 -> u64 (elapsed.as_millis()) is an operator timing metric with a >584M-year truncation horizon."
    )]
    let envelope = crate::commands::audit_envelope::AuditEnvelope {
        schema_version: crate::commands::audit_envelope::AUDIT_ENVELOPE_VERSION,
        findings: vec![],
        // `finding_count` is the post-dedup count (what an LLM observes via
        // `SELECT COUNT(*) FROM findings`). `findings_emitted` is the pre-dedup
        // raw count from the detector pipeline.
        finding_count: findings_persisted as u64,
        findings_emitted: total as u64,
        taint_flow_count: taint_count,
        severity_summary: severity_summary_u64,
        severity_by_gauge,
        top_findings,
        truncated: top_findings_truncated,
        db_path: Some(db_path.display().to_string()),
        db_queries: Some(db_queries_map),
        finding_xrefs_written: Some(finding_xrefs_written as u64),
        detectors: None,
        trufflehog: Some(trufflehog_result),
        semgrep: None,
        timings_ms: Some(serde_json::json!({
            "collect_findings": collect_findings_ms,
            "trufflehog_extract": trufflehog_extract_ms,
            "trufflehog_subprocess": trufflehog_subprocess_ms,
            "core_total": t_core_start.elapsed().as_millis() as u64,
        })),
        apk_summary,
        meta: crate::commands::audit_envelope::AuditMeta {
            count: top_findings_len as u64,
            truncated: top_findings_truncated,
            hint: format!(
                "{findings_persisted} findings, {taint_count} taint flows. \
                 Audit DB at {db_path}. \
                 Orient with SELECT * FROM audit_summary, then SELECT * FROM actionable_findings. \
                 Views are pre-built — see the views list. \
                 Use investigate, decompile, xrefs, and manifest to investigate. \
                 Filter on gauge_class='Semantic' to skip noise.",
                db_path = db_path.display(),
            ),
            related: vec![
                "query".to_string(),
                "investigate".to_string(),
                "decompile".to_string(),
                "xrefs".to_string(),
            ],
            thread_pool_size: rayon::current_num_threads(),
        },
    };

    // Serialize the envelope to a serde_json::Map so callers (MCP audit handler)
    // can insert additional phase-specific keys (semgrep) without re-parsing.
    let obj = serde_json::to_value(&envelope)
        .map_err(|e| sanitize_to_mcp_error("audit envelope serialize", &anyhow::anyhow!("{}", e), ErrorCategory::InternalError))?
        .as_object()
        .cloned()
        .ok_or_else(|| McpError::new(
            rmcp::model::ErrorCode::INTERNAL_ERROR,
            "audit envelope did not serialize to JSON object".to_string(),
            None,
        ))?;

    Ok(obj)
}

// ── handler impl ───────────────────────────────────────────────────

impl Default for DroidsawServer {
    fn default() -> Self {
        Self::new()
    }
}

#[tool_router(router = tool_router)]
impl DroidsawServer {
    /// Build a server using the built-in concurrency defaults.
    ///
    /// The arguments handed to `ConcurrencyConfig::new` are, in order:
    /// - `audit-full`: 1 concurrent call,
    /// - `decompile-all`: 1 concurrent call,
    /// - `taint-analyze`: 2 concurrent calls,
    /// - default class: 2 concurrent calls,
    /// - per-minute limit: 8.
    ///
    /// Callers that need different caps should go through
    /// `DroidsawServer::with_concurrency` instead.
    pub fn new() -> Self {
        let default_caps = ConcurrencyConfig::new(
            1, // audit_full
            1, // decompile_all
            2, // taint_analyze
            2, // default
            8, // max_per_min
        );
        Self::with_concurrency(default_caps)
    }

    /// Build a server around a caller-provided `ConcurrencyConfig`.
    ///
    /// Both session slots (`state` and `current_db`) start out empty
    /// (`None`), the router comes from `Self::tool_router()`, and the
    /// allowed tool classes are `McpToolClass::default_allowed()`.
    pub fn with_concurrency(concurrency: ConcurrencyConfig) -> Self {
        let state = Arc::new(Mutex::new(None));
        let current_db = Arc::new(Mutex::new(None));
        let concurrency = Arc::new(concurrency);
        let tool_router = Self::tool_router();
        let allowed_tool_classes = McpToolClass::default_allowed();
        Self {
            state,
            current_db,
            concurrency,
            tool_router,
            allowed_tool_classes,
        }
    }

    /// Construct a server with an explicit operator tool-class policy.
    ///
    /// `allowed` becomes the set consulted by `enforce_tool_class`; every
    /// other field is exactly what `DroidsawServer::new()` produces, so the
    /// default concurrency caps live in one place and cannot drift between
    /// the two constructors.
    ///
    /// This constructor does not accept a custom `ConcurrencyConfig`: the
    /// concurrency policy and the allowed-class policy are configured
    /// independently at startup.
    pub fn with_allowed_classes(
        allowed: std::collections::BTreeSet<McpToolClass>,
    ) -> Self {
        // Start from the default server and override only the policy set,
        // instead of repeating the default caps and the full struct literal.
        let mut server = Self::new();
        server.allowed_tool_classes = allowed;
        server
    }

    /// Builder that restricts the routed tools to the requested tier.
    ///
    /// - `McpToolTier::Full`: returns `self` unchanged.
    /// - Any other tier: every tool name reported by
    ///   `tool_router.list_all()` is classified with `tool_tier`, and each
    ///   name classified as `McpToolTier::Full` has its route disabled via
    ///   `tool_router.disable_route`.
    ///
    /// Because the names come from the router itself, no per-tool list is
    /// maintained here. The operator's tool-class policy is a separate
    /// check (`enforce_tool_class`) and is not touched by this builder.
    pub fn with_tool_tier(mut self, tier: McpToolTier) -> Self {
        if !matches!(tier, McpToolTier::Full) {
            // Gather the owned names of the tools to hide up front, so the
            // loop below is free to mutate the router.
            let hidden: Vec<String> = self
                .tool_router
                .list_all()
                .into_iter()
                .map(|tool| tool.name.to_string())
                .filter(|tool_name| matches!(tool_tier(tool_name), McpToolTier::Full))
                .collect();
            for tool_name in hidden {
                self.tool_router.disable_route(tool_name);
            }
        }
        self
    }

    /// Gate a tool call on the operator's allowed-class set.
    ///
    /// Looks up the class of `tool_name` via `tool_class` and returns
    /// `Ok(())` when that class is in `allowed_tool_classes`. Otherwise
    /// returns `McpError::invalid_params` whose message names the tool,
    /// the refused class, and the comma-separated list of allowed classes
    /// (all in kebab form). Tool methods call this before doing any work.
    fn enforce_tool_class(&self, tool_name: &str) -> Result<(), McpError> {
        let class = tool_class(tool_name);
        if !self.allowed_tool_classes.contains(&class) {
            let allowed = self
                .allowed_tool_classes
                .iter()
                .map(|permitted| permitted.as_kebab())
                .collect::<Vec<_>>()
                .join(", ");
            let message = format!(
                "tool-class-not-allowed: tool '{tool_name}' (class '{}') refused by operator policy; \
                 allowed classes: [{allowed}]; expand via --allowed-tool-classes",
                class.as_kebab()
            );
            return Err(McpError::invalid_params(message, None));
        }
        Ok(())
    }

    /// Resolve the audit-DB path a DB-backed tool should use.
    ///
    /// Resolution order:
    /// 1. `override_` (the caller's optional `db_path` param) wins. It is
    ///    validated through `is_allowed_load_path` with
    ///    `PathRole::Database`, and that call's result is returned as-is.
    /// 2. Otherwise the session's `current_db` slot is used.
    ///
    /// Errors (all `McpError::invalid_params`, except whatever
    /// `is_allowed_load_path` returns for an override):
    /// - the slot is empty: the caller is told to run `audit` first or
    ///   pass `db_path` explicitly;
    /// - the slot points at something that is no longer a regular file
    ///   (deleted, moved, unmounted): the slot is cleared and a
    ///   "session DB ... is gone" message is returned, with the path
    ///   passed through `redact_paths`.
    ///
    /// The slot is re-checked with `is_file()` on every read because the
    /// file can disappear between the call that wrote the slot and now.
    fn resolve_db_path(&self, override_: Option<&str>) -> Result<std::path::PathBuf, McpError> {
        if let Some(explicit) = override_ {
            return is_allowed_load_path(explicit, PathRole::Database);
        }

        // Snapshot the slot and release the lock before touching the
        // filesystem; the guard temporary is dropped at the end of this
        // statement.
        let cached = self
            .current_db
            .lock()
            .unwrap_or_else(|e| e.into_inner())
            .clone();
        let Some(path) = cached else {
            return Err(McpError::invalid_params(
                "no db_path provided and no session DB yet — \
                 run `audit` first, or pass `db_path` explicitly to \
                 query a DB produced by a previous session",
                None,
            ));
        };
        if path.is_file() {
            return Ok(path);
        }

        // Stale slot. The lock was released while we stat'ed the file, so
        // another call may have stored a fresh DB path in the meantime.
        // Clear the slot only if it still holds the path we found stale;
        // otherwise we would wipe a newer, valid session DB.
        {
            let mut slot = self.current_db.lock().unwrap_or_else(|e| e.into_inner());
            if slot.as_deref() == Some(path.as_path()) {
                *slot = None;
            }
        }

        // Redact the path so the remote MCP caller does not observe the
        // server-internal file layout.
        let raw = format!(
            "session DB at {} is gone (deleted or moved); \
             re-run `audit`, or pass `db_path` explicitly",
            path.display()
        );
        Err(McpError::invalid_params(
            crate::mcp::sanitize::redact_paths(&raw),
            None,
        ))
    }

    /// Run `f` against the currently loaded context.
    ///
    /// Locks `state` (recovering the inner value if the mutex is
    /// poisoned) and fails with `McpError::invalid_params` when nothing
    /// has been loaded yet, pointing the caller at `load`.
    ///
    /// `f` executes inside `diag::with_input_hash`, keyed by the hash of
    /// `ctx.path`, so diagnostics produced while `f` runs are attributed
    /// to this input's hash. An `Err` from `f` is converted with
    /// `sanitize_to_mcp_error` under `ErrorCategory::InternalError`.
    ///
    /// The `state` lock is held for the whole duration of `f`.
    fn with_ctx<F, R>(&self, f: F) -> Result<R, McpError>
    where
        F: FnOnce(&CrossLayerContext) -> anyhow::Result<R>,
    {
        let slot = self.state.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        let Some(ctx) = slot.as_ref() else {
            return Err(McpError::invalid_params("no file loaded — call load first", None));
        };
        let input_hash = CrossLayerContext::hash_path(std::path::Path::new(&ctx.path));
        let outcome = droidsaw_common::diag::with_input_hash(&input_hash, || f(ctx));
        outcome.map_err(|err| sanitize_to_mcp_error("with_ctx", &err, ErrorCategory::InternalError))
    }

    /// Run `f` against the loaded context on tokio's blocking pool.
    ///
    /// The shared `state` `Arc` is cloned and moved into the
    /// `spawn_blocking` closure, so the closure does not borrow `&self`.
    /// Inside the closure the steps match `with_ctx`: lock the state, fail
    /// if nothing is loaded, then call `f` inside
    /// `diag::with_input_hash` keyed by the hash of `ctx.path`.
    ///
    /// Errors:
    /// - nothing loaded: `McpError::invalid_params`;
    /// - `f` returns `Err`: converted by `sanitize_to_mcp_error` under
    ///   `ErrorCategory::InternalError`;
    /// - the blocking task cannot be joined: an `INTERNAL_ERROR` whose
    ///   data carries `"type": "BlockingTaskJoin"` plus the join error
    ///   text under `"detail"`.
    ///
    /// Currently unused in this impl (hence the `dead_code` allowance).
    #[allow(dead_code, reason = "documented API; available for future tool migrations")]
    async fn with_ctx_blocking<F, R>(&self, f: F) -> Result<R, McpError>
    where
        F: FnOnce(&CrossLayerContext) -> anyhow::Result<R> + Send + 'static,
        R: Send + 'static,
    {
        let shared_state = Arc::clone(&self.state);
        let job = move || -> Result<R, McpError> {
            let slot = shared_state.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
            let Some(ctx) = slot.as_ref() else {
                return Err(McpError::invalid_params("no file loaded — call load first", None));
            };
            let input_hash = CrossLayerContext::hash_path(std::path::Path::new(&ctx.path));
            let outcome = droidsaw_common::diag::with_input_hash(&input_hash, || f(ctx));
            outcome.map_err(|err| {
                sanitize_to_mcp_error("with_ctx_blocking", &err, ErrorCategory::InternalError)
            })
        };

        match tokio::task::spawn_blocking(job).await {
            // The task ran to completion; hand back whatever it produced.
            Ok(outcome) => outcome,
            // The task could not be joined; report it as a typed internal error.
            Err(join_err) => Err(McpError::new(
                rmcp::model::ErrorCode::INTERNAL_ERROR,
                format!("blocking task panicked: {join_err}"),
                Some(serde_json::json!({
                    "type": "BlockingTaskJoin",
                    "detail": join_err.to_string(),
                })),
            )),
        }
    }

    #[tool(description = "Load an APK, HBC, or DEX file into the server. \
Subsequent tools operate on the loaded context. Returns a summary of \
layers discovered (hbc/dex). Must be called first.")]
    pub async fn load(
        &self,
        Parameters(params): Parameters<LoadFileParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("load")?;
        let path = is_allowed_load_path(&params.path, PathRole::LoadInput)?;

        // Parsing runs inside the input-hash scope, keyed by the same
        // `hash_path` value `with_ctx` derives for later tool calls, so
        // diagnostics from parse time and from later calls share one key.
        let hash = CrossLayerContext::hash_path(&path);

        // Every MCP load passes an explicit budget (`Some(..)`): memory is
        // capped at `MCP_LOAD_DEFAULT_BUDGET_BYTES`, while the step count
        // (`usize::MAX`) and the deadline (`None`) are left unbounded.
        let mut budget = droidsaw_common::budget::ParseBudget {
            memory_bytes_remaining: MCP_LOAD_DEFAULT_BUDGET_BYTES,
            steps_remaining: usize::MAX,
            deadline: None,
        };
        let parsed = droidsaw_common::diag::with_input_hash(&hash, || {
            CrossLayerContext::parse(&path, Some(&mut budget))
        });
        let ctx = parsed
            .map_err(|e| sanitize_to_mcp_error("parse", &e, ErrorCategory::InternalError))?;

        // Build the layer summary while `ctx` is still owned here.
        // `path` echoes the caller-supplied string, not the validated path.
        let summary = serde_json::json!({
            "path": params.path,
            "hbc_present": ctx.hbc.is_some(),
            // `null` unless the context recorded an HBC parse failure, in
            // which case this carries that failure's message.
            "hbc_parse_error": ctx.hbc_parse_error.as_ref().map(|f| f.message()),
            "dex_count": ctx.dex.len(),
        });

        // Install the new context. This guard stays alive until the
        // function returns.
        let mut ctx_slot = self.state.lock().unwrap_or_else(|e| e.into_inner());
        *ctx_slot = Some(ctx);

        // A session audit-DB recorded for the previous file no longer
        // applies: clear it so tools that omit `db_path` fail loudly
        // rather than reading stale results. The temporary guard is
        // released at the end of the statement.
        *self.current_db.lock().unwrap_or_else(|e| e.into_inner()) = None;

        // Attach the running count of refused calls under `_concurrency`
        // so clients can see it without calling a separate tool.
        let concurrency_info = serde_json::json!({
            "mcp_concurrency_refused_total": self.concurrency.refused_total(),
        });
        let mut response = if let serde_json::Value::Object(fields) = summary {
            fields
        } else {
            serde_json::Map::new()
        };
        response.insert("_concurrency".into(), concurrency_info);
        Ok(serde_json::Value::Object(response).to_string())
    }


    #[tool(description = "AndroidManifest analysis. Returns permissions, \
components, exported surface, and findings. Defaults to lenient parsing \
— unknown AXML chunk types (e.g. the 0x0104 commercial-obfuscator marker \
on DexGuard-protected builds) are skipped and reported in `_meta.warnings` \
rather than crashing the parse. Pass `strict: true` to opt back into the \
historical hard-fail behaviour.")]
    pub async fn manifest(
        &self,
        Parameters(params): Parameters<ManifestParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("manifest")?;
        // `strict: true` selects the strict parser configuration; any
        // other request gets the lenient one.
        let parse_config = if params.strict {
            droidsaw_apk::ParseConfig::strict()
        } else {
            droidsaw_apk::ParseConfig::lenient()
        };
        // Delegate to the shared command implementation and return its
        // result rendered as a string.
        let rendered = self
            .with_ctx(|ctx| crate::commands::manifest_with_config(ctx, &parse_config))?
            .to_string();
        Ok(rendered)
    }

    #[tool(description = "APK signing info. Returns v1 cert + v2/v3/v4 \
block presence + per-signer verification verdict and public key material.")]
    pub async fn signing(
        &self,
        Parameters(_): Parameters<NoParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("signing")?;
        // Thin delegate: run the shared `signing` command against the
        // loaded context and return its result rendered as a string.
        let rendered = self.with_ctx(crate::commands::signing)?.to_string();
        Ok(rendered)
    }

    #[tool(description = "Lightweight APK summary: package, version, layers \
present, finding count by severity, up to 5 top critical/high findings, and \
a `signer_summary` array projecting per-block signing cert identity \
(scheme, sha256_fingerprint, plus subject_cn/subject_o/not_before/not_after \
for the v1 block; v2/v3 entries leave subject DN + validity fields null \
because the per-signer struct doesn't surface them — call `signing` to \
drill in). A sibling `signer_summary_status` field carries \"ok\" or \
\"parse_failed\" so an empty array on a corrupted PKCS#7 envelope is \
distinguishable from an empty array on an unsigned APK. For the full \
findings list use `audit` (writes a queryable SQLite DB) + `query`. \
Returns {apk_info: {...}, _meta}.")]
    pub async fn info(
        &self,
        Parameters(_): Parameters<NoParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("info")?;
        // Thin delegate: run the shared `apk_info` command against the
        // loaded context and return its result rendered as a string.
        let rendered = self.with_ctx(crate::commands::apk_info)?.to_string();
        Ok(rendered)
    }

    #[tool(description = "Modular security audit across all layers. \
Writes findings to a SQLite DB with source and confidence tracking; \
returns `db_path` for follow-up queries. `output` sets the DB path; \
omit to use a stable per-input temp path (recommended — re-runs upsert \
into the same file). \
`mode` selects which detectors run: `\"basic\"` (default) is parser-side \
findings + bundled YARA only — no subprocess spawns, ~10-30 sec wall on \
most APKs. `\"full\"` adds semgrep + trufflehog (typically 1-15 minutes \
on real-world apps; semgrep subprocess dominates latency). `\"semgrep\"` \
and `\"trufflehog\"` overlay one subprocess each on top of basic. \
Source confidence: taint/manifest = high (verified paths/facts), \
semgrep = medium (pattern on real code), trufflehog = low-medium \
(string pattern, unverified), yara = low (byte pattern, high FP on \
non-code). DB semantics: re-running the same mode upserts by stable \
finding identity (no duplicates); running a different mode adds rows \
under a new `mode` tag; `update_db = false` clears prior rows for this \
mode before inserting. See `timings_ms` for per-phase breakdown. \
Use investigate + decompile to confirm or dismiss findings, \
then triage to persist the decision. \
Concurrency: full/semgrep/trufflehog modes are rate-limited (max 1 \
concurrent, 8/min by default); basic mode is uncapped. \
Related: query (explore DB), triage (confirm/dismiss).")]
    pub async fn audit(
        &self,
        Parameters(params): Parameters<AuditFullParams>,
        ct: CancellationToken,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("audit")?;
        let t_audit_start = std::time::Instant::now();

        // Parse mode (default Basic). Unknown values surface as a typed
        // McpError instead of silently falling back, so a misspelled
        // detector name does not get masked as "ran the default".
        let mode = match params.mode.as_deref() {
            None => droidsaw_cli_contract::AuditMode::Basic,
            Some(s) => droidsaw_cli_contract::AuditMode::from_cli_str(s).ok_or_else(|| {
                McpError::invalid_params(
                    format!("unknown audit mode: {s:?} (expected basic|full|semgrep|trufflehog)"),
                    None,
                )
            })?,
        };

        // Concurrency gate: full/semgrep/trufflehog modes spawn subprocesses
        // (1–15 min wall). Basic is cheap (no subprocesses); leave it uncapped
        // so informational agents are never blocked by an in-flight full audit.
        let tool_class = if mode.runs_semgrep() || mode.runs_trufflehog() {
            ToolClass::AuditFull
        } else {
            ToolClass::ReadOnly
        };
        let _permit = self.concurrency.acquire(tool_class)?;

        let update_db = params.update_db.unwrap_or(true);

        let output = params.output.as_deref()
            .map(|p| is_allowed_path(p, PathRole::WriteOutput))
            .transpose()?;

        let entropy = params.entropy;

        // Check cancellation before the long core audit phase.
        // Implementation note: run_core_audit and subprocess invocations below
        // are synchronous work. Cancellation is checked *between* phases here
        // (not mid-operation). The subprocess timeout bounds the maximum
        // in-flight subprocess duration to DROIDSAW_MCP_SUBPROCESS_TIMEOUT_SEC
        // (default 600s) so the server cannot be held indefinitely.
        if ct.is_cancelled() {
            return Err(McpError::new(
                rmcp::model::ErrorCode(-32000),
                "audit cancelled: client disconnected".to_owned(),
                Some(serde_json::json!({"type": "Cancelled"})),
            ));
        }

        // Run the heavy core audit on the blocking thread pool so
        // the tokio executor is not blocked while SQLite + parser work runs.
        // `run_core_audit` is sync and I/O-heavy (SQLite writes, YARA, DEX
        // analysis, trufflehog subprocess). Moving it off the executor ensures
        // rmcp's async dispatch layer stays responsive for other tools while
        // an audit-full is in progress.
        //
        // Safety: DroidsawServer fields needed by run_core_audit are all Send:
        // - state: Arc<Mutex<Option<CrossLayerContext>>>
        // - concurrency: Arc<ConcurrencyConfig>
        // The spawn_blocking closure captures Arc clones (not &self borrows).
        //
        // Cooperative-cancellation hookup: spawn a watcher that waits on
        // `ct.cancelled()` and sets a shared `AbortFlag`. The blocking
        // task polls this flag at each phase boundary and on the same
        // 250 ms tick as the subprocess timeout; on fire, an in-flight
        // child gets SIGTERM'd and a typed `Cancelled` error returns
        // within ~250 ms of the client disconnect.
        //
        // The watcher task is detached when the await on the blocking
        // task completes; on the success path it dies harmlessly because
        // `ct` will eventually be dropped (or the watcher's select sees
        // a never-fires Future). The semaphore cap (max 1 concurrent
        // audit-full) bounds resource pressure if a watcher leaks.
        let abort_flag = crate::mcp::subprocess::new_abort_flag();
        let watcher = {
            let ct = ct.clone();
            let abort_flag = Arc::clone(&abort_flag);
            tokio::spawn(async move {
                ct.cancelled().await;
                abort_flag.store(true, std::sync::atomic::Ordering::Relaxed);
            })
        };
        let state_arc = Arc::clone(&self.state);
        let concurrency_arc = Arc::clone(&self.concurrency);
        let abort_for_blocking = Arc::clone(&abort_flag);
        let obj_result = tokio::task::spawn_blocking(move || {
            // Replicate run_core_audit inline using the Arc-cloned state.
            // We do NOT call self.run_core_audit (can't move &self into
            // spawn_blocking); instead we use the same Arc pattern as
            // with_ctx_blocking.
            run_core_audit_blocking(
                state_arc,
                concurrency_arc,
                mode,
                entropy,
                update_db,
                abort_for_blocking,
            )
        })
        .await
        .map_err(|e| McpError::new(
            rmcp::model::ErrorCode::INTERNAL_ERROR,
            format!("blocking task panicked: {e}"),
            Some(serde_json::json!({"type": "BlockingTaskJoin"})),
        ))?;
        // Best-effort detach: blocking task finished, the watcher is no
        // longer needed. Aborting it before the FF avoids a leaked task
        // when ct never fires.
        watcher.abort();

        let mut obj = obj_result?;

        // Check cancellation again between phases.
        if ct.is_cancelled() {
            return Err(McpError::new(
                rmcp::model::ErrorCode(-32000),
                "audit cancelled: client disconnected".to_owned(),
                Some(serde_json::json!({"type": "Cancelled"})),
            ));
        }

        // Semgrep gate: skip extraction + subprocess unless the mode
        // says it should run. The basic and trufflehog modes both
        // bypass semgrep entirely; the JSON envelope still carries a
        // `semgrep` key so consumers can observe the gate.
        let db_path_str = obj.get("db_path")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        let db_path = std::path::PathBuf::from(&db_path_str);

        let (semgrep_extract_ms, semgrep_subprocess_ms) = if mode.runs_semgrep() {
            let t_sg_extract = std::time::Instant::now();
            // MCP path uses default SemgrepArgs (no CLI flags). User-rule
            // composition still applies via the DROIDSAW_SEMGREP_RULES env
            // var, which `compose_config_args` reads from the process env.
            let mcp_semgrep_args = crate::semgrep::SemgrepArgs::default();
            let sg_val = self.with_ctx(|ctx| {
                crate::commands::semgrep(ctx, output.as_deref(), &mcp_semgrep_args)
            }).unwrap_or_else(|e| serde_json::json!({"error": e.to_string()}));
            #[allow(
                clippy::as_conversions,
                clippy::cast_possible_truncation,
                reason = "PROOF: Instant::elapsed().as_millis() u128 -> u64 truncation. The value surfaces in timings_ms as `semgrep_extract` — an operator metric with a >584M-year truncation horizon."
            )]
            let semgrep_extract_ms = t_sg_extract.elapsed().as_millis() as u64;

            #[allow(
                clippy::indexing_slicing,
                reason = "serde_json::Value indexing returns Null on miss, not panic"
            )]
            let output_dir = sg_val["output_dir"].as_str().unwrap_or("").to_string();

            // Cancellation check before the long semgrep subprocess.
            if ct.is_cancelled() {
                return Err(McpError::new(
                    rmcp::model::ErrorCode(-32000),
                    "audit cancelled: client disconnected before semgrep subprocess".to_owned(),
                    Some(serde_json::json!({"type": "Cancelled"})),
                ));
            }

            // Subprocess + DB-write half is shared with `scan semgrep
            // --persist` via crate::semgrep::run_and_persist. The helper
            // emits the same "ran/results_persisted/high_severity" JSON
            // shape this block produced inline before the refactor.
            // Semgrep runs through the timeout-aware wrapper in
            // semgrep/run.rs.
            let t_sg_subprocess = std::time::Instant::now();
            let semgrep_scan = if !output_dir.is_empty() {
                crate::semgrep::run_and_persist(
                    std::path::Path::new(&output_dir),
                    &mcp_semgrep_args,
                    &db_path,
                    Some(&abort_flag),
                )
                .unwrap_or_else(|e| serde_json::json!({"ran": false, "error": e.to_string()}))
            } else {
                #[allow(
                    clippy::indexing_slicing,
                    reason = "serde_json::Value indexing returns Null on miss, not panic"
                )]
                let sg_cmd = sg_val["command"]
                    .as_str()
                    .unwrap_or("semgrep --config auto <output_dir>/")
                    .to_string();
                serde_json::json!({"ran": false, "command": sg_cmd})
            };
            #[allow(
                clippy::as_conversions,
                clippy::cast_possible_truncation,
                reason = "PROOF: Instant::elapsed().as_millis() u128 -> u64 truncation. The value surfaces in timings_ms as `semgrep_subprocess` — an operator metric with a >584M-year truncation horizon."
            )]
            let semgrep_subprocess_ms = t_sg_subprocess.elapsed().as_millis() as u64;

            let mut sg_obj = sg_val.as_object().cloned().unwrap_or_default();
            sg_obj.insert("semgrep_scan".into(), semgrep_scan);
            obj.insert("semgrep".into(), serde_json::Value::Object(sg_obj));
            (semgrep_extract_ms, semgrep_subprocess_ms)
        } else {
            // Mode does not include semgrep — emit a stub `{"ran": false,
            // "skipped_by_mode": "<mode>"}` so consumers can tell the
            // difference between "binary not in PATH" and "mode said no."
            obj.insert("semgrep".into(), serde_json::json!({
                "semgrep_scan": {
                    "ran": false,
                    "skipped_by_mode": mode.as_cli_str(),
                },
            }));
            (0u64, 0u64)
        };

        // Generate investigation leads for the LLM consumer.
        let leads = generate_investigation_leads(&obj, &db_path_str);
        obj.insert("leads".into(), leads);

        // Mode envelope — agents downstream can introspect what ran and
        // whether the DB was upserted-into or cleared-then-rewritten.
        obj.insert("mode".into(), serde_json::json!(mode.as_cli_str()));
        obj.insert("update_db".into(), serde_json::json!(update_db));

        // Surface the concurrency refused counter so monitoring agents can
        // detect saturation without polling a separate endpoint.
        obj.insert("mcp_concurrency_refused_total".into(),
            serde_json::json!(self.concurrency.refused_total()));

        // Extend run_core_audit's `timings_ms` object with the semgrep phases
        // and the audit-level wall-clock total. Consumers read these to locate
        // the hot-path pole (extraction vs subprocess) on real-target APKs.
        if let Some(t) = obj.get_mut("timings_ms").and_then(|v| v.as_object_mut()) {
            t.insert("semgrep_extract".into(), serde_json::json!(semgrep_extract_ms));
            t.insert("semgrep_subprocess".into(), serde_json::json!(semgrep_subprocess_ms));
            #[allow(
                clippy::as_conversions,
                clippy::cast_possible_truncation,
                reason = "PROOF: Instant::elapsed().as_millis() u128 -> u64 truncation. The value surfaces in timings_ms as `audit_total` — an operator metric with a >584M-year truncation horizon."
            )]
            let audit_total_ms = t_audit_start.elapsed().as_millis() as u64;
            t.insert("audit_total".into(), serde_json::json!(audit_total_ms));
        }

        // Cache the audit-DB path as the session's current DB, so
        // follow-up `query` / `investigate` / `taint` / `triage` calls
        // can omit the `db_path` arg. Only write on the happy path:
        // run_core_audit must have produced a non-empty db_path string.
        if !db_path_str.is_empty() {
            let mut guard = self.current_db.lock().unwrap_or_else(|e| e.into_inner());
            *guard = Some(db_path.clone());
        }

        Ok(serde_json::Value::Object(obj).to_string())
    }

    #[tool(description = "Query the audit SQLite DB. Views: \
audit_summary, actionable_findings, finding_context, finding_urls, \
taint_critical, semgrep_hotspots. Every table has a _fts variant \
for full-text search: SELECT * FROM findings_fts WHERE findings_fts \
MATCH 'token OR secret'. SELECT plus a curated set of read-only \
PRAGMAs is permitted: PRAGMA table_info(<table>), PRAGMA \
table_xinfo(<table>), PRAGMA index_list(<table>), \
PRAGMA foreign_key_list(<table>). The PRAGMA \
assignment form and side-effecting PRAGMAs are rejected. \
`db_path` is optional — defaults to this session's most recent `audit` DB; \
pass explicitly to query a DB from another session. \
Related: investigate (finding→xref→decompile shortcut), taint.")]
    // Runs one allowlisted SQL statement against the resolved audit DB
    // (opened read-only) and returns a JSON envelope:
    // `{columns, rows, row_count, truncated, db_path_resolved}`.
    //
    // Errors:
    // - `invalid_params` when `is_allowed_query_sql` rejects the statement.
    // - a sanitized internal error when the DB cannot be opened, the
    //   statement cannot be prepared/executed, or a row fails while the
    //   result set is being stepped.
    pub async fn query(
        &self,
        Parameters(params): Parameters<DbQueryParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("query")?;
        // Reject anything outside the SELECT + curated-PRAGMA allowlist
        // to prevent writes/drops and side-effecting PRAGMAs.
        if let Err(reason) = is_allowed_query_sql(&params.sql) {
            return Err(McpError::invalid_params(reason, None));
        }
        let db_path = self.resolve_db_path(params.db_path.as_deref())?;
        let db = rusqlite::Connection::open_with_flags(
            &db_path,
            rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY,
        )
        .map_err(|e| sanitize_to_mcp_error("db_query open", &e, ErrorCategory::InternalError))?;

        let mut stmt = db
            .prepare(&params.sql)
            .map_err(|e| sanitize_to_mcp_error("db_query prepare", &e, ErrorCategory::InternalError))?;
        // Copy the column names out so the statement is free to be
        // mutably borrowed by `query_map` below.
        let col_names: Vec<String> = stmt
            .column_names()
            .into_iter()
            .map(String::from)
            .collect();
        let col_count = col_names.len();
        let limit = params.limit;

        let mapped = stmt
            .query_map([], |row| {
                let mut cells = Vec::with_capacity(col_count);
                for i in 0..col_count {
                    let cell = match row.get_ref(i) {
                        Ok(rusqlite::types::ValueRef::Null) => serde_json::Value::Null,
                        Ok(rusqlite::types::ValueRef::Integer(n)) => serde_json::json!(n),
                        Ok(rusqlite::types::ValueRef::Real(f)) => serde_json::json!(f),
                        // Text is decoded lossily so invalid UTF-8 never fails the row.
                        Ok(rusqlite::types::ValueRef::Text(s)) => {
                            serde_json::json!(String::from_utf8_lossy(s))
                        }
                        // Blobs are summarized by size instead of being inlined.
                        Ok(rusqlite::types::ValueRef::Blob(b)) => {
                            serde_json::json!(format!("<blob {} bytes>", b.len()))
                        }
                        Err(_) => serde_json::Value::Null,
                    };
                    cells.push(cell);
                }
                Ok(cells)
            })
            .map_err(|e| sanitize_to_mcp_error("db_query execute", &e, ErrorCategory::InternalError))?;

        // Collect at most `limit` rows. Stepping one row past the limit is
        // what tells us the result really was cut off; comparing the row
        // count to the limit would also flag results that end exactly at
        // `limit` rows.
        let mut rows: Vec<Vec<serde_json::Value>> = Vec::new();
        let mut truncated = false;
        for row in mapped {
            if rows.len() >= limit {
                truncated = true;
                break;
            }
            // A step failure (for example a malformed FTS5 MATCH
            // expression, which SQLite reports while stepping) must
            // surface as an error instead of looking like an empty or
            // short result set.
            let cells = row.map_err(|e| {
                sanitize_to_mcp_error("db_query row", &e, ErrorCategory::InternalError)
            })?;
            rows.push(cells);
        }

        let row_count = rows.len();
        let out = serde_json::json!({
            "columns": col_names,
            "rows": rows,
            "row_count": row_count,
            "truncated": truncated,
            "db_path_resolved": db_path.display().to_string(),
        });
        Ok(out.to_string())
    }

    #[tool(description = "Finding → xrefs → decompile in one call. \
Use this instead of manually chaining query + xrefs + decompile. \
Pass `rowid` (from query on findings) or `search` (FTS5 term). \
`db_path` is optional — defaults to this session's most recent `audit` DB. \
Returns {finding, xrefs, callers}. \
Related: query (browse findings), decompile (deeper drill-down).")]
    pub async fn investigate(
        &self,
        Parameters(params): Parameters<FindingContextParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("investigate")?;
        // Step 1: resolve finding from DB.
        let db_path = self.resolve_db_path(params.db_path.as_deref())?;
        let db = rusqlite::Connection::open_with_flags(
            &db_path,
            rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY,
        )
        .map_err(|e| sanitize_to_mcp_error("finding_context open", &e, ErrorCategory::InternalError))?;

        let (finding_detail, finding_id, finding_sev) = if let Some(rowid) = params.rowid {
            db.query_row(
                "SELECT id_tag, detail, severity FROM findings WHERE rowid = ?1",
                rusqlite::params![rowid],
                |row| Ok((row.get::<_, String>(1)?, row.get::<_, String>(0)?, row.get::<_, String>(2)?)),
            )
            .map_err(|e| sanitize_to_mcp_error("finding not found", &e, ErrorCategory::NotFound))?
        } else if let Some(ref q) = params.search {
            // findings_fts mirrors id_tag/detail/severity columns from
            // findings, so unqualified names collide on JOIN. Qualify
            // every selected column with the base table; the
            // findings_fts side contributes only the MATCH predicate
            // and the `rank` ORDER BY.
            db.query_row(
                "SELECT findings.id_tag, findings.detail, findings.severity FROM findings \
                 JOIN findings_fts ON findings.rowid = findings_fts.rowid \
                 WHERE findings_fts MATCH ?1 ORDER BY rank LIMIT 1",
                rusqlite::params![q],
                |row| Ok((row.get::<_, String>(1)?, row.get::<_, String>(0)?, row.get::<_, String>(2)?)),
            )
            .map_err(|e| sanitize_to_mcp_error("finding_context fts", &e, ErrorCategory::InternalError))?
        } else {
            return Err(McpError::invalid_params(
                "provide either `rowid` or `search`",
                None,
            ));
        };

        // Step 2: extract a searchable token from the finding detail.
        // Use the longest whitespace-free token that isn't a known tag.
        let search_token: String = finding_detail
            .split_whitespace()
            .filter(|t| t.len() > 6 && !t.starts_with("APK_") && !t.starts_with("DART_"))
            .max_by_key(|t| t.len())
            .unwrap_or(&finding_detail)
            .to_string();

        // Step 3: xrefs for that token.
        let xref_val = self
            .with_ctx(|ctx| crate::commands::xrefs(ctx, Some(&search_token), Some(20)))?;
        #[allow(
            clippy::indexing_slicing,
            reason = "serde_json::Value indexing returns Null on miss, not panic"
        )]
        let xrefs = xref_val["xrefs"].as_array().cloned().unwrap_or_default();

        // Step 4: optionally decompile top callers.
        let callers: Vec<serde_json::Value> = if params.decompile && !xrefs.is_empty() {
            // Collect up to 3 unique func_ids from hbc xrefs.
            let func_ids: Vec<String> = xrefs
                .iter()
                .flat_map(|x| {
                    x["functions"]
                        .as_array()
                        .cloned()
                        .unwrap_or_default()
                        .into_iter()
                        .filter_map(|f| {
                            let s = f.as_str()?.to_string();
                            // Extract numeric id from "name(#N)"
                            // `rfind("(#")` returns i at a char-boundary; the
                            // ASCII `(#` pair is 2 bytes so `i + 2` is the byte
                            // immediately after, also at a char boundary.
                            #[allow(
                                clippy::string_slice,
                                reason = "i + 2 lands at a char boundary; (# is ASCII"
                            )]
                            let id = s.rfind("(#")
                                .and_then(|i| s[i.saturating_add(2)..].strip_suffix(')'))
                                .map(String::from)?;
                            Some(id)
                        })
                })
                .collect::<std::collections::BTreeSet<_>>()
                .into_iter()
                .take(3)
                .collect();

            func_ids
                .iter()
                .filter_map(|id| {
                    self.with_ctx(|ctx| {
                        crate::commands::decompile(ctx, Some(id), false, false)
                    })
                    .ok()
                    .and_then(|v| {
                        #[allow(
                            clippy::indexing_slicing,
                            reason = "serde_json::Value indexing returns Null on miss, not panic"
                        )]
                        let funcs = v["functions"].as_array().and_then(|a| a.first().cloned());
                        funcs
                    })
                })
                .collect()
        } else {
            vec![]
        };

        let out = serde_json::json!({
            "finding": {
                "id_tag": finding_id,
                "severity": finding_sev,
                "detail": finding_detail,
                "search_token": search_token,
            },
            "xrefs": xrefs,
            "callers": callers,
            "db_path_resolved": db_path.display().to_string(),
        });
        Ok(out.to_string())
    }

    #[tool(description = "Surface taint flows from an audit DB. Shortcut \
for querying the taint_flows table produced by audit. Covers three \
finding IDs: DEX_TAINT_FLOW (interprocedural Java, cross-DEX, depth 4), \
BRIDGE_TAINT_FLOW (JS NativeModule → @ReactMethod Java), HBC_TAINT_FLOW \
(Hermes DirectEval + bridge Call sinks). Returns \
{taint_count, critical, high, source_summary, sink_summary}. Filter by \
source_type (e.g. IntentExtra, NetworkResponse, SharedPreferencesRead, \
ReactBridgeParam, UserInput) or sink_type (e.g. SqlExecute, RuntimeExec, \
WebViewLoad, LogOutput, Eval, NativeModuleArg). ReactBridgeParam sources \
are cross-layer: JS-controlled inputs flowing from @NativeModule calls \
through the React Native bridge into @ReactMethod Java bodies. \
`db_path` is optional — defaults to this session's most recent `audit` \
DB (any mode — basic, semgrep, trufflehog, or full).")]
    // Reads the `taint_flows` table of the resolved audit DB (opened
    // read-only) and returns a JSON summary. `taint_count` is the total
    // number of rows matching the filter; the detail lists and the
    // source/sink tallies cover only the (at most 50) rows fetched,
    // ordered Critical first, then High, then everything else.
    pub async fn taint(
        &self,
        Parameters(params): Parameters<TaintFlowsParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("taint")?;
        let db_path = self.resolve_db_path(params.db_path.as_deref())?;
        let conn = rusqlite::Connection::open_with_flags(
            &db_path,
            rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY,
        )
        .map_err(|e| sanitize_to_mcp_error("taint_flows open", &e, ErrorCategory::InternalError))?;

        // SECURITY: the caller-supplied `source_type` / `sink_type` values
        // reach SQLite only as bound `?N` parameters. The helper returns a
        // WHERE template picked from a fixed set of shapes, so splicing it
        // in with `format!` never puts user text into the SQL itself.
        // Regression: `tests/mcp_taint_sql_injection.rs`.
        let (where_clause, bind_values) =
            build_taint_where_clause(params.source_type.as_deref(), params.sink_type.as_deref());

        // Total matching rows; a failed count is reported as 0.
        let total: i64 = conn
            .query_row(
                &format!("SELECT COUNT(*) FROM taint_flows {where_clause}"),
                rusqlite::params_from_iter(bind_values.iter()),
                |r| r.get(0),
            )
            .unwrap_or(0);

        let detail_sql = format!(
            "SELECT rowid,layer,func_id,source_type,sink_type,severity,cwe,source_offset,sink_offset \
             FROM taint_flows {where_clause} \
             ORDER BY CASE severity WHEN 'Critical' THEN 0 WHEN 'High' THEN 1 ELSE 2 END \
             LIMIT 50"
        );
        let mut stmt = conn
            .prepare(&detail_sql)
            .map_err(|e| sanitize_to_mcp_error("taint prepare", &e, ErrorCategory::InternalError))?;

        let fetched = stmt
            .query_map(rusqlite::params_from_iter(bind_values.iter()), |row| {
                // Column indices follow the SELECT list above.
                let rowid: i64 = row.get(0)?;
                let layer: String = row.get(1)?;
                let func_id: i64 = row.get(2)?;
                let source: String = row.get(3)?;
                let sink: String = row.get(4)?;
                let severity: String = row.get(5)?;
                let cwe: Option<i64> = row.get(6)?;
                let source_offset: Option<i64> = row.get(7)?;
                let sink_offset: Option<i64> = row.get(8)?;
                Ok((rowid, layer, func_id, source, sink, severity, cwe, source_offset, sink_offset))
            })
            .map_err(|e| sanitize_to_mcp_error("taint query", &e, ErrorCategory::InternalError))?;

        let mut critical = Vec::new();
        let mut high = Vec::new();
        let mut source_counts = std::collections::BTreeMap::<String, usize>::new();
        let mut sink_counts = std::collections::BTreeMap::<String, usize>::new();

        for item in fetched {
            // Rows that fail to decode are skipped, not reported.
            let Ok((rowid, layer, func_id, source, sink, sev, cwe, source_offset, sink_offset)) =
                item
            else {
                continue;
            };

            // Per-type tallies over the fetched rows; saturating_add keeps
            // the increment overflow-free.
            source_counts
                .entry(source.clone())
                .and_modify(|n| *n = n.saturating_add(1))
                .or_insert(1);
            sink_counts
                .entry(sink.clone())
                .and_modify(|n| *n = n.saturating_add(1))
                .or_insert(1);

            let entry = serde_json::json!({
                "rowid": rowid, "layer": layer, "func_id": func_id,
                "source_type": source, "sink_type": sink,
                "severity": sev, "cwe": cwe,
                "source_offset": source_offset, "sink_offset": sink_offset,
            });
            // Only Critical and High rows are listed individually; other
            // severities contribute to the tallies alone.
            if sev == "Critical" {
                critical.push(entry);
            } else if sev == "High" {
                high.push(entry);
            }
        }

        let summary = serde_json::json!({
            "taint_count": total,
            "critical": critical,
            "high": high,
            "source_summary": source_counts,
            "sink_summary": sink_counts,
            "db_path_resolved": db_path.display().to_string(),
        });
        Ok(summary.to_string())
    }

    #[tool(description = "Triage a finding: confirm or dismiss. Updates \
the audit DB so triage state persists across sessions. Dismissed \
findings are excluded from actionable_findings. Use after investigating \
with decompile/xrefs. `db_path` is optional — defaults to this session's \
most recent `audit` DB. Related: query (check state), investigate.")]
    // Delegates to `crate::commands::triage_finding` against the resolved
    // DB and returns its JSON result, annotated with the DB path used.
    pub async fn triage(
        &self,
        Parameters(params): Parameters<TriageParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("triage")?;
        let db_path = self.resolve_db_path(params.db_path.as_deref())?;

        let mut outcome = crate::commands::triage_finding(
            &db_path,
            params.rowid,
            &params.action,
            params.reason.as_deref(),
        )
        .map_err(|e| sanitize_to_mcp_error("triage", &e, ErrorCategory::InternalError))?;

        // Echo the DB that was mutated so a caller can tell the session
        // DB apart from an explicitly passed override. A non-object
        // result is returned untouched.
        if let serde_json::Value::Object(fields) = &mut outcome {
            let resolved = db_path.display().to_string();
            fields.insert("db_path_resolved".into(), serde_json::json!(resolved));
        }

        Ok(outcome.to_string())
    }

    #[tool(description = "Search strings across all loaded layers (Hermes \
+ DEX + native .so + resources.arsc) in one call. Do not grep raw files — \
this searches parsed string pools and ELF read-only sections. Returns \
{strings: [...], _meta}. Default limit 200. Use `search` regex and \
`min_length` to narrow. Use `layer` to restrict: \"dex\", \"hbc\", \
\"native\" (.rodata + .dynstr of every .so in the APK; min_length \
defaults to 4), or \"arsc\" (resources.arsc global string pool — URLs, \
config values, JWT-shaped tokens that live only in compiled resources). \
Related: xrefs (trace a string to its callers).")]
    // Thin delegate: forwards the caller's filters to
    // `crate::commands::strings` on the loaded context and serializes
    // the resulting JSON value.
    pub async fn strings(
        &self,
        Parameters(params): Parameters<StringsParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("strings")?;
        // Borrow the optional text filters once, outside the closure.
        let pattern = params.search.as_deref();
        let layer = params.layer.as_deref();
        let hits = self.with_ctx(|ctx| {
            crate::commands::strings(ctx, pattern, params.min_length, params.limit, layer)
        })?;
        Ok(hits.to_string())
    }

    #[tool(description = "Cross-reference strings, type descriptors, and \
method descriptors to the functions that reference them. Bidirectional: \
find which functions use a string/type/method, or what each function \
references. Do not grep extracted sources — xrefs covers all layers in \
one call. Returns {xrefs: [{layer, kind, string, functions}], _meta}; \
`kind` is one of \"string\" (const-string load), \"type\" (new-instance / \
check-cast / instance-of / new-array / filled-new-array / const-class), \
or \"method\" (any invoke-*). Default limit 50. `search` is a regex \
matched against the key (string value, type descriptor, or callee triple \
`class->name+proto`). Empty pattern and patterns longer than 4 KiB are \
rejected. Related: decompile (read the function), investigate \
(finding→xref→decompile).")]
    // Thin delegate: forwards `search` and `limit` to
    // `crate::commands::xrefs` on the loaded context and serializes the
    // resulting JSON value.
    pub async fn xrefs(
        &self,
        Parameters(params): Parameters<XrefsParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("xrefs")?;
        let pattern = params.search.as_deref();
        let references =
            self.with_ctx(|ctx| crate::commands::xrefs(ctx, pattern, params.limit))?;
        Ok(references.to_string())
    }

    #[tool(description = "Bulk-decompile envelope across DEX + HBC layers. \
Returns `{functions: [{layer, function_id, name, source}], findings, _meta}` \
where each entry is one DEX class (layer `dex<n>`, function_id=class_idx, \
name=descriptor, source=Java) or one HBC function (layer `hbc`, \
function_id=fid, name=function name, source=JS). \
Use `all: true` for the union envelope across every loaded layer — \
hybrid APKs (React Native: HBC bundle + DEX classes) surface both layers \
in one response. \
`target` = single HBC function ID or DEX class descriptor \
(e.g. `Lcom/example/Foo;`); `js: true` forces HBC JS emit. \
For narrow DEX queries (regex search, outline mode, method filtering, \
dry_run) prefer `decompile` — it returns `{classes: ...}` shape tuned \
for per-class browsing. Use `apk_decompile` when you want every class \
emitted as a single envelope (bench-style head-to-head, audit sweep).")]
    // Thin delegate: forwards `target`, `js` and `all` to
    // `crate::commands::decompile` on the loaded context and serializes
    // the resulting JSON value.
    pub async fn apk_decompile(
        &self,
        Parameters(params): Parameters<DecompileParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("apk_decompile")?;
        let target = params.target.as_deref();
        let envelope = self.with_ctx(|ctx| {
            crate::commands::decompile(ctx, target, params.js, params.all)
        })?;
        Ok(envelope.to_string())
    }

    #[tool(description = "Generate Frida hooks for functions referencing \
strings that match `search`. Returns {hooks: [...], _meta}. Output is \
ready to paste into a Frida JS file. Pair with `xrefs` to identify \
which function IDs are worth hooking.")]
    // Thin delegate: passes `search` to `crate::commands::frida` on the
    // loaded context and serializes the resulting JSON value.
    pub async fn frida(
        &self,
        Parameters(params): Parameters<FridaParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("frida")?;
        let pattern = &params.search;
        let hooks = self.with_ctx(|ctx| crate::commands::frida(ctx, pattern))?;
        Ok(hooks.to_string())
    }

    #[tool(description = "Hermes bytecode bundle metadata: format version, \
string pool size, function count, and section layout with byte offsets. \
HBC layer only — errors if no HBC bundle is loaded. Use `info` for a \
cross-layer summary that includes HBC. Related: hbc_functions (list functions), \
module_list (bundle segment structure).")]
    // Thin delegate: runs `crate::commands::hbc_info` on the loaded
    // context and serializes the resulting JSON value. Takes no
    // parameters.
    pub async fn hbc_info(
        &self,
        Parameters(_): Parameters<NoParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("hbc_info")?;
        Ok(self.with_ctx(crate::commands::hbc_info)?.to_string())
    }

    #[tool(description = "List or search Hermes bytecode function names in \
the loaded HBC bundle. HBC layer only — use `dex_methods` for DEX/Java. \
`search` accepts a regex. Returns function IDs, names, and byte offsets. \
Use the returned func_id with `decompile` (readable JS) or `disasm` (raw \
bytecode). Related: decompile, disasm, call_graph, dex_methods (DEX equiv).")]
    // Thin delegate: forwards the optional `search` pattern to
    // `crate::commands::hbc_functions` on the loaded context and
    // serializes the resulting JSON value.
    pub async fn hbc_functions(
        &self,
        Parameters(params): Parameters<SearchParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("hbc_functions")?;
        let pattern = params.search.as_deref();
        let listing = self.with_ctx(|ctx| crate::commands::hbc_functions(ctx, pattern))?;
        Ok(listing.to_string())
    }

    #[tool(description = "List or search DEX class descriptors across all \
loaded DEX files. `search` accepts a regex on the descriptor \
(e.g. `^Lcom/example/`). Returns class names and DEX index. Use the \
index with `decompile` (`class_index` param). DEX layer only — use \
`hbc_functions` for HBC. Related: dex_methods (method browser), decompile.")]
    // Thin delegate: forwards the optional `search` pattern to
    // `crate::commands::dex_classes` on the loaded context and
    // serializes the resulting JSON value.
    pub async fn dex_classes(
        &self,
        Parameters(params): Parameters<SearchParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("dex_classes")?;
        let pattern = params.search.as_deref();
        let listing = self.with_ctx(|ctx| crate::commands::dex_classes(ctx, pattern))?;
        Ok(listing.to_string())
    }

    #[tool(description = "List or search DEX method signatures across all \
loaded DEX files. `search` accepts a regex on the method descriptor. \
Returns class, method name, parameter types, and return type. DEX layer \
only — use `hbc_functions` for HBC. Related: dex_classes (class browser), \
decompile (decompile the containing class), call_graph.")]
    // Thin delegate: forwards the optional `search` pattern to
    // `crate::commands::dex_methods` on the loaded context and
    // serializes the resulting JSON value.
    pub async fn dex_methods(
        &self,
        Parameters(params): Parameters<SearchParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("dex_methods")?;
        let pattern = params.search.as_deref();
        // NOTE(review): the trailing `false` is a fixed flag on this MCP
        // path; its meaning is defined by `commands::dex_methods` —
        // confirm there.
        let listing =
            self.with_ctx(|ctx| crate::commands::dex_methods(ctx, pattern, false))?;
        Ok(listing.to_string())
    }

    #[tool(description = "Export the loaded session's findings to a SQLite \
database at `output` path. The schema matches the DB produced by `audit` — \
use `query`, `investigate`, `taint`, and `triage` on it. Prefer `audit` \
for new sessions (it runs detectors AND writes the DB). Use `apk_export` \
when you want to persist findings from a session already in memory to a \
specific caller-chosen path. Related: audit (preferred), query.")]
    /// MCP `apk_export` tool: thin delegate to `crate::commands::export`.
    ///
    /// The caller-supplied `output` path is validated with `is_allowed_path`
    /// under `PathRole::WriteOutput` before the command runs, and the
    /// validated path (not the raw parameter) is what the command receives.
    ///
    /// # Errors
    ///
    /// Returns the tool-class refusal, the path-gate rejection,
    /// `invalid_params` when the validated path is not valid UTF-8, or any
    /// error returned by `with_ctx`.
    pub async fn apk_export(
        &self,
        Parameters(params): Parameters<ExportParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("apk_export")?;
        let db_path = is_allowed_path(&params.output, PathRole::WriteOutput)?;
        // The export command takes the destination as `&str`, so a path that
        // cannot be represented as UTF-8 is rejected up front.
        let Some(db_path_str) = db_path.to_str() else {
            return Err(McpError::invalid_params("non-UTF-8 output path", None));
        };
        let summary = self.with_ctx(|ctx| crate::commands::export(ctx, db_path_str))?;
        Ok(summary.to_string())
    }

    #[tool(description = "List Hermes bundle module segments — the named \
chunks that make up a React Native JS bundle (e.g. \
`node_modules/react/index.js`). HBC layer only. Shows segment names and \
byte ranges. Distinct from `npm_packages`: module_list shows bundle \
segments by file path; npm_packages shows semantic npm metadata (name, \
version, license). Related: npm_packages, functions, hbc_info.")]
    /// MCP `module_list` tool: thin delegate to
    /// `crate::commands::module_list`. Takes no parameters.
    ///
    /// # Errors
    ///
    /// Propagates the refusal from `enforce_tool_class` and any error
    /// returned by `with_ctx`.
    pub async fn module_list(
        &self,
        Parameters(_): Parameters<NoParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("module_list")?;
        let segments = self.with_ctx(crate::commands::module_list)?;
        let rendered = segments.to_string();
        Ok(rendered)
    }

    #[tool(description = "List native shared libraries (.so files) present \
in the APK. Returns ELF file paths, target architectures, and sizes. Use \
for a quick native-layer inventory before drilling into symbols with \
`apk_elf`. Related: apk_elf (ELF symbol details), strings (search \
native .rodata and .dynstr strings with `layer: \"native\"`).")]
    /// MCP `native_modules` tool: thin delegate to
    /// `crate::commands::native_modules`. Takes no parameters.
    ///
    /// # Errors
    ///
    /// Propagates the refusal from `enforce_tool_class` and any error
    /// returned by `with_ctx`.
    pub async fn native_modules(
        &self,
        Parameters(_): Parameters<NoParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("native_modules")?;
        let inventory = self.with_ctx(crate::commands::native_modules)?;
        let rendered = inventory.to_string();
        Ok(rendered)
    }

    #[tool(description = "Disassemble a single Hermes bytecode function to \
raw HBC instructions. Pass `func_id` from `hbc_functions`. Use when `decompile` \
output doesn't expose enough detail — e.g. tracing the exact origin of a \
DirectEval operand. Prefer `decompile` for readable JS output; use \
`disasm` only for low-level bytecode inspection. HBC layer only. \
Related: hbc_functions (get func_id), decompile (readable alternative).")]
    /// MCP `disasm` tool: thin delegate to `crate::commands::disasm` for the
    /// function identified by `func_id`.
    ///
    /// # Errors
    ///
    /// Propagates the refusal from `enforce_tool_class` and any error
    /// returned by `with_ctx`.
    pub async fn disasm(
        &self,
        Parameters(params): Parameters<DisasmParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("disasm")?;
        let target = params.func_id;
        let listing = self.with_ctx(|ctx| crate::commands::disasm(ctx, target))?;
        Ok(listing.to_string())
    }

    #[tool(description = "List npm packages bundled in the loaded Hermes / \
React Native bundle. Returns package name, version, and license for each \
detected package. HBC layer only. Use for supply-chain surface mapping. \
Distinct from `module_list`: npm_packages shows semantic package metadata; \
module_list shows raw bundle segment paths. Related: module_list, strings.")]
    /// MCP `npm_packages` tool: thin delegate to
    /// `crate::commands::npm_packages`. Takes no parameters.
    ///
    /// # Errors
    ///
    /// Propagates the refusal from `enforce_tool_class` and any error
    /// returned by `with_ctx`.
    pub async fn npm_packages(
        &self,
        Parameters(_): Parameters<NoParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("npm_packages")?;
        let packages = self.with_ctx(crate::commands::npm_packages)?;
        let rendered = packages.to_string();
        Ok(rendered)
    }

    #[tool(description = "Function call graph for the loaded file. Returns \
caller→callee edges. Use `search` (regex) to focus on functions matching \
a name pattern; `limit` caps returned edges (default 50). DEX and HBC \
layers supported. Useful for tracing which code reaches a sensitive \
function. Related: decompile (read a function body), xrefs \
(string→function edges), dex_methods, hbc_functions.")]
    /// MCP `call_graph` tool: thin delegate to
    /// `crate::commands::call_graph`.
    ///
    /// Forwards the optional `search` filter and the `limit` parameter
    /// unchanged; the result is rendered with `to_string()`.
    ///
    /// # Errors
    ///
    /// Propagates the refusal from `enforce_tool_class` and any error
    /// returned by `with_ctx`.
    pub async fn call_graph(
        &self,
        Parameters(params): Parameters<CallGraphParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("call_graph")?;
        let pattern = params.search.as_deref();
        let edge_cap = params.limit;
        let edges =
            self.with_ctx(|ctx| crate::commands::call_graph(ctx, pattern, edge_cap))?;
        Ok(edges.to_string())
    }

    #[tool(description = "Decompile to readable source. DEX → Java \
(SSA-optimized, structured control flow), Hermes → JS (OXC-validated). \
Do not use external decompilers — this tool \
produces high-fidelity output across all supported layers. \
Pass `class_index` (0-based) or `search` (regex on class descriptor). \
Search decompiles EVERY match — use a tight regex. For long classes \
(deeplink routers, generated facades) pass `mode: \"outline\"` to bound \
per-class output (class header + method signatures + first ~20 lines \
per body) or `methods: [\"name1\", \"name2\"]` to limit emit to listed \
methods. Both filters compose. \
Pass `dry_run: true` to preview how many classes a regex matches and \
their estimated sizes WITHOUT invoking the decompiler — avoids token \
blowup on broad regex searches (e.g. `^LIL1/.*;$` matching 78 classes). \
Returns `{classes: [{layer, class_index, descriptor, source}], _meta}` — \
narrow per-class shape. For the bulk envelope across every layer \
(DEX + HBC union, one entry per class/function), use `apk_decompile` \
with `all: true`. \
Related: xrefs (find which class to decompile), investigate (finding→decompile in one call), apk_decompile (bulk envelope).")]
    /// MCP `decompile` tool: delegates to
    /// `crate::commands::dex_decompile_dry_run` (when `dry_run == Some(true)`)
    /// or `crate::commands::dex_decompile_filtered` (otherwise).
    ///
    /// Order of operations:
    /// 1. tool-class policy check;
    /// 2. dry-run short-circuit (no permit taken, `mode`/`methods` ignored);
    /// 3. `mode` validation — `"outline"`, `"full"`, or absent (treated as
    ///    full);
    /// 4. concurrency permit via `ToolClass::Default`;
    /// 5. the filtered decompile itself.
    ///
    /// # Errors
    ///
    /// Returns the tool-class refusal, `invalid_params` for an unknown
    /// `mode`, the refusal from `concurrency.acquire`, or any error returned
    /// by `with_ctx`.
    pub async fn decompile(
        &self,
        Parameters(params): Parameters<DexDecompileParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("decompile")?;

        // dry_run: resolve matching classes only; do not invoke the decompiler.
        // No concurrency permit needed — no CPU-heavy decompile pipeline fires.
        if params.dry_run == Some(true) {
            let value = self.with_ctx(|ctx| {
                crate::commands::dex_decompile_dry_run(
                    ctx,
                    params.class_index,
                    params.search.as_deref(),
                )
            })?;
            return Ok(value.to_string());
        }

        // Validate `mode` BEFORE acquiring the concurrency permit: this is a
        // pure parameter check, and a malformed request should be rejected as
        // `invalid_params` without first taking a slot that a well-formed
        // request could have used.
        let mode = match params.mode.as_deref() {
            Some("outline") => crate::commands::DecompileMode::Outline,
            Some("full") | None => crate::commands::DecompileMode::Full,
            Some(other) => {
                return Err(McpError::invalid_params(
                    format!("unknown decompile mode {other:?}; expected \"full\" or \"outline\""),
                    None,
                ));
            }
        };

        // Concurrency gate: decompile on large obfuscated DEX inputs is
        // CPU-heavy (CFG reconstruction, SSA, region structuring). Four
        // concurrent calls on a 2-core machine saturate all tokio workers,
        // blocking ping/info/manifest. Gate through ToolClass::Default (max 2
        // concurrent) to bound starvation. _permit is held by RAII until return.
        let _permit = self.concurrency.acquire(ToolClass::Default)?;

        let methods = params.methods.as_deref();
        let value = self.with_ctx(|ctx| {
            crate::commands::dex_decompile_filtered(
                ctx,
                params.class_index,
                params.search.as_deref(),
                mode,
                methods,
            )
        })?;
        Ok(value.to_string())
    }

    #[tool(description = "Diff the currently-loaded Hermes bundle \
against another file. Load the baseline first via load, then \
call diff with the new version's path. Returns {old_version, \
new_version, string_counts, function_counts, added_strings, \
removed_strings, _meta}. Hermes/HBC only — both the loaded file and \
the new path must contain an HBC bundle; DEX and full APK diff are \
not yet supported.")]
    /// MCP `diff` tool: thin delegate to `crate::commands::diff`.
    ///
    /// The caller-supplied `path` is validated with `is_allowed_load_path`
    /// under `PathRole::LoadInput`; the validated path is what gets compared
    /// against the loaded context.
    ///
    /// # Errors
    ///
    /// Returns the tool-class refusal, the path-gate rejection, or any error
    /// returned by `with_ctx`.
    pub async fn diff(
        &self,
        Parameters(params): Parameters<DiffParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("diff")?;
        let candidate = is_allowed_load_path(&params.path, PathRole::LoadInput)?;
        let report = self.with_ctx(|ctx| crate::commands::diff(ctx, &candidate))?;
        Ok(report.to_string())
    }

    /// MCP `corpus_ingest` tool: delegates to
    /// `crate::commands::corpus_ingest` with a single validated input
    /// directory and a validated output path. Does not use the loaded
    /// context.
    ///
    /// # Errors
    ///
    /// Returns the tool-class refusal, the refusal from
    /// `concurrency.acquire`, a path-gate rejection for `dir` or `output`,
    /// `invalid_params` when the validated output path is not valid UTF-8, or
    /// the command's own failure passed through `sanitize_to_mcp_error` as
    /// `ErrorCategory::InternalError`.
    // NOTE(review): unlike the tools above, no `#[tool(...)]` attribute is
    // visible on this method or the ones that follow it — confirm they are
    // registered with the tool router as intended.
    pub async fn corpus_ingest(
        &self,
        Parameters(params): Parameters<CorpusIngestParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("corpus_ingest")?;
        // Concurrency gate: corpus_ingest reads multiple APK files
        // synchronously; on large corpora this is multi-minute work.
        // Gate through ToolClass::Default (max 2 concurrent). The permit is
        // released when `_permit` drops at return.
        let _permit = self.concurrency.acquire(ToolClass::Default)?;

        let corpus_dir = is_allowed_load_path(&params.dir, PathRole::LoadDirectory)?;
        let inputs = vec![corpus_dir];
        let db_path = is_allowed_path(&params.output, PathRole::WriteOutput)?;
        let Some(db_path_str) = db_path.to_str() else {
            return Err(McpError::invalid_params("non-UTF-8 output path", None));
        };

        let outcome = crate::commands::corpus_ingest(
            &inputs,
            db_path_str,
            params.tag.as_deref(),
            params.skip_existing,
        );
        match outcome {
            Ok(summary) => Ok(summary.to_string()),
            Err(e) => Err(sanitize_to_mcp_error(
                "corpus_ingest",
                &e,
                ErrorCategory::InternalError,
            )),
        }
    }

    /// MCP `apk_entries` tool: thin delegate to `crate::commands::entries`,
    /// forwarding the optional `search` filter and the `limit` parameter.
    ///
    /// # Errors
    ///
    /// Propagates the refusal from `enforce_tool_class` and any error
    /// returned by `with_ctx`.
    pub async fn apk_entries(
        &self,
        Parameters(params): Parameters<EntriesParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("apk_entries")?;
        let pattern = params.search.as_deref();
        let cap = params.limit;
        let listing = self.with_ctx(|ctx| crate::commands::entries(ctx, pattern, cap))?;
        Ok(listing.to_string())
    }

    /// MCP `apk_elf` tool: thin delegate to `crate::commands::elf`,
    /// forwarding the optional `search` filter.
    ///
    /// # Errors
    ///
    /// Propagates the refusal from `enforce_tool_class` and any error
    /// returned by `with_ctx`.
    pub async fn apk_elf(
        &self,
        Parameters(params): Parameters<ElfParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("apk_elf")?;
        let pattern = params.search.as_deref();
        let details = self.with_ctx(|ctx| crate::commands::elf(ctx, pattern))?;
        Ok(details.to_string())
    }

    /// MCP `apk_webview_assets` tool: thin delegate to
    /// `crate::commands::webview_assets`, forwarding the optional `search`
    /// and `extract` parameters unchanged.
    ///
    /// # Errors
    ///
    /// Propagates the refusal from `enforce_tool_class` and any error
    /// returned by `with_ctx`.
    pub async fn apk_webview_assets(
        &self,
        Parameters(params): Parameters<WebviewAssetsParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("apk_webview_assets")?;
        let pattern = params.search.as_deref();
        let extract = params.extract.as_deref();
        let assets =
            self.with_ctx(|ctx| crate::commands::webview_assets(ctx, pattern, extract))?;
        Ok(assets.to_string())
    }

    /// MCP `apk_resources` tool: thin delegate to
    /// `crate::commands::resources`, forwarding the optional `search` filter
    /// and the `limit` parameter.
    ///
    /// # Errors
    ///
    /// Propagates the refusal from `enforce_tool_class` and any error
    /// returned by `with_ctx`.
    pub async fn apk_resources(
        &self,
        Parameters(params): Parameters<ResourcesParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("apk_resources")?;
        let pattern = params.search.as_deref();
        let cap = params.limit;
        let listing = self.with_ctx(|ctx| crate::commands::resources(ctx, pattern, cap))?;
        Ok(listing.to_string())
    }

    /// MCP `apk_sbom` tool: thin delegate to `crate::commands::sbom`. Takes
    /// no parameters.
    ///
    /// # Errors
    ///
    /// Propagates the refusal from `enforce_tool_class` and any error
    /// returned by `with_ctx`.
    pub async fn apk_sbom(
        &self,
        Parameters(_): Parameters<NoParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("apk_sbom")?;
        let bill = self.with_ctx(crate::commands::sbom)?;
        let rendered = bill.to_string();
        Ok(rendered)
    }

    /// MCP `apk_yara` tool: delegates to `crate::commands::yara` after two
    /// boundary checks on caller input.
    ///
    /// 1. Inline rule source (`rules_src`), when present, is run through
    ///    `droidsaw_apk::yara_scan::check_directive_policy`.
    /// 2. A rule path (`rules`), when present, is validated with
    ///    `is_allowed_path` under `PathRole::LoadInputOrDirectory`.
    ///
    /// # Errors
    ///
    /// Returns the tool-class refusal, a `BadRequest`-category error for a
    /// rule-source policy violation, the path-gate rejection, or any error
    /// returned by `with_ctx`.
    pub async fn apk_yara(
        &self,
        Parameters(params): Parameters<YaraParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("apk_yara")?;

        // Security pre-check: inline rule source is screened for restricted
        // directives BEFORE entering `with_ctx`. Doing it here yields a typed
        // `invalid_params` rejection (distinguishable from internal errors)
        // and avoids leaking filesystem layout via compiler error messages.
        //
        // `with_ctx` maps every `anyhow::Error` to `internal_error`, which
        // would erase the `YaraRuleSourceRestricted` discrimination; checking
        // up front keeps policy violations (`invalid_params`) separate from
        // genuine server faults (`internal_error`).
        let policy_verdict = params
            .rules_src
            .as_ref()
            .map(|src| droidsaw_apk::yara_scan::check_directive_policy(src));
        if let Some(Err(violation)) = policy_verdict {
            return Err(sanitize_to_mcp_error(
                "yara rules policy",
                &violation,
                ErrorCategory::BadRequest,
            ));
        }

        // A4 gap closure: the caller-supplied `rules` filesystem path goes
        // through the same allowlist gate as every other path-bearing MCP
        // parameter. The command accepts either a single rule file or a
        // directory of rule files, hence `PathRole::LoadInputOrDirectory`.
        let vetted_rules: Option<std::path::PathBuf> = match params.rules.as_deref() {
            Some(raw) => Some(is_allowed_path(raw, PathRole::LoadInputOrDirectory)?),
            None => None,
        };
        let rules_path = vetted_rules.as_deref();
        let inline_rules = params.rules_src.as_deref();

        let matches = self.with_ctx(|ctx| {
            crate::commands::yara(ctx, inline_rules, rules_path, &params.target, params.limit)
        })?;
        Ok(matches.to_string())
    }

    /// MCP `apk_semgrep_extract` tool: delegates to
    /// `crate::commands::semgrep` with a resolved output location and
    /// default `SemgrepArgs`.
    ///
    /// Output resolution: a caller-supplied `output` is validated with
    /// `is_allowed_path` under `PathRole::WriteOutput`; when omitted, the
    /// destination is `<temp_dir>/droidsaw-semgrep-<key>`, where `<key>` is
    /// `CrossLayerContext::hash_path` of the loaded context's path, or the
    /// literal `unknown` when nothing is loaded.
    ///
    /// # Errors
    ///
    /// Returns the tool-class refusal, the path-gate rejection, or any error
    /// returned by `with_ctx`.
    pub async fn apk_semgrep_extract(
        &self,
        Parameters(params): Parameters<SemgrepParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("apk_semgrep_extract")?;

        let caller_dir = match params.output.as_deref() {
            Some(raw) => Some(is_allowed_path(raw, PathRole::WriteOutput)?),
            None => None,
        };

        // OQ-5: with no explicit `output`, use a stable tempdir path keyed on
        // the loaded APK's path hash, mirroring the audit DB pattern. The
        // prior default of `./droidsaw-semgrep-<name>` resolved relative to
        // the server process CWD — subtle and potentially surprising. Tempdir
        // is consistently safe under any server launch context.
        //
        // The fallback is resolved here, before `with_ctx`, and the state
        // lock is released at the end of the inner block.
        let destination: std::path::PathBuf = caller_dir.unwrap_or_else(|| {
            let key = {
                let loaded = self
                    .state
                    .lock()
                    .unwrap_or_else(|poisoned| poisoned.into_inner());
                loaded.as_ref().map_or_else(
                    || "unknown".to_string(),
                    |c| CrossLayerContext::hash_path(std::path::Path::new(&c.path)),
                )
            };
            std::env::temp_dir().join(format!("droidsaw-semgrep-{key}"))
        });

        // MCP semgrep_extract surface does not expose --rules / --no-auto
        // params today — DROIDSAW_SEMGREP_RULES env still applies via
        // SemgrepArgs::default()'s effective_rules() call.
        let semgrep_args = crate::semgrep::SemgrepArgs::default();
        let report = self.with_ctx(|ctx| {
            crate::commands::semgrep(ctx, Some(destination.as_path()), &semgrep_args)
        })?;
        Ok(report.to_string())
    }

    pub async fn apk_trufflehog(
        &self,
        Parameters(params): Parameters<TrufflehogParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("apk_trufflehog")?;
        // Validate caller-supplied write path at the MCP boundary before
        // entering with_ctx. The None branch falls back to temp_dir()
        // which is always safe and bypasses the check intentionally.
        let validated_out: Option<std::path::PathBuf> = params.output.as_deref()
            .map(|p| is_allowed_path(p, PathRole::WriteOutput))
            .transpose()?;

        let value = self.with_ctx(|ctx| {
            let mut buf: Vec<u8> = Vec::new();
            crate::commands::trufflehog(ctx, params.min_length, &mut buf)?;
            let text = String::from_utf8_lossy(&buf);
            let filter = match params.search.as_deref() {
                Some(pat) => Some(regex::Regex::new(pat)?),
                None => None,
            };
            let lines: Vec<&str> = text
                .lines()
                .filter(|l| filter.as_ref().is_none_or(|r| r.is_match(l)))
                .collect();
            let line_count = lines.len();

            // Write to file — never return raw strings over the transport.
            // Use the pre-validated path when the caller supplied one;
            // fall back to a NamedTempFile + keep() (atomic-create with
            // random suffix; persists after Drop because the caller is
            // expected to consume it via downstream `trufflehog filesystem`).
            //
            // ⚠ This is an INTRINSIC leak surface: the tool returns a
            // path the client owns cleanup for. Responsibility for cleanup
            // lies with downstream consumers. The tempfile change here
            // eliminates timestamp-collision FNs (1-second granularity
            // could collide under concurrent calls). Does
            // NOT close the leak. Real fix needs API redesign — stream
            // output over the MCP transport, OR delete-on-read handle,
            // OR per-session temp dir cleaned up at server shutdown.
            // Filed as TODO: droidsaw-strings-mcp-leak-api-redesign.
            let out_path = match validated_out.clone() {
                Some(p) => p,
                None => {
                    let tf = tempfile::Builder::new()
                        .prefix("droidsaw-strings-")
                        .suffix(".txt")
                        .tempfile()?;
                    let (_, p) = tf.keep()?;
                    p
                }
            };
            std::fs::write(&out_path, lines.join("\n"))?;

            Ok(serde_json::json!({
                "output_file": out_path.display().to_string(),
                "lines": line_count,
                "command": format!("trufflehog filesystem {} --no-verification", out_path.display()),
                "_meta": {
                    "hint": "pipe output_file to trufflehog filesystem for full credential scanning",
                    "related": ["apk_yara", "audit"],
                },
            }))
        })?;
        Ok(value.to_string())
    }

    /// MCP `apk_scan_corpus` tool: validates every caller-supplied path, runs
    /// `crate::commands::scan_corpus` into an in-memory buffer, and returns
    /// the buffer's JSON lines as `{records, _meta}`. Does not use the loaded
    /// context.
    ///
    /// Output lines that do not parse as JSON are skipped without an error,
    /// so `_meta.count` is the number of parsed records. `_meta.truncated` is
    /// always `false`.
    ///
    /// # Errors
    ///
    /// Returns the tool-class refusal, the first path-gate rejection, or the
    /// command's failure passed through `sanitize_to_mcp_error` as
    /// `ErrorCategory::InternalError`.
    pub async fn apk_scan_corpus(
        &self,
        Parameters(params): Parameters<ScanCorpusParams>,
    ) -> Result<String, McpError> {
        self.enforce_tool_class("apk_scan_corpus")?;

        // OQ-4: every caller-supplied path goes through is_allowed_path.
        // Without this gate a caller can direct the scanner at /etc/passwd or
        // any path the server process can read. ReadOnly classification is
        // correct (no writes), but the path-traversal risk still applies to
        // read operations.
        // The role depends on is_dir(): scan_corpus() recurses directories
        // for .apk files, so directory inputs must use LoadDirectory rather
        // than LoadInput (which enforces is_file() and would reject
        // directories with invalid_params).
        let mut roots: Vec<std::path::PathBuf> = Vec::with_capacity(params.paths.len());
        for raw in &params.paths {
            let role = if std::path::Path::new(raw).is_dir() {
                PathRole::LoadDirectory
            } else {
                PathRole::LoadInput
            };
            roots.push(is_allowed_path(raw, role)?);
        }

        let mut sink: Vec<u8> = Vec::new();
        if let Err(e) = crate::commands::scan_corpus(&roots, &params.min_severity, &mut sink) {
            return Err(sanitize_to_mcp_error(
                "scan_corpus",
                &e,
                ErrorCategory::InternalError,
            ));
        }

        // One JSON document per output line; unparseable lines are dropped.
        let rendered = String::from_utf8_lossy(&sink);
        let mut records: Vec<serde_json::Value> = Vec::new();
        for line in rendered.lines() {
            if let Ok(record) = serde_json::from_str(line) {
                records.push(record);
            }
        }

        let envelope = serde_json::json!({
            "records": records,
            "_meta": {
                "count": records.len(),
                "truncated": false,
                "hint": "findings are already filtered by `min_severity`; use apk_audit for a single-APK deep dive",
                "related": ["apk_audit", "corpus_ingest", "export"],
            },
        });
        Ok(envelope.to_string())
    }

}

#[tool_handler(router = self.tool_router)]
impl ServerHandler for DroidsawServer {
    /// Builds the server info with the tools, prompts, and resources
    /// capabilities enabled.
    fn get_info(&self) -> ServerInfo {
        ServerInfo::new(
            ServerCapabilities::builder()
                .enable_tools()
                .enable_prompts()
                .enable_resources()
                .build(),
        )
    }

    /// Returns every prompt produced by `prompts::build_prompts()`. The
    /// pagination request and the request context are ignored.
    async fn list_prompts(
        &self,
        _request: Option<PaginatedRequestParams>,
        _context: rmcp::service::RequestContext<rmcp::RoleServer>,
    ) -> Result<ListPromptsResult, McpError> {
        Ok(ListPromptsResult {
            prompts: prompts::build_prompts(),
            ..Default::default()
        })
    }

    /// Renders the named prompt via `prompts::render_prompt`. Missing
    /// arguments are treated as an empty (default) argument set.
    async fn get_prompt(
        &self,
        request: GetPromptRequestParams,
        _context: rmcp::service::RequestContext<rmcp::RoleServer>,
    ) -> Result<GetPromptResult, McpError> {
        let args = request.arguments.unwrap_or_default();
        prompts::render_prompt(&request.name, &args)
    }

    /// Lists every entry of `resources::resource_entries()` as an
    /// un-annotated `RawResource` carrying its URI, name, description, MIME
    /// type, and byte size. The pagination request and the request context
    /// are ignored.
    async fn list_resources(
        &self,
        _request: Option<PaginatedRequestParams>,
        _context: rmcp::service::RequestContext<rmcp::RoleServer>,
    ) -> Result<ListResourcesResult, McpError> {
        let resource_list = resources::resource_entries()
            .iter()
            .map(|(uri, name, desc, mime, contents)| {
                RawResource {
                    uri: (*uri).into(),
                    name: (*name).into(),
                    title: None,
                    description: Some((*desc).into()),
                    mime_type: Some((*mime).into()),
                    // Checked narrowing instead of an `as` cast: a length
                    // that does not fit in `u32` is reported as "size
                    // unknown" (`None`) rather than a silently truncated
                    // value, and no lint suppression is needed.
                    size: u32::try_from(contents.len()).ok(),
                    icons: None,
                    meta: None,
                }
                .no_annotation()
            })
            .collect();
        Ok(ListResourcesResult {
            resources: resource_list,
            ..Default::default()
        })
    }

    /// Returns the text contents (with MIME type) of the resource whose URI
    /// equals `request.uri`.
    ///
    /// # Errors
    ///
    /// Returns `invalid_params` when no entry of
    /// `resources::resource_entries()` has the requested URI.
    async fn read_resource(
        &self,
        request: ReadResourceRequestParams,
        _context: rmcp::service::RequestContext<rmcp::RoleServer>,
    ) -> Result<ReadResourceResult, McpError> {
        for (uri, _, _, mime, contents) in resources::resource_entries() {
            if *uri == request.uri {
                return Ok(ReadResourceResult::new(vec![
                    ResourceContents::text(*contents, *uri).with_mime_type(*mime),
                ]));
            }
        }
        Err(McpError::invalid_params(
            format!(
                "unknown resource URI: {} — call resources/list to see available URIs",
                request.uri
            ),
            None,
        ))
    }
}

#[cfg(test)]
mod tool_class_tests {
    use super::*;
    use std::str::FromStr;

    /// Every kebab-case class name parses to its matching variant.
    #[test]
    fn parses_kebab_case() {
        let cases = [
            ("read-only", McpToolClass::ReadOnly),
            ("writes-tempfile", McpToolClass::WritesTempfile),
            ("writes-caller-path", McpToolClass::WritesCallerPath),
            ("spawns-subprocess", McpToolClass::SpawnsSubprocess),
            ("manages-state", McpToolClass::ManagesState),
        ];
        for (text, expected) in cases {
            assert_eq!(McpToolClass::from_str(text).unwrap(), expected);
        }
    }

    /// A snake_case spelling is rejected, and the error text contains both
    /// "unknown tool class" and the valid `read-only` spelling.
    #[test]
    fn rejects_unknown_kebab() {
        let message = McpToolClass::from_str("read_only").unwrap_err();
        for needle in ["unknown tool class", "read-only"] {
            assert!(message.contains(needle));
        }
    }

    /// Surrounding whitespace does not prevent a class name from parsing.
    #[test]
    fn trims_whitespace() {
        let parsed = McpToolClass::from_str("  read-only  ").unwrap();
        assert_eq!(parsed, McpToolClass::ReadOnly);
    }

    /// The default allow-set contains `ReadOnly` and `WritesTempfile` and
    /// none of the other three classes.
    #[test]
    fn default_is_read_only_plus_tempfile() {
        let defaults = McpToolClass::default_allowed();
        for granted in [McpToolClass::ReadOnly, McpToolClass::WritesTempfile] {
            assert!(defaults.contains(&granted));
        }
        for withheld in [
            McpToolClass::WritesCallerPath,
            McpToolClass::SpawnsSubprocess,
            McpToolClass::ManagesState,
        ] {
            assert!(!defaults.contains(&withheld));
        }
    }

    /// Pins the classification of a sample of tools, one group per class.
    #[test]
    fn known_tool_classifications() {
        // ReadOnly tools: pure-read analysis paths.
        let read_only_tools = [
            "manifest", "signing", "info", "query", "investigate", "taint",
            "strings", "xrefs", "frida", "decompile", "diff",
        ];
        for name in read_only_tools {
            assert_eq!(
                tool_class(name),
                McpToolClass::ReadOnly,
                "{name} should be ReadOnly"
            );
        }

        let other_classes = [
            // WritesTempfile: load extracts to droidsaw tempdir.
            ("load", McpToolClass::WritesTempfile),
            // SpawnsSubprocess: audit (most-restrictive across all modes, OQ-1).
            ("audit", McpToolClass::SpawnsSubprocess),
            // ManagesState: triage UPDATEs DB rows.
            ("triage", McpToolClass::ManagesState),
        ];
        for (name, expected) in other_classes {
            assert_eq!(tool_class(name), expected);
        }
    }

    /// Every tool in this set must classify as `ReadOnly`.
    #[test]
    fn newly_classified_readonly_tools() {
        let expected = McpToolClass::ReadOnly;
        let read_only_tools = [
            "apk_decompile",
            "hbc_info",
            "hbc_functions",
            "dex_classes",
            "dex_methods",
            "module_list",
            "native_modules",
            "disasm",
            "npm_packages",
            "call_graph",
            "apk_entries",
            "apk_elf",
            "apk_webview_assets",
            "apk_resources",
            "apk_sbom",
            "apk_scan_corpus",
        ];
        for name in read_only_tools {
            assert_eq!(tool_class(name), expected, "{name} should be ReadOnly");
        }
    }

    /// Tools that write to a caller-chosen path classify as
    /// `WritesCallerPath`.
    #[test]
    fn newly_classified_writes_caller_path_tools() {
        let expected = McpToolClass::WritesCallerPath;
        let writers = [
            "apk_export",
            "corpus_ingest",
            "apk_semgrep_extract",
            "apk_trufflehog",
        ];
        for name in writers {
            assert_eq!(tool_class(name), expected, "{name} should be WritesCallerPath");
        }
    }

    /// `apk_yara` and `audit` both classify as `SpawnsSubprocess`.
    #[test]
    fn newly_classified_spawns_subprocess_tools() {
        // apk_yara: in-process libyara; blast-radius conservative (OQ-2 main override).
        let yara_class = tool_class("apk_yara");
        assert_eq!(yara_class, McpToolClass::SpawnsSubprocess);
        // audit: most-restrictive across all modes (OQ-1 main override).
        let audit_class = tool_class("audit");
        assert_eq!(audit_class, McpToolClass::SpawnsSubprocess);
    }

    /// `triage` classifies as `ManagesState`.
    #[test]
    fn newly_classified_manages_state_tools() {
        // triage (UPDATEs rows in the audit SQLite DB).
        let triage_class = tool_class("triage");
        assert_eq!(triage_class, McpToolClass::ManagesState);
    }

    /// Names missing from the classification table — including the empty
    /// string — classify as `ManagesState`.
    #[test]
    fn unknown_tool_fails_closed() {
        // A tool name not in the classification table defaults to the
        // most-restricted class — the operator opts in to permit it.
        for unlisted in ["hypothetical_new_tool", ""] {
            assert_eq!(tool_class(unlisted), McpToolClass::ManagesState);
        }
    }

    /// A server built with `DroidsawServer::new()` permits `info`, `load`,
    /// and `query`.
    #[test]
    fn enforce_default_policy_permits_read_only_tools() {
        let server = DroidsawServer::new();
        // Default policy permits ReadOnly + WritesTempfile.
        for name in ["info", "load", "query"] {
            assert!(server.enforce_tool_class(name).is_ok());
        }
    }

    /// A server built with `DroidsawServer::new()` refuses every tool listed
    /// here.
    #[test]
    fn enforce_default_policy_refuses_destructive_tools() {
        let server = DroidsawServer::new();
        let denied_by_default = [
            // audit is SpawnsSubprocess (OQ-1: most-restrictive across modes).
            "audit",
            // triage is ManagesState.
            "triage",
            // apk_yara is SpawnsSubprocess (OQ-2: conservative blast-radius).
            "apk_yara",
            // apk_export is WritesCallerPath.
            "apk_export",
            // corpus_ingest is WritesCallerPath.
            "corpus_ingest",
            // apk_trufflehog is WritesCallerPath (OQ-3: most-restrictive).
            "apk_trufflehog",
        ];
        for name in denied_by_default {
            assert!(server.enforce_tool_class(name).is_err());
        }
    }

    /// The `Debug` rendering of a refusal contains the tool name, its
    /// kebab-case class, and the `tool-class-not-allowed` marker.
    #[test]
    fn enforce_error_message_names_tool_and_class() {
        let refusal = DroidsawServer::new()
            .enforce_tool_class("triage")
            .unwrap_err();
        let rendered = format!("{refusal:?}");
        for needle in ["triage", "manages-state", "tool-class-not-allowed"] {
            assert!(rendered.contains(needle));
        }
    }

    /// Adding `ManagesState` and `WritesCallerPath` to the default allow-set
    /// permits `triage` and `apk_export` while `audit` and `apk_yara` stay
    /// refused.
    #[test]
    fn enforce_with_expanded_policy_permits_destructive() {
        let mut allowed = McpToolClass::default_allowed();
        allowed.extend([McpToolClass::ManagesState, McpToolClass::WritesCallerPath]);
        let server = DroidsawServer::with_allowed_classes(allowed);

        // Now triage (ManagesState) and apk_export (WritesCallerPath) are permitted.
        for name in ["triage", "apk_export"] {
            assert!(server.enforce_tool_class(name).is_ok());
        }
        // SpawnsSubprocess not added — audit and apk_yara are still refused.
        for name in ["audit", "apk_yara"] {
            assert!(server.enforce_tool_class(name).is_err());
        }
    }

    /// Accept/reject matrix: for four different allow-sets, checks a sample
    /// of tools that must be permitted and a sample that must be refused.
    #[test]
    fn accept_reject_matrix_per_class() {
        // ReadOnly server: accepts read-only tools, refuses everything else.
        let read_only_server = DroidsawServer::with_allowed_classes(
            std::collections::BTreeSet::from([McpToolClass::ReadOnly]),
        );
        for name in ["info", "hbc_functions", "dex_classes", "apk_scan_corpus"] {
            assert!(read_only_server.enforce_tool_class(name).is_ok());
        }
        for name in [
            "load",       // WritesTempfile
            "audit",      // SpawnsSubprocess
            "apk_yara",   // SpawnsSubprocess
            "apk_export", // WritesCallerPath
            "triage",     // ManagesState
        ] {
            assert!(read_only_server.enforce_tool_class(name).is_err());
        }

        // SpawnsSubprocess server: can run audit and apk_yara.
        let spawn_server = DroidsawServer::with_allowed_classes(
            std::collections::BTreeSet::from([
                McpToolClass::ReadOnly,
                McpToolClass::WritesTempfile,
                McpToolClass::SpawnsSubprocess,
            ]),
        );
        for name in ["audit", "apk_yara", "info"] {
            assert!(spawn_server.enforce_tool_class(name).is_ok());
        }
        for name in [
            "apk_export", // WritesCallerPath still denied
            "triage",     // ManagesState still denied
        ] {
            assert!(spawn_server.enforce_tool_class(name).is_err());
        }

        // WritesCallerPath server: can run apk_export, corpus_ingest, apk_trufflehog.
        let write_server = DroidsawServer::with_allowed_classes(
            std::collections::BTreeSet::from([
                McpToolClass::ReadOnly,
                McpToolClass::WritesTempfile,
                McpToolClass::WritesCallerPath,
            ]),
        );
        for name in [
            "apk_export",
            "corpus_ingest",
            "apk_trufflehog",
            "apk_semgrep_extract",
        ] {
            assert!(write_server.enforce_tool_class(name).is_ok());
        }
        for name in [
            "audit",  // SpawnsSubprocess denied
            "triage", // ManagesState denied
        ] {
            assert!(write_server.enforce_tool_class(name).is_err());
        }

        // ManagesState server: can run triage.
        let state_server = DroidsawServer::with_allowed_classes(
            std::collections::BTreeSet::from([
                McpToolClass::ReadOnly,
                McpToolClass::WritesTempfile,
                McpToolClass::ManagesState,
            ]),
        );
        assert!(state_server.enforce_tool_class("triage").is_ok());
        assert!(state_server.enforce_tool_class("audit").is_err()); // SpawnsSubprocess denied
    }

    #[test]
    fn enforce_with_all_classes_permits_everything() {
        // A server whose policy contains every class returned by
        // `McpToolClass::all()` must let each sampled tool through the gate.
        let every_class = McpToolClass::all()
            .into_iter()
            .collect::<std::collections::BTreeSet<McpToolClass>>();
        let server = DroidsawServer::with_allowed_classes(every_class);

        let sampled_tools = ["info", "load", "audit", "triage", "frida", "decompile"];
        for name in sampled_tools {
            let verdict = server.enforce_tool_class(name);
            assert!(
                verdict.is_ok(),
                "{name} should be allowed under `all` policy"
            );
        }
    }

    /// FIX-1 regression test: the apk_scan_corpus path gate must accept
    /// directories under the `LoadDirectory` role and files under the
    /// `LoadInput` role, must refuse each kind under the other role, and must
    /// reject an empty path and a path that does not exist.
    #[test]
    fn apk_scan_corpus_path_gate_accepts_directories_and_files() {
        // Directory paths must use LoadDirectory (is_dir() branch) — not LoadInput,
        // which enforces is_file() and would reject directories with invalid_params.
        let tmpdir = std::env::temp_dir();
        let tmpdir_str = tmpdir
            .to_str()
            .expect("temp dir path must be valid UTF-8 for this test");
        // A real directory (e.g. $TMPDIR) must pass the LoadDirectory gate.
        assert!(
            is_allowed_path(tmpdir_str, PathRole::LoadDirectory).is_ok(),
            "temp dir should pass LoadDirectory gate"
        );
        // The same directory must FAIL the LoadInput gate. This is the failure
        // mode the FIX-1 bug produced when directories were routed through
        // LoadInput.
        assert!(
            is_allowed_path(tmpdir_str, PathRole::LoadInput).is_err(),
            "directory path must fail LoadInput (is_file check)"
        );

        // A real file for the file-side checks. The process id in the name keeps
        // concurrent test processes from clobbering each other's probe file.
        let tmp_file = tmpdir.join(format!(
            "droidsaw-path-gate-test-{}.tmp",
            std::process::id()
        ));
        std::fs::write(&tmp_file, b"test").expect("failed to create temp probe file");
        let tmp_file_str = tmp_file
            .to_str()
            .expect("temp file path must be valid UTF-8 for this test");
        // Evaluate both gates first and clean up before asserting, so a failing
        // assertion cannot leave the probe file behind.
        let file_passes_load_input = is_allowed_path(tmp_file_str, PathRole::LoadInput).is_ok();
        let file_fails_load_directory =
            is_allowed_path(tmp_file_str, PathRole::LoadDirectory).is_err();
        let _ = std::fs::remove_file(&tmp_file);
        assert!(
            file_passes_load_input,
            "file path should pass LoadInput gate"
        );
        assert!(
            file_fails_load_directory,
            "file path must fail LoadDirectory (is_dir check)"
        );

        // Clearly invalid inputs must be rejected: an empty string and a path
        // that does not exist on disk.
        assert!(
            is_allowed_path("", PathRole::LoadInput).is_err(),
            "empty path must be rejected"
        );
        assert!(
            is_allowed_path("/nonexistent/droidsaw/test/path", PathRole::LoadInput).is_err(),
            "non-existent path must fail canonicalization"
        );
    }
}

#[cfg(test)]
mod current_db_session_tests {
    //! Lock the session-state semantics of `current_db`, exercised through
    //! `resolve_db_path`:
    //!
    //! - No session DB + no override → typed `invalid_params` error
    //!   pointing the caller at `audit` / explicit `db_path`.
    //! - Session DB present + no override → returns the session path.
    //! - Session DB present but missing on disk → error, and the slot is
    //!   cleared.
    //! - Override present → wins over the session slot (but must pass
    //!   the path allowlist; the session slot does not, having been
    //!   written server-side after audit's own validation).
    use super::*;

    /// A server with an explicit concurrency configuration and an empty
    /// session DB slot.
    fn fresh_server() -> DroidsawServer {
        DroidsawServer::with_concurrency(ConcurrencyConfig::new(1, 1, 2, 2, 8))
    }

    /// Scratch DB path under the system temp dir. The process id is part of
    /// the name so concurrent test processes never share a file, and `tag`
    /// keeps tests within one process (which run in parallel) apart.
    fn scratch_db(tag: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(format!(
            "droidsaw-session-{tag}-{}.db",
            std::process::id()
        ))
    }

    /// Stores `path` in the server's session slot, the way a prior `audit`
    /// would. A poisoned lock is recovered rather than propagated so one
    /// panicking test cannot cascade into the others.
    fn set_session_db(server: &DroidsawServer, path: std::path::PathBuf) {
        let mut slot = server
            .current_db
            .lock()
            .unwrap_or_else(|e| e.into_inner());
        *slot = Some(path);
    }

    #[test]
    fn resolve_without_override_and_without_session_errors() {
        let server = fresh_server();
        let err = server.resolve_db_path(None).expect_err("should error");
        let msg = format!("{err:?}");
        assert!(
            msg.contains("no db_path provided"),
            "error must explain why: got {msg}",
        );
    }

    #[test]
    fn resolve_uses_session_slot_when_override_absent_and_path_lives() {
        let server = fresh_server();
        // Touch a real tempfile — the stat-on-read in resolve_db_path
        // rejects ghost paths.
        let real_path = scratch_db("resolve-test");
        std::fs::write(&real_path, b"sqlite-placeholder").expect("failed to create scratch db");
        set_session_db(&server, real_path.clone());

        // Resolve first, clean up second, assert last: a failing assertion
        // must not leave the scratch file behind.
        let outcome = server.resolve_db_path(None);
        let _ = std::fs::remove_file(&real_path);

        let resolved = outcome.expect("should resolve");
        assert_eq!(resolved, real_path);
    }

    #[test]
    fn resolve_stale_slot_clears_and_errors() {
        // Brooker's call: if the session-cached path no longer exists
        // (deleted, moved, unmounted), drop the slot and surface a
        // typed error pointing the caller at `audit`. Avoids the
        // confusing "unable to open database file" symptom.
        let server = fresh_server();
        // Never created on disk, so the slot is stale from the start.
        set_session_db(&server, scratch_db("stale-never-created"));

        let err = server.resolve_db_path(None).expect_err("stale slot must error");
        let msg = format!("{err:?}");
        assert!(
            msg.contains("is gone"),
            "error must mention staleness: got {msg}",
        );
        // Slot must have been cleared by the failed resolve so the
        // next call returns the no-session-yet error instead.
        let guard = server.current_db.lock().unwrap_or_else(|e| e.into_inner());
        assert!(guard.is_none(), "stale slot must be cleared on miss");
    }

    #[test]
    fn resolve_override_present_path_must_pass_allowlist() {
        // An override path that doesn't exist on disk is rejected by
        // is_allowed_path's canonicalization step, even when a valid
        // session slot is set. Override always flows through validation.
        //
        // The session slot must point at a file that really exists:
        // with a ghost path in the slot, an implementation that ignored
        // the override and fell back to the session would ALSO return an
        // error (stale slot), and this test would pass for the wrong reason.
        let server = fresh_server();
        let live_session_db = scratch_db("override-test");
        std::fs::write(&live_session_db, b"sqlite-placeholder")
            .expect("failed to create scratch db");
        set_session_db(&server, live_session_db.clone());

        let override_rejected = server
            .resolve_db_path(Some("/nonexistent/path/audit.db"))
            .is_err();
        // Clean up before asserting so a failure cannot leak the scratch file.
        let _ = std::fs::remove_file(&live_session_db);

        assert!(override_rejected, "override must flow through path allowlist");
    }
}

#[cfg(test)]
mod tool_tier_tests {
    //! Pin down how `McpToolTier` shapes the visible tool surface:
    //!
    //! - The default server (Full) lists every registered tool.
    //! - `with_tool_tier(Basic)` removes every tool outside
    //!   `BASIC_TIER_TOOLS` from `tool_router.list_all()`.
    //! - A disabled tool returns "tool not found" on call() — the same
    //!   shape as an unknown tool, matching rmcp's contract (stated
    //!   contract; not exercised by the tests in this module).
    //! - The classifier covers every name registered in the router
    //!   (no silent UNKNOWN).
    use super::*;
    use std::str::FromStr;

    /// Names of the tools `server`'s router currently lists, in the order
    /// `list_all()` returns them.
    fn listed_tool_names(server: &DroidsawServer) -> Vec<String> {
        let mut names = Vec::new();
        for tool in server.tool_router.list_all() {
            names.push(tool.name.to_string());
        }
        names
    }

    #[test]
    fn parses_kebab_case() {
        // The last row checks that surrounding whitespace is tolerated.
        let cases = [
            ("basic", McpToolTier::Basic),
            ("full", McpToolTier::Full),
            ("  basic  ", McpToolTier::Basic),
        ];
        for (raw, expected) in cases {
            assert_eq!(McpToolTier::from_str(raw).unwrap(), expected);
        }
    }

    #[test]
    fn rejects_unknown_tier() {
        let err = McpToolTier::from_str("medium").unwrap_err();
        // The message must say what went wrong and name both valid tiers.
        for fragment in ["unknown tool tier", "basic", "full"] {
            assert!(err.contains(fragment));
        }
    }

    #[test]
    fn basic_tier_set_has_exactly_twelve_tools() {
        // Documented size — if you change it, update the docstring on
        // `McpToolTier::Basic` and the `--tool-tier` CLI flag help.
        let basic_count = BASIC_TIER_TOOLS.len();
        assert_eq!(basic_count, 12);
    }

    #[test]
    fn tool_tier_classifies_basic_set_as_basic() {
        for name in BASIC_TIER_TOOLS {
            let tier = tool_tier(name);
            assert_eq!(
                tier,
                McpToolTier::Basic,
                "expected {name} in BASIC_TIER_TOOLS to classify as Basic",
            );
        }
    }

    #[test]
    fn tool_tier_full_for_non_basic_examples() {
        // A handful of tools that must NOT land in the Basic tier.
        let full_only_samples = ["hbc_info", "dex_classes", "disasm", "apk_yara", "frida", "diff"];
        for name in full_only_samples {
            let tier = tool_tier(name);
            assert_eq!(tier, McpToolTier::Full, "{name} should classify as Full",);
        }
    }

    #[test]
    fn default_server_exposes_full_surface() {
        let visible = listed_tool_names(&DroidsawServer::new());
        let is_listed = |wanted: &str| visible.iter().any(|n| n == wanted);

        // Full-tier-only example: hbc_info must be visible by default.
        assert!(
            is_listed("hbc_info"),
            "default server must expose hbc_info; got {visible:?}",
        );
        assert!(is_listed("audit"));
        assert!(is_listed("load"));
    }

    #[test]
    fn basic_tier_hides_full_tools_from_list_all() {
        let server = DroidsawServer::new().with_tool_tier(McpToolTier::Basic);
        let visible = listed_tool_names(&server);

        // Every Basic-tier tool must still appear.
        for name in BASIC_TIER_TOOLS {
            let still_listed = visible.iter().any(|n| n == name);
            assert!(
                still_listed,
                "{name} must remain visible under Basic; got {visible:?}",
            );
        }

        // Spot-check the most common Full-tier tools are hidden.
        for hidden in ["hbc_info", "dex_classes", "disasm", "apk_yara", "frida"] {
            let leaked = visible.iter().any(|n| n == hidden);
            assert!(
                !leaked,
                "{hidden} must be hidden under Basic; got {visible:?}",
            );
        }
    }

    #[test]
    fn full_tier_is_no_op_passthrough() {
        // Order is irrelevant here, so compare the two surfaces as sets.
        let name_set = |server: &DroidsawServer| -> std::collections::BTreeSet<String> {
            listed_tool_names(server).into_iter().collect()
        };

        let baseline_visible = name_set(&DroidsawServer::new());
        let tiered_visible = name_set(&DroidsawServer::new().with_tool_tier(McpToolTier::Full));

        assert_eq!(
            baseline_visible, tiered_visible,
            "with_tool_tier(Full) must be a no-op against the default surface",
        );
    }

    #[test]
    fn every_registered_tool_classifies_under_basic_or_full() {
        // A newly added #[tool] handler that nobody classified falls into
        // the default tier (Full), which keeps it visible to Full-tier
        // operators. What this test really guards is that the classifier
        // can be called on every registered name without panicking.
        // NOTE(review): if `McpToolTier` has only the Basic and Full
        // variants, the `matches!` below can never fail — confirm.
        let server = DroidsawServer::new();
        server.tool_router.list_all().into_iter().for_each(|tool| {
            let tier = tool_tier(tool.name.as_ref());
            assert!(matches!(tier, McpToolTier::Basic | McpToolTier::Full));
        });
    }
}