sifs 0.2.1 - Docs.rs

use anyhow::{Context, Result, bail};
use clap::{Args, CommandFactory, Parser, Subcommand, ValueEnum};
use serde_json::{Value, json};
use sifs::daemon::{
    DaemonClient, DaemonRequest, DaemonResult, DaemonRuntimeOptions, IndexRuntimeOptions,
    SearchOptionsWire, SourceSpec, default_daemon_paths, run_foreground,
};
use sifs::{
    CacheConfig, EncoderSpec, IndexOptions, IndexStats, ModelLoadPolicy, ModelOptions, SearchMode,
    SearchOptions, SearchResult, SifsIndex, cache_summary, format_results, is_git_url,
    load_model_with_options, model_status, platform_cache_root, resolve_chunk,
};
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command as ProcessCommand, Output, Stdio};
use std::thread;
use std::time::{Duration, Instant};

#[derive(Parser)]
#[command(
    name = "sifs",
    version,
    about = "SIFS Is Fast Search: instant local code search for agents."
)]
struct Cli {
    #[command(subcommand)]
    command: Option<Command>,
}

#[derive(Subcommand)]
enum Command {
    #[command(about = "Search a local directory or Git URL with natural-language or code queries.")]
    Search {
        #[arg(help = "Natural-language, code, symbol, or literal query to search for.")]
        query: String,
        #[arg(
            default_value = ".",
            help = "Local directory or Git URL to index and search."
        )]
        path: String,
        #[arg(
            short = 'k',
            long = "top-k",
            default_value_t = 5,
            help = "Maximum number of ranked chunks to print."
        )]
        top_k: usize,
        #[arg(short = 'm', long = "mode", value_enum, default_value_t = ModeArg::Hybrid, help = "Ranking mode: hybrid for most searches, bm25 for exact symbols, semantic for conceptual queries.")]
        mode: ModeArg,
        #[arg(
            long = "language",
            help = "Only search chunks with this language label. Repeatable."
        )]
        languages: Vec<String>,
        #[arg(
            long = "path",
            help = "Only search this repository-relative file path. Repeatable."
        )]
        filter_paths: Vec<String>,
        #[arg(
            long,
            default_value_t = 0,
            help = "Include surrounding lines when local source files are available."
        )]
        context_lines: usize,
        #[arg(
            long,
            help = "Include query, ranking, and filter metadata in the output."
        )]
        explain: bool,
        #[command(flatten)]
        output: OutputArgs,
        #[arg(long, help = "Model path or Hugging Face model id.")]
        model: Option<String>,
        #[arg(long, value_enum, default_value_t = EncoderArg::Model2Vec, help = "Encoder for semantic and hybrid search.")]
        encoder: EncoderArg,
        #[arg(long, help = "Disable model downloads and remote Git sources.")]
        offline: bool,
        #[arg(long = "no-download", help = "Disable model downloads.")]
        no_download: bool,
        #[arg(long, help = "Use a custom persistent index cache directory.")]
        cache_dir: Option<PathBuf>,
        #[arg(long, help = "Disable persistent index caches.")]
        no_cache: bool,
        #[arg(long, help = "Use a project-local .sifs cache.")]
        project_cache: bool,
    },
    #[command(about = "Find chunks related to a known file and one-based line number.")]
    FindRelated {
        #[arg(help = "Repository-relative file path, usually copied from a search result.")]
        file_path: String,
        #[arg(help = "One-based line number inside the source chunk.")]
        line: usize,
        #[arg(
            default_value = ".",
            help = "Local directory or Git URL to index and search."
        )]
        path: String,
        #[arg(
            short = 'k',
            long = "top-k",
            default_value_t = 5,
            help = "Maximum number of related chunks to print."
        )]
        top_k: usize,
        #[command(flatten)]
        output: OutputArgs,
        #[arg(long, help = "Model path or Hugging Face model id.")]
        model: Option<String>,
        #[arg(long, value_enum, default_value_t = EncoderArg::Model2Vec, help = "Encoder for related-code search.")]
        encoder: EncoderArg,
        #[arg(long, help = "Disable model downloads and remote Git sources.")]
        offline: bool,
        #[arg(long = "no-download", help = "Disable model downloads.")]
        no_download: bool,
        #[arg(long, help = "Use a custom persistent index cache directory.")]
        cache_dir: Option<PathBuf>,
        #[arg(long, help = "Disable persistent index caches.")]
        no_cache: bool,
        #[arg(long, help = "Use a project-local .sifs cache.")]
        project_cache: bool,
    },
    #[command(about = "Start, install, or inspect the SIFS stdio MCP server.")]
    Mcp {
        #[command(subcommand)]
        command: Option<McpCommand>,
        #[arg(help = "Local directory or git URL to pre-index.")]
        path: Option<String>,
        #[arg(long = "ref", help = "Branch or tag to check out for a Git URL.")]
        ref_name: Option<String>,
        #[arg(long, help = "Model path or Hugging Face model id.")]
        model: Option<String>,
        #[arg(long, help = "Disable model downloads and remote Git sources.")]
        offline: bool,
        #[arg(long = "no-download", help = "Disable model downloads.")]
        no_download: bool,
        #[arg(long, help = "Use a custom persistent index cache directory.")]
        cache_dir: Option<PathBuf>,
        #[arg(long, help = "Disable persistent index caches.")]
        no_cache: bool,
        #[arg(long, help = "Use a project-local .sifs cache.")]
        project_cache: bool,
    },
    #[command(about = "Run or inspect the shared SIFS daemon used by agent integrations.")]
    Daemon {
        #[command(subcommand)]
        command: DaemonCommand,
    },
    #[command(about = "List repository-relative file paths included in the index.")]
    Files {
        #[arg(
            default_value = ".",
            help = "Local directory or Git URL to index and inspect."
        )]
        path: String,
        #[arg(
            long,
            default_value_t = 200,
            help = "Maximum number of file paths to print."
        )]
        limit: usize,
        #[command(flatten)]
        output: OutputArgs,
        #[arg(long, help = "Model path or Hugging Face model id.")]
        model: Option<String>,
        #[arg(long, help = "Disable model downloads and remote Git sources.")]
        offline: bool,
        #[arg(long = "no-download", help = "Disable model downloads.")]
        no_download: bool,
    },
    #[command(about = "Print index status for a local directory or Git URL.")]
    Status {
        #[arg(
            default_value = ".",
            help = "Local directory or Git URL to index and inspect."
        )]
        path: String,
        #[arg(long, help = "Print structured JSON output.")]
        json: bool,
        #[arg(long, help = "Model path or Hugging Face model id.")]
        model: Option<String>,
        #[arg(long, help = "Disable model downloads and remote Git sources.")]
        offline: bool,
        #[arg(long = "no-download", help = "Disable model downloads.")]
        no_download: bool,
    },
    #[command(about = "Print the indexed chunk containing a file and one-based line number.")]
    Get {
        #[arg(help = "Repository-relative file path.")]
        file_path: String,
        #[arg(help = "One-based line number inside the source chunk.")]
        line: usize,
        #[arg(
            default_value = ".",
            help = "Local directory or Git URL to index and inspect."
        )]
        path: String,
        #[command(flatten)]
        output: OutputArgs,
        #[arg(long, help = "Model path or Hugging Face model id.")]
        model: Option<String>,
        #[arg(long, help = "Disable model downloads and remote Git sources.")]
        offline: bool,
        #[arg(long = "no-download", help = "Disable model downloads.")]
        no_download: bool,
    },
    #[command(about = "Remove SIFS cache artifacts from a local directory.")]
    Clean {
        #[arg(
            default_value = ".",
            help = "Local directory whose .sifs cache should be removed."
        )]
        path: String,
    },
    #[command(about = "Inspect or clean SIFS persistent index caches.")]
    Cache {
        #[command(subcommand)]
        command: CacheCommand,
    },
    #[command(about = "Check SIFS model, cache, and offline readiness.")]
    Doctor {
        #[arg(long, help = "Model path or Hugging Face model id.")]
        model: Option<String>,
        #[arg(long, value_enum, default_value_t = EncoderArg::Model2Vec)]
        encoder: EncoderArg,
        #[arg(
            default_value = ".",
            help = "Local directory to inspect for cache readiness."
        )]
        path: String,
        #[arg(long, help = "Disable model downloads and remote Git sources.")]
        offline: bool,
        #[arg(long = "no-download", help = "Disable model downloads.")]
        no_download: bool,
    },
    #[command(about = "Manage the SIFS embedding model cache.")]
    Model {
        #[command(subcommand)]
        command: ModelCommand,
    },
    #[command(about = "Create the Claude agent file at .claude/agents/sifs-search.md.")]
    Init {
        #[arg(long, help = "Overwrite an existing generated agent file.")]
        force: bool,
    },
    #[command(about = "Print SIFS agent, CLI, and MCP capabilities.")]
    Capabilities,
}

#[derive(Subcommand)]
enum McpCommand {
    #[command(about = "Configure SIFS as a local stdio MCP server for Codex and/or Claude Code.")]
    Install {
        #[arg(long, value_enum, default_value_t = McpClientArg::All)]
        client: McpClientArg,
        #[arg(long, value_enum)]
        scope: Option<McpScopeArg>,
        #[arg(
            long,
            help = "Local directory or Git URL to expose through the MCP server."
        )]
        source: Option<String>,
        #[arg(
            long = "ref",
            help = "Branch or tag to check out for a Git URL source."
        )]
        ref_name: Option<String>,
        #[arg(long, default_value = "sifs", help = "MCP server name to configure.")]
        name: String,
        #[arg(long, help = "Disable model downloads and remote Git sources.")]
        offline: bool,
        #[arg(long = "no-download", help = "Disable model downloads.")]
        no_download: bool,
        #[arg(long, help = "Use a custom persistent index cache directory.")]
        cache_dir: Option<PathBuf>,
        #[arg(long, help = "Disable persistent index caches.")]
        no_cache: bool,
        #[arg(long, help = "Use a project-local .sifs cache.")]
        project_cache: bool,
        #[arg(long, help = "Replace an existing same-name MCP server.")]
        force: bool,
        #[arg(
            long,
            help = "Print commands and config without changing client state."
        )]
        dry_run: bool,
    },
    #[command(about = "Inspect local MCP install readiness.")]
    Doctor {
        #[arg(
            default_value = ".",
            help = "Local directory or Git URL the MCP server would expose."
        )]
        source: String,
        #[arg(
            long = "ref",
            help = "Branch or tag to check out for a Git URL source."
        )]
        ref_name: Option<String>,
        #[arg(long, help = "Disable model downloads and remote Git sources.")]
        offline: bool,
        #[arg(long = "no-download", help = "Disable model downloads.")]
        no_download: bool,
        #[arg(long, help = "Use a custom persistent index cache directory.")]
        cache_dir: Option<PathBuf>,
        #[arg(long, help = "Disable persistent index caches.")]
        no_cache: bool,
        #[arg(long, help = "Use a project-local .sifs cache.")]
        project_cache: bool,
    },
}

#[derive(Subcommand)]
enum DaemonCommand {
    #[command(about = "Run the SIFS daemon in the foreground.")]
    Run {
        #[arg(
            long,
            help = "Remove an existing socket before binding. Useful after an unclean shutdown."
        )]
        replace_existing_socket: bool,
    },
    #[command(about = "Ping the running SIFS daemon.")]
    Ping,
    #[command(about = "Print daemon process and cached-index status.")]
    Status {
        #[arg(long, help = "Print structured JSON output.")]
        json: bool,
    },
    #[command(about = "Install a macOS LaunchAgent that keeps the SIFS daemon running.")]
    InstallAgent {
        #[arg(long, help = "Print the LaunchAgent plist without writing it.")]
        dry_run: bool,
        #[arg(long, help = "Replace an existing SIFS LaunchAgent plist.")]
        force: bool,
    },
    #[command(about = "Remove the macOS LaunchAgent for the SIFS daemon.")]
    UninstallAgent {
        #[arg(long, help = "Print what would be removed without changing files.")]
        dry_run: bool,
    },
}

#[derive(Subcommand)]
enum ModelCommand {
    #[command(about = "Download or validate the embedding model in the Hugging Face cache.")]
    Pull {
        #[arg(long)]
        model: Option<String>,
    },
    #[command(about = "Alias for `pull`: download or validate the embedding model.")]
    Fetch {
        #[arg(long)]
        model: Option<String>,
    },
    #[command(about = "Report whether the embedding model is available locally.")]
    Status {
        #[arg(long)]
        model: Option<String>,
        #[arg(long)]
        json: bool,
    },
}

#[derive(Subcommand)]
enum CacheCommand {
    #[command(about = "Report persistent index cache location and size.")]
    Status {
        #[arg(long)]
        cache_dir: Option<PathBuf>,
        #[arg(long)]
        json: bool,
    },
    #[command(about = "Delete the SIFS persistent index cache.")]
    Clean {
        #[arg(long)]
        cache_dir: Option<PathBuf>,
        #[arg(long)]
        dry_run: bool,
    },
}

#[derive(Clone, Copy, Debug, ValueEnum)]
enum ModeArg {
    Hybrid,
    Semantic,
    Bm25,
}

#[derive(Clone, Copy, Debug, ValueEnum)]
enum EncoderArg {
    #[value(name = "model2vec")]
    Model2Vec,
    Hashing,
}

#[derive(Clone, Copy, Debug, ValueEnum)]
enum McpClientArg {
    Codex,
    Claude,
    All,
}

#[derive(Clone, Copy, Debug, ValueEnum)]
enum McpScopeArg {
    Local,
    Project,
    User,
}

#[derive(Clone, Copy, Debug, ValueEnum)]
enum TextFormat {
    Human,
    Compact,
}

#[derive(Args)]
struct OutputArgs {
    #[arg(long, conflicts_with = "jsonl", help = "Print one pretty JSON object.")]
    json: bool,
    #[arg(long, help = "Print newline-delimited JSON records.")]
    jsonl: bool,
    #[arg(long, value_enum, default_value_t = TextFormat::Human, help = "Text output format.")]
    format: TextFormat,
}

impl From<ModeArg> for SearchMode {
    fn from(value: ModeArg) -> Self {
        match value {
            ModeArg::Hybrid => SearchMode::Hybrid,
            ModeArg::Semantic => SearchMode::Semantic,
            ModeArg::Bm25 => SearchMode::Bm25,
        }
    }
}

fn main() -> Result<()> {
    let cli = Cli::parse();
    match cli.command {
        Some(Command::Search {
            query,
            path,
            top_k,
            mode,
            languages,
            filter_paths,
            context_lines,
            explain,
            output,
            model,
            encoder,
            offline,
            no_download,
            cache_dir,
            no_cache,
            project_cache,
        }) => run_search(SearchCommand {
            query,
            path,
            top_k,
            mode: SearchMode::from(mode),
            languages,
            filter_paths,
            context_lines,
            explain,
            output,
            model,
            encoder,
            offline,
            no_download,
            cache: cache_config(cache_dir, no_cache, project_cache),
        })?,
        Some(Command::FindRelated {
            file_path,
            line,
            path,
            top_k,
            output,
            model,
            encoder,
            offline,
            no_download,
            cache_dir,
            no_cache,
            project_cache,
        }) => {
            if let Some(result) = try_daemon_find_related(
                &path,
                &file_path,
                line,
                top_k,
                encoder,
                model.as_deref(),
                offline,
                no_download,
                cache_config(cache_dir.clone(), no_cache, project_cache),
            )? {
                print_find_related_output(
                    &result.source.source,
                    &file_path,
                    line,
                    top_k,
                    result.stats,
                    u128::from(result.elapsed_ms),
                    &result.results,
                    &output,
                )?;
                return Ok(());
            }
            let policy = model_policy(offline, no_download);
            let started = Instant::now();
            let index = build_hybrid_index(
                &path,
                encoder_spec(encoder, model.as_deref(), policy),
                cache_config(cache_dir, no_cache, project_cache),
                offline,
            )?;
            let Some(chunk) = resolve_chunk(&index.chunks, &file_path, line) else {
                eprintln!("No chunk found at {file_path}:{line}.");
                std::process::exit(1);
            };
            let results = index.find_related(&chunk, top_k)?;
            let elapsed_ms = started.elapsed().as_millis();
            print_find_related_output(
                &path,
                &file_path,
                line,
                top_k,
                index.stats(),
                elapsed_ms,
                &results,
                &output,
            )?;
        }
        Some(Command::Model { command }) => run_model(command)?,
        Some(Command::Cache { command }) => run_cache(command)?,
        Some(Command::Doctor {
            model,
            encoder,
            path,
            offline,
            no_download,
        }) => run_doctor(
            &path,
            encoder,
            model.as_deref(),
            model_policy(offline, no_download),
            offline,
        )?,
        Some(Command::Mcp {
            command,
            path,
            ref_name,
            model,
            offline,
            no_download,
            cache_dir,
            no_cache,
            project_cache,
        }) => match command {
            Some(McpCommand::Install {
                client,
                scope,
                source,
                ref_name,
                name,
                offline,
                no_download,
                cache_dir,
                no_cache,
                project_cache,
                force,
                dry_run,
            }) => run_mcp_install(McpInstallOptions {
                client,
                scope,
                source,
                ref_name,
                name,
                offline,
                no_download,
                cache_dir,
                no_cache,
                project_cache,
                force,
                dry_run,
            })?,
            Some(McpCommand::Doctor {
                source,
                ref_name,
                offline,
                no_download,
                cache_dir,
                no_cache,
                project_cache,
            }) => run_mcp_doctor(McpDoctorOptions {
                source,
                ref_name,
                offline,
                no_download,
                cache_dir,
                no_cache,
                project_cache,
            })?,
            None => {
                let policy = model_policy(offline, no_download);
                if offline
                    && let Some(path) = &path
                    && is_git_url(path)
                {
                    bail!("--offline does not allow remote Git sources");
                }
                sifs::mcp::serve_with_options(
                    path,
                    ref_name,
                    ModelOptions::new(model.as_deref(), policy),
                    cache_config(cache_dir, no_cache, project_cache),
                    offline,
                )?;
            }
        },
        Some(Command::Files {
            path,
            limit,
            output,
            model,
            offline,
            no_download,
        }) => run_files(&path, limit, output, model, offline, no_download)?,
        Some(Command::Status {
            path,
            json,
            model,
            offline,
            no_download,
        }) => run_status(&path, json, model, offline, no_download)?,
        Some(Command::Get {
            file_path,
            line,
            path,
            output,
            model,
            offline,
            no_download,
        }) => run_get(&file_path, line, &path, output, model, offline, no_download)?,
        Some(Command::Clean { path }) => run_clean(&path)?,
        Some(Command::Init { force }) => run_init(force)?,
        Some(Command::Capabilities) => print_capabilities(),
        Some(Command::Daemon { command }) => run_daemon_command(command)?,
        None => {
            Cli::command().print_help()?;
            println!();
        }
    }
    Ok(())
}

struct SearchCommand {
    query: String,
    path: String,
    top_k: usize,
    mode: SearchMode,
    languages: Vec<String>,
    filter_paths: Vec<String>,
    context_lines: usize,
    explain: bool,
    output: OutputArgs,
    model: Option<String>,
    encoder: EncoderArg,
    offline: bool,
    no_download: bool,
    cache: CacheConfig,
}

fn run_search(command: SearchCommand) -> Result<()> {
    if let Some(result) = try_daemon_search(&command)? {
        print_search_output(
            &result.query,
            &result.source.source,
            &SearchOptions {
                top_k: command.top_k,
                mode: result.mode,
                alpha: None,
                filter_languages: command.languages.clone(),
                filter_paths: command.filter_paths.clone(),
                use_query_cache: true,
            },
            result.stats,
            u128::from(result.elapsed_ms),
            &daemon_warnings(&result.warnings),
            &result.results,
            command.context_lines,
            command.explain,
            &command.output,
        )?;
        return Ok(());
    }

    let policy = model_policy(command.offline, command.no_download);
    let started = Instant::now();
    let index = build_index_for_mode(
        &command.path,
        command.mode,
        encoder_spec(command.encoder, command.model.as_deref(), policy),
        command.cache,
        command.offline,
    )?;
    let mut options = SearchOptions::new(command.top_k).with_mode(command.mode);
    options.filter_languages = command.languages;
    options.filter_paths = command.filter_paths;
    let results = index.search_with(&command.query, &options)?;
    let elapsed_ms = started.elapsed().as_millis();
    let warnings = context_warnings(&command.path, command.context_lines);

    print_search_output(
        &command.query,
        &command.path,
        &options,
        index.stats(),
        elapsed_ms,
        &warnings,
        &results,
        command.context_lines,
        command.explain,
        &command.output,
    )
}

#[allow(clippy::too_many_arguments)]
fn print_search_output(
    query: &str,
    source: &str,
    options: &SearchOptions,
    stats: IndexStats,
    elapsed_ms: u128,
    warnings: &[String],
    results: &[SearchResult],
    context_lines: usize,
    explain: bool,
    output: &OutputArgs,
) -> Result<()> {
    let payload = search_payload(
        query,
        source,
        options,
        stats,
        elapsed_ms,
        warnings,
        results,
        context_lines,
    );
    if output.json {
        println!("{}", serde_json::to_string_pretty(&payload)?);
    } else if output.jsonl {
        for result in results {
            println!(
                "{}",
                serde_json::to_string(&search_result_record(
                    query,
                    source,
                    options,
                    elapsed_ms,
                    warnings,
                    result,
                    context_lines,
                ))?
            );
        }
    } else {
        if explain {
            print_explanation(query, source, options, elapsed_ms, warnings);
        }
        match output.format {
            TextFormat::Human => {
                if results.is_empty() {
                    println!("No results found.");
                } else {
                    println!(
                        "{}",
                        format_results(
                            &format!("Search results for: {:?} (mode={})", query, options.mode),
                            results,
                        )
                    );
                }
            }
            TextFormat::Compact => print_compact_results(results),
        }
    }
    Ok(())
}

fn try_daemon_search(
    command: &SearchCommand,
) -> Result<Option<sifs::daemon::protocol::SearchResultSet>> {
    let Some(client) = daemon_client_if_running()? else {
        return Ok(None);
    };
    let source = SourceSpec::resolve(&command.path, None, command.offline)?;
    let policy = model_policy(command.offline, command.no_download);
    let runtime_options = match command.mode {
        SearchMode::Bm25 => IndexRuntimeOptions::sparse(command.cache.clone()),
        SearchMode::Semantic | SearchMode::Hybrid => IndexRuntimeOptions::with_encoder(
            encoder_spec(command.encoder, command.model.as_deref(), policy),
            command.cache.clone(),
        ),
    };
    let mut search = SearchOptions::new(command.top_k).with_mode(command.mode);
    search.filter_languages = command.languages.clone();
    search.filter_paths = command.filter_paths.clone();
    match client.send(DaemonRequest::Search {
        source,
        options: runtime_options,
        query: command.query.clone(),
        search: SearchOptionsWire::from(search),
    }) {
        Ok(DaemonResult::Search(results)) => Ok(Some(results)),
        Ok(other) => bail!("unexpected daemon response: {other:?}"),
        Err(_) => Ok(None),
    }
}

#[allow(clippy::too_many_arguments)]
fn try_daemon_find_related(
    path: &str,
    file_path: &str,
    line: usize,
    top_k: usize,
    encoder: EncoderArg,
    model: Option<&str>,
    offline: bool,
    no_download: bool,
    cache: CacheConfig,
) -> Result<Option<sifs::daemon::protocol::SearchResultSet>> {
    let Some(client) = daemon_client_if_running()? else {
        return Ok(None);
    };
    let source = SourceSpec::resolve(path, None, offline)?;
    let policy = model_policy(offline, no_download);
    match client.send(DaemonRequest::FindRelated {
        source,
        options: IndexRuntimeOptions::with_encoder(encoder_spec(encoder, model, policy), cache),
        file_path: file_path.to_owned(),
        line,
        top_k,
    }) {
        Ok(DaemonResult::FindRelated(results)) => Ok(Some(results)),
        Ok(other) => bail!("unexpected daemon response: {other:?}"),
        Err(_) => Ok(None),
    }
}

#[allow(clippy::too_many_arguments)]
fn print_find_related_output(
    source: &str,
    file_path: &str,
    line: usize,
    top_k: usize,
    stats: IndexStats,
    elapsed_ms: u128,
    results: &[SearchResult],
    output: &OutputArgs,
) -> Result<()> {
    let payload = json!({
        "source": source,
        "file_path": file_path,
        "line": line,
        "top_k": top_k,
        "index_stats": stats,
        "elapsed_ms": elapsed_ms,
        "results": structured_results(source, results, 0),
    });
    if output.json {
        println!("{}", serde_json::to_string_pretty(&payload)?);
    } else if output.jsonl {
        for result in results {
            println!(
                "{}",
                serde_json::to_string(&json!({
                    "source": source,
                    "file_path": file_path,
                    "line": line,
                    "top_k": top_k,
                    "elapsed_ms": elapsed_ms,
                    "result": structured_result(source, result, 0),
                }))?
            );
        }
    } else {
        match output.format {
            TextFormat::Human => {
                if results.is_empty() {
                    println!("No related chunks found for {file_path}:{line}.");
                } else {
                    println!(
                        "{}",
                        format_results(&format!("Chunks related to {file_path}:{line}"), results)
                    );
                }
            }
            TextFormat::Compact => print_compact_results(results),
        }
    }
    Ok(())
}

fn daemon_client_if_running() -> Result<Option<DaemonClient>> {
    let paths = default_daemon_paths()?;
    if paths.socket.exists() {
        Ok(Some(DaemonClient::new(paths)))
    } else {
        Ok(None)
    }
}

fn daemon_warnings(warnings: &[sifs::IndexWarning]) -> Vec<String> {
    warnings
        .iter()
        .map(|warning| {
            if warning.path.is_empty() {
                warning.message.clone()
            } else {
                format!("{}: {}", warning.path, warning.message)
            }
        })
        .collect()
}

fn build_index_for_mode(
    path: &str,
    mode: SearchMode,
    encoder_spec: EncoderSpec,
    cache: CacheConfig,
    offline: bool,
) -> Result<SifsIndex> {
    match mode {
        SearchMode::Bm25 => build_sparse_index(path, cache, offline),
        SearchMode::Semantic | SearchMode::Hybrid => {
            build_hybrid_index(path, encoder_spec, cache, offline)
        }
    }
}

fn build_sparse_index(path: &str, cache: CacheConfig, offline: bool) -> Result<SifsIndex> {
    let options = IndexOptions::sparse().with_cache(cache);
    if is_git_url(path) {
        if offline {
            bail!("--offline does not allow remote Git sources");
        }
        SifsIndex::from_git_with_index_options(path, None, options)
    } else {
        SifsIndex::from_path_with_index_options(path, options)
    }
}

fn build_hybrid_index(
    path: &str,
    encoder_spec: EncoderSpec,
    cache: CacheConfig,
    offline: bool,
) -> Result<SifsIndex> {
    let options = IndexOptions::sparse()
        .with_encoder_spec(encoder_spec)
        .with_cache(cache);
    if is_git_url(path) {
        if offline {
            bail!("--offline does not allow remote Git sources");
        }
        SifsIndex::from_git_with_index_options(path, None, options)
    } else {
        SifsIndex::from_path_with_index_options(path, options)
    }
}

fn cache_config(cache_dir: Option<PathBuf>, no_cache: bool, project_cache: bool) -> CacheConfig {
    if no_cache {
        CacheConfig::Disabled
    } else if project_cache {
        CacheConfig::Project
    } else if let Some(path) = cache_dir {
        CacheConfig::Custom(path)
    } else {
        CacheConfig::Platform
    }
}

fn run_files(
    path: &str,
    limit: usize,
    output: OutputArgs,
    model: Option<String>,
    offline: bool,
    no_download: bool,
) -> Result<()> {
    if let Some(DaemonResult::ListFiles {
        source,
        total,
        files,
    }) = try_daemon_list_files(path, limit, model.as_deref(), offline, no_download)?
    {
        print_files_output(&source.source, total, limit, 0, files, output)?;
        return Ok(());
    }
    let policy = model_policy(offline, no_download);
    let started = Instant::now();
    let index = build_hybrid_index(
        path,
        EncoderSpec::model2vec(model.as_deref(), policy),
        CacheConfig::Platform,
        offline,
    )?;
    let elapsed_ms = started.elapsed().as_millis();
    let files = index.indexed_files();
    let shown: Vec<_> = files.iter().take(limit).cloned().collect();
    print_files_output(path, files.len(), limit, elapsed_ms, shown, output)
}

fn print_files_output(
    source: &str,
    total: usize,
    limit: usize,
    elapsed_ms: u128,
    shown: Vec<String>,
    output: OutputArgs,
) -> Result<()> {
    let payload = json!({
        "source": source,
        "total": total,
        "limit": limit,
        "elapsed_ms": elapsed_ms,
        "files": shown,
    });
    if output.json {
        println!("{}", serde_json::to_string_pretty(&payload)?);
    } else if output.jsonl {
        for file in &shown {
            println!(
                "{}",
                serde_json::to_string(&json!({
                    "source": source,
                    "total": total,
                    "limit": limit,
                    "file_path": file,
                }))?
            );
        }
    } else {
        match output.format {
            TextFormat::Human => {
                println!(
                    "Indexed files for {source:?} (showing {} of {}):",
                    shown.len(),
                    total
                );
                for file in shown {
                    println!("{file}");
                }
            }
            TextFormat::Compact => {
                for file in shown {
                    println!("{file}");
                }
            }
        }
    }
    Ok(())
}

fn try_daemon_list_files(
    path: &str,
    limit: usize,
    model: Option<&str>,
    offline: bool,
    no_download: bool,
) -> Result<Option<sifs::daemon::protocol::DaemonResult>> {
    let Some(client) = daemon_client_if_running()? else {
        return Ok(None);
    };
    let source = SourceSpec::resolve(path, None, offline)?;
    let policy = model_policy(offline, no_download);
    match client.send(DaemonRequest::ListFiles {
        source,
        options: IndexRuntimeOptions::with_encoder(
            EncoderSpec::model2vec(model, policy),
            CacheConfig::Platform,
        ),
        limit,
    }) {
        Ok(result @ DaemonResult::ListFiles { .. }) => Ok(Some(result)),
        Ok(other) => bail!("unexpected daemon response: {other:?}"),
        Err(_) => Ok(None),
    }
}

fn run_status(
    path: &str,
    json_output: bool,
    model: Option<String>,
    offline: bool,
    no_download: bool,
) -> Result<()> {
    if let Some(DaemonResult::IndexStatus(status)) =
        try_daemon_index_status(path, model.as_deref(), offline, no_download)?
    {
        print_status_output(
            &status.source.source,
            json_output,
            status.stats,
            status.semantic_loaded,
            status.semantic_loaded,
            0,
        )?;
        return Ok(());
    }
    let policy = model_policy(offline, no_download);
    let started = Instant::now();
    let index = build_hybrid_index(
        path,
        EncoderSpec::model2vec(model.as_deref(), policy),
        CacheConfig::Platform,
        offline,
    )?;
    let elapsed_ms = started.elapsed().as_millis();
    let stats = index.stats();
    print_status_output(
        path,
        json_output,
        stats,
        semantic_index_available(path),
        index.semantic_loaded(),
        elapsed_ms,
    )
}

fn print_status_output(
    source: &str,
    json_output: bool,
    stats: IndexStats,
    semantic_index_available: bool,
    semantic_index_loaded: bool,
    elapsed_ms: u128,
) -> Result<()> {
    let payload = json!({
        "source": source,
        "index_stats": stats,
        "elapsed_ms": elapsed_ms,
        "semantic_index_available": semantic_index_available,
        "semantic_index_loaded": semantic_index_loaded,
    });
    if json_output {
        println!("{}", serde_json::to_string_pretty(&payload)?);
    } else {
        println!(
            "Index status for {source:?}: {} files, {} chunks, languages: {}. Semantic index available: {}. Semantic index loaded: {}.",
            stats.indexed_files,
            stats.total_chunks,
            format_languages(&stats.languages),
            semantic_index_available,
            semantic_index_loaded
        );
    }
    Ok(())
}

fn try_daemon_index_status(
    path: &str,
    model: Option<&str>,
    offline: bool,
    no_download: bool,
) -> Result<Option<DaemonResult>> {
    let Some(client) = daemon_client_if_running()? else {
        return Ok(None);
    };
    let source = SourceSpec::resolve(path, None, offline)?;
    let policy = model_policy(offline, no_download);
    match client.send(DaemonRequest::IndexStatus {
        source,
        options: IndexRuntimeOptions::with_encoder(
            EncoderSpec::model2vec(model, policy),
            CacheConfig::Platform,
        ),
    }) {
        Ok(result @ DaemonResult::IndexStatus(_)) => Ok(Some(result)),
        Ok(other) => bail!("unexpected daemon response: {other:?}"),
        Err(_) => Ok(None),
    }
}

fn semantic_index_available(path: &str) -> bool {
    if is_git_url(path) {
        return false;
    }
    let cache_dir = Path::new(path).join(".sifs");
    let Ok(entries) = fs::read_dir(cache_dir) else {
        return false;
    };
    entries.filter_map(Result::ok).any(|entry| {
        entry
            .file_name()
            .to_string_lossy()
            .starts_with("semantic-v2-")
    })
}

fn run_get(
    file_path: &str,
    line: usize,
    path: &str,
    output: OutputArgs,
    model: Option<String>,
    offline: bool,
    no_download: bool,
) -> Result<()> {
    if let Some(DaemonResult::GetChunk { source, chunk }) = try_daemon_get_chunk(
        file_path,
        line,
        path,
        model.as_deref(),
        offline,
        no_download,
    )? {
        print_get_output(&source.source, &chunk, output)?;
        return Ok(());
    }
    let policy = model_policy(offline, no_download);
    let index = build_hybrid_index(
        path,
        EncoderSpec::model2vec(model.as_deref(), policy),
        CacheConfig::Platform,
        offline,
    )?;
    let Some(chunk) = resolve_chunk(&index.chunks, file_path, line) else {
        eprintln!("No chunk found at {file_path}:{line}. Use `sifs files` to check indexed paths.");
        std::process::exit(1);
    };
    print_get_output(path, &chunk, output)
}

fn print_get_output(source: &str, chunk: &sifs::Chunk, output: OutputArgs) -> Result<()> {
    let payload = json!({
        "source": source,
        "chunk": chunk,
    });
    if output.json {
        println!("{}", serde_json::to_string_pretty(&payload)?);
    } else if output.jsonl {
        println!("{}", serde_json::to_string(&payload)?);
    } else {
        match output.format {
            TextFormat::Human => {
                println!(
                    "{}\n\n```{}\n{}\n```",
                    chunk.location(),
                    chunk.language.clone().unwrap_or_default(),
                    chunk.content
                );
            }
            TextFormat::Compact => {
                println!(
                    "{}\t{}\t{}",
                    chunk.location(),
                    chunk.language.clone().unwrap_or_default(),
                    chunk.content.lines().next().unwrap_or_default().trim()
                );
            }
        }
    }
    Ok(())
}

fn try_daemon_get_chunk(
    file_path: &str,
    line: usize,
    path: &str,
    model: Option<&str>,
    offline: bool,
    no_download: bool,
) -> Result<Option<DaemonResult>> {
    let Some(client) = daemon_client_if_running()? else {
        return Ok(None);
    };
    let source = SourceSpec::resolve(path, None, offline)?;
    let policy = model_policy(offline, no_download);
    match client.send(DaemonRequest::GetChunk {
        source,
        options: IndexRuntimeOptions::with_encoder(
            EncoderSpec::model2vec(model, policy),
            CacheConfig::Platform,
        ),
        file_path: file_path.to_owned(),
        line,
    }) {
        Ok(result @ DaemonResult::GetChunk { .. }) => Ok(Some(result)),
        Ok(other) => bail!("unexpected daemon response: {other:?}"),
        Err(_) => Ok(None),
    }
}

fn run_clean(path: &str) -> Result<()> {
    if is_git_url(path) {
        bail!("clean only supports local directories");
    }
    let cache_dir = Path::new(path).join(".sifs");
    if cache_dir.exists() {
        fs::remove_dir_all(&cache_dir)
            .with_context(|| format!("remove SIFS cache {}", cache_dir.display()))?;
        println!("Removed {}", cache_dir.display());
    } else {
        println!("No SIFS cache found at {}", cache_dir.display());
    }
    Ok(())
}

#[allow(clippy::too_many_arguments)]
fn search_payload(
    query: &str,
    source: &str,
    options: &SearchOptions,
    stats: IndexStats,
    elapsed_ms: u128,
    warnings: &[String],
    results: &[SearchResult],
    context_lines: usize,
) -> Value {
    json!({
        "query": query,
        "mode": options.mode.to_string(),
        "source": source,
        "top_k": options.top_k,
        "filter_languages": options.filter_languages,
        "filter_paths": options.filter_paths,
        "index_stats": stats,
        "elapsed_ms": elapsed_ms,
        "warnings": warnings,
        "results": structured_results(source, results, context_lines),
    })
}

fn search_result_record(
    query: &str,
    source: &str,
    options: &SearchOptions,
    elapsed_ms: u128,
    warnings: &[String],
    result: &SearchResult,
    context_lines: usize,
) -> Value {
    json!({
        "query": query,
        "mode": options.mode.to_string(),
        "source": source,
        "top_k": options.top_k,
        "filter_languages": options.filter_languages,
        "filter_paths": options.filter_paths,
        "elapsed_ms": elapsed_ms,
        "warnings": warnings,
        "result": structured_result(source, result, context_lines),
    })
}

fn structured_results(source: &str, results: &[SearchResult], context_lines: usize) -> Value {
    json!(
        results
            .iter()
            .map(|result| structured_result(source, result, context_lines))
            .collect::<Vec<_>>()
    )
}

fn structured_result(source: &str, result: &SearchResult, context_lines: usize) -> Value {
    let mut value = json!({
        "file_path": result.chunk.file_path,
        "start_line": result.chunk.start_line,
        "end_line": result.chunk.end_line,
        "language": result.chunk.language,
        "score": result.score,
        "source": result.source.to_string(),
        "content": result.chunk.content,
    });
    if context_lines > 0
        && let Some(context) = expanded_context(
            source,
            &result.chunk.file_path,
            result.chunk.start_line,
            result.chunk.end_line,
            context_lines,
        )
    {
        value["context"] = context;
    }
    value
}

fn expanded_context(
    source: &str,
    file_path: &str,
    start_line: usize,
    end_line: usize,
    context_lines: usize,
) -> Option<Value> {
    if is_git_url(source) {
        return None;
    }
    let full_path = Path::new(source).join(file_path);
    let content = fs::read_to_string(full_path).ok()?;
    let lines: Vec<_> = content.lines().collect();
    if lines.is_empty() {
        return None;
    }
    let context_start = start_line.saturating_sub(context_lines).max(1);
    let context_end = (end_line + context_lines).min(lines.len());
    let text = lines
        .get(context_start - 1..context_end)?
        .to_vec()
        .join("\n");
    Some(json!({
        "start_line": context_start,
        "end_line": context_end,
        "content": text,
    }))
}

fn context_warnings(source: &str, context_lines: usize) -> Vec<String> {
    if context_lines > 0 && is_git_url(source) {
        vec![
            "context_lines is best-effort and is not expanded for Git URL sources in CLI mode"
                .to_owned(),
        ]
    } else {
        Vec::new()
    }
}

fn print_explanation(
    query: &str,
    source: &str,
    options: &SearchOptions,
    elapsed_ms: u128,
    warnings: &[String],
) {
    println!(
        "Query: {query:?}; source: {source}; mode: {}; top_k: {}; languages: [{}]; paths: [{}]; elapsed_ms: {elapsed_ms}",
        options.mode,
        options.top_k,
        options.filter_languages.join(", "),
        options.filter_paths.join(", ")
    );
    for warning in warnings {
        println!("Warning: {warning}");
    }
}

fn print_compact_results(results: &[SearchResult]) {
    for result in results {
        println!(
            "{}\t{:.4}\t{}\t{}",
            result.chunk.location(),
            result.score,
            result.source,
            result
                .chunk
                .content
                .lines()
                .next()
                .unwrap_or_default()
                .trim()
        );
    }
}

fn format_languages(languages: &std::collections::BTreeMap<String, usize>) -> String {
    if languages.is_empty() {
        return "none".to_owned();
    }
    languages
        .iter()
        .map(|(language, count)| format!("{language}={count}"))
        .collect::<Vec<_>>()
        .join(", ")
}

fn model_policy(offline: bool, no_download: bool) -> ModelLoadPolicy {
    if offline {
        ModelLoadPolicy::Offline
    } else if no_download {
        ModelLoadPolicy::NoDownload
    } else {
        ModelLoadPolicy::AllowDownload
    }
}

fn encoder_spec(encoder: EncoderArg, model: Option<&str>, policy: ModelLoadPolicy) -> EncoderSpec {
    match encoder {
        EncoderArg::Model2Vec => EncoderSpec::model2vec(model, policy),
        EncoderArg::Hashing => EncoderSpec::hashing(),
    }
}

fn run_daemon_command(command: DaemonCommand) -> Result<()> {
    match command {
        DaemonCommand::Run {
            replace_existing_socket,
        } => {
            let paths = default_daemon_paths()?;
            eprintln!("SIFS daemon listening on {}", paths.socket.display());
            run_foreground(DaemonRuntimeOptions {
                paths,
                replace_existing_socket,
            })
        }
        DaemonCommand::Ping => {
            let client = DaemonClient::new(default_daemon_paths()?);
            match client.send(DaemonRequest::Ping)? {
                DaemonResult::Pong { version } => {
                    println!("SIFS daemon is running: {version}");
                    Ok(())
                }
                other => bail!("unexpected daemon response: {other:?}"),
            }
        }
        DaemonCommand::Status { json } => {
            let client = DaemonClient::new(default_daemon_paths()?);
            match client.send(DaemonRequest::Status)? {
                DaemonResult::Status(status) => {
                    if json {
                        println!("{}", serde_json::to_string_pretty(&status)?);
                    } else {
                        println!(
                            "SIFS daemon {} (pid {}, protocol {})",
                            status.version, status.pid, status.protocol_version
                        );
                        if status.indexes.is_empty() {
                            println!("Cached indexes: none");
                        } else {
                            println!("Cached indexes:");
                            for index in status.indexes {
                                println!(
                                    "- {}: {} files, {} chunks, semantic_loaded={}",
                                    index.source.display(),
                                    index.stats.indexed_files,
                                    index.stats.total_chunks,
                                    index.semantic_loaded
                                );
                            }
                        }
                    }
                    Ok(())
                }
                other => bail!("unexpected daemon response: {other:?}"),
            }
        }
        DaemonCommand::InstallAgent { dry_run, force } => install_launch_agent(dry_run, force),
        DaemonCommand::UninstallAgent { dry_run } => uninstall_launch_agent(dry_run),
    }
}

fn install_launch_agent(dry_run: bool, force: bool) -> Result<()> {
    let exe = std::env::current_exe().context("resolve current sifs executable")?;
    warn_if_development_binary(&exe);
    if !dry_run && stable_binary_path(&exe) == "no" {
        bail!(
            "refusing to install LaunchAgent for development binary {}. Install with `cargo install --locked sifs` or Homebrew first.",
            exe.display()
        );
    }
    let plist_path = launch_agent_path()?;
    let plist = launch_agent_plist(&exe)?;
    if dry_run {
        println!("{}", plist);
        return Ok(());
    }
    if plist_path.exists() && !force {
        bail!(
            "SIFS LaunchAgent already exists at {}. Re-run with --force to replace it.",
            plist_path.display()
        );
    }
    let parent = plist_path
        .parent()
        .context("LaunchAgent path has no parent directory")?;
    fs::create_dir_all(parent)
        .with_context(|| format!("create LaunchAgents directory {}", parent.display()))?;
    fs::write(&plist_path, plist)
        .with_context(|| format!("write LaunchAgent {}", plist_path.display()))?;
    let _ = ProcessCommand::new("launchctl")
        .args([
            "bootout",
            &format!("gui/{}", current_uid_string()),
            plist_path.to_str().unwrap(),
        ])
        .output();
    let output = ProcessCommand::new("launchctl")
        .args([
            "bootstrap",
            &format!("gui/{}", current_uid_string()),
            plist_path.to_str().unwrap(),
        ])
        .output()
        .context("run launchctl bootstrap")?;
    if !output.status.success() {
        bail!(
            "launchctl bootstrap failed: {}",
            String::from_utf8_lossy(&output.stderr).trim()
        );
    }
    println!("Installed SIFS LaunchAgent at {}", plist_path.display());
    Ok(())
}

fn uninstall_launch_agent(dry_run: bool) -> Result<()> {
    let plist_path = launch_agent_path()?;
    if dry_run {
        println!("Would unload and remove {}", plist_path.display());
        return Ok(());
    }
    if plist_path.exists() {
        let _ = ProcessCommand::new("launchctl")
            .args([
                "bootout",
                &format!("gui/{}", current_uid_string()),
                plist_path.to_str().unwrap(),
            ])
            .output();
        fs::remove_file(&plist_path)
            .with_context(|| format!("remove LaunchAgent {}", plist_path.display()))?;
        println!("Removed SIFS LaunchAgent at {}", plist_path.display());
    } else {
        println!("No SIFS LaunchAgent found at {}", plist_path.display());
    }
    Ok(())
}

fn launch_agent_path() -> Result<PathBuf> {
    let home = std::env::var_os("HOME").context("HOME is not set")?;
    Ok(PathBuf::from(home).join("Library/LaunchAgents/dev.sifs.daemon.plist"))
}

fn launch_agent_plist(exe: &Path) -> Result<String> {
    Ok(format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>Label</key>
  <string>dev.sifs.daemon</string>
  <key>ProgramArguments</key>
  <array>
    <string>{}</string>
    <string>daemon</string>
    <string>run</string>
    <string>--replace-existing-socket</string>
  </array>
  <key>RunAtLoad</key>
  <true/>
  <key>KeepAlive</key>
  <true/>
  <key>StandardOutPath</key>
  <string>{}</string>
  <key>StandardErrorPath</key>
  <string>{}</string>
</dict>
</plist>
"#,
        xml_escape(&exe.to_string_lossy()),
        xml_escape(&default_daemon_paths()?.log_file.to_string_lossy()),
        xml_escape(&default_daemon_paths()?.log_file.to_string_lossy()),
    ))
}

fn xml_escape(value: &str) -> String {
    value
        .replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
        .replace('"', "&quot;")
        .replace('\'', "&apos;")
}

fn current_uid_string() -> String {
    #[cfg(unix)]
    {
        unsafe { libc::geteuid().to_string() }
    }
    #[cfg(not(unix))]
    {
        "0".to_owned()
    }
}

fn run_model(command: ModelCommand) -> Result<()> {
    match command {
        ModelCommand::Pull { model } | ModelCommand::Fetch { model } => {
            let options = ModelOptions::new(model.as_deref(), ModelLoadPolicy::AllowDownload);
            load_model_with_options(&options)?;
            println!("Model is available: {}", options.model);
        }
        ModelCommand::Status { model, json } => {
            let status = model_status(model.as_deref());
            if json {
                println!(
                    "{}",
                    serde_json::to_string_pretty(&serde_json::json!({
                        "model": status.model,
                        "available": status.available(),
                        "tokenizer": status.tokenizer,
                        "safetensors": status.safetensors,
                        "config": status.config,
                    }))?
                );
            } else if status.available() {
                println!("Model is available locally: {}", status.model);
            } else {
                println!("Model is not available locally: {}", status.model);
            }
        }
    }
    Ok(())
}

fn run_doctor(
    path: &str,
    encoder: EncoderArg,
    model: Option<&str>,
    policy: ModelLoadPolicy,
    offline: bool,
) -> Result<()> {
    println!("SIFS doctor");
    println!("Path: {path}");
    if is_git_url(path) {
        println!("Source: remote Git URL");
        println!(
            "Remote Git under offline mode: {}",
            if offline { "blocked" } else { "allowed" }
        );
    } else {
        let path = PathBuf::from(path);
        println!("Source: local directory");
        println!("Path exists: {}", path.exists());
        println!("Path is directory: {}", path.is_dir());
        if path.is_dir() {
            let cache_dir = path.join(".sifs");
            let cache_writable = if cache_dir.exists() {
                fs::metadata(&cache_dir)
                    .map(|metadata| !metadata.permissions().readonly())
                    .unwrap_or(false)
            } else {
                fs::metadata(&path)
                    .map(|metadata| !metadata.permissions().readonly())
                    .unwrap_or(false)
            };
            println!("Cache directory: {}", cache_dir.display());
            println!("Cache writable: {cache_writable}");
        }
    }

    match encoder {
        EncoderArg::Hashing => {
            println!("Encoder: hashing");
            println!("Semantic readiness: ready without model files");
        }
        EncoderArg::Model2Vec => {
            let options = ModelOptions::new(model, policy);
            let status = model_status(Some(&options.model));
            println!("Encoder: model2vec");
            println!("Model: {}", status.model);
            println!("Tokenizer: {}", file_status(status.tokenizer.as_ref()));
            println!("Safetensors: {}", file_status(status.safetensors.as_ref()));
            println!("Config: {}", file_status(status.config.as_ref()));
            println!(
                "Semantic readiness: {}",
                if status.available() {
                    "ready from local files"
                } else if policy == ModelLoadPolicy::AllowDownload {
                    "will download on first semantic/hybrid search"
                } else {
                    "not ready in offline/no-download mode"
                }
            );
        }
    }
    Ok(())
}

fn file_status(path: Option<&PathBuf>) -> String {
    path.map(|path| format!("found at {}", path.display()))
        .unwrap_or_else(|| "missing".to_owned())
}

struct McpInstallOptions {
    client: McpClientArg,
    scope: Option<McpScopeArg>,
    source: Option<String>,
    ref_name: Option<String>,
    name: String,
    offline: bool,
    no_download: bool,
    cache_dir: Option<PathBuf>,
    no_cache: bool,
    project_cache: bool,
    force: bool,
    dry_run: bool,
}

struct McpDoctorOptions {
    source: String,
    ref_name: Option<String>,
    offline: bool,
    no_download: bool,
    cache_dir: Option<PathBuf>,
    no_cache: bool,
    project_cache: bool,
}

struct McpConfig {
    sifs_path: PathBuf,
    source: Option<String>,
    ref_name: Option<String>,
    offline: bool,
    no_download: bool,
    cache_dir: Option<PathBuf>,
    no_cache: bool,
    project_cache: bool,
}

fn run_mcp_install(options: McpInstallOptions) -> Result<()> {
    let source = options
        .source
        .as_deref()
        .map(|source| resolve_mcp_source(Some(source), options.offline))
        .transpose()?;
    let config = McpConfig {
        sifs_path: std::env::current_exe().context("resolve current sifs executable")?,
        source,
        ref_name: options.ref_name,
        offline: options.offline,
        no_download: options.no_download,
        cache_dir: options.cache_dir,
        no_cache: options.no_cache,
        project_cache: options.project_cache,
    };
    warn_if_development_binary(&config.sifs_path);
    if !options.dry_run && stable_binary_path(&config.sifs_path) == "no" {
        bail!(
            "refusing to write durable MCP config for development binary {}. Install with `cargo install --locked sifs` or Homebrew, then rerun this command.",
            config.sifs_path.display()
        );
    }

    match options.client {
        McpClientArg::Codex => {
            install_codex(&options.name, &config, options.force, options.dry_run)?
        }
        McpClientArg::Claude => install_claude(
            &options.name,
            options.scope,
            &config,
            options.force,
            options.dry_run,
        )?,
        McpClientArg::All => {
            install_codex(&options.name, &config, options.force, options.dry_run)?;
            install_claude(
                &options.name,
                options.scope,
                &config,
                options.force,
                options.dry_run,
            )?;
        }
    }
    Ok(())
}

fn run_mcp_doctor(options: McpDoctorOptions) -> Result<()> {
    let source = resolve_mcp_source(Some(&options.source), options.offline)?;
    let config = McpConfig {
        sifs_path: std::env::current_exe().context("resolve current sifs executable")?,
        source: Some(source),
        ref_name: options.ref_name,
        offline: options.offline,
        no_download: options.no_download,
        cache_dir: options.cache_dir,
        no_cache: options.no_cache,
        project_cache: options.project_cache,
    };
    println!("SIFS MCP doctor");
    println!("SIFS executable: {}", config.sifs_path.display());
    println!(
        "Stable install path: {}",
        stable_binary_path(&config.sifs_path)
    );
    println!("Codex CLI: {}", command_status("codex"));
    println!("Claude CLI: {}", command_status("claude"));
    println!(
        "MCP command: {}",
        display_command(&mcp_server_command(&config))
    );
    report_mcp_handshake(&config, HandshakeFraming::LineDelimited);
    report_mcp_handshake(&config, HandshakeFraming::ContentLength);

    if let Some(source) = &config.source
        && !is_git_url(source)
    {
        let smoke = ProcessCommand::new(&config.sifs_path)
            .args([
                "search",
                "sifs_mcp_smoke",
                source,
                "--mode",
                "bm25",
                "--offline",
                "--no-cache",
            ])
            .output();
        match smoke {
            Ok(output) if output.status.success() => println!("BM25 smoke: passed"),
            Ok(output) => println!(
                "BM25 smoke: failed ({})",
                String::from_utf8_lossy(&output.stderr).trim()
            ),
            Err(error) => println!("BM25 smoke: could not run ({error})"),
        }
    } else if config.source.as_deref().map(is_git_url).unwrap_or(false) {
        println!("BM25 smoke: skipped for Git URL source");
    } else {
        println!(
            "BM25 smoke: skipped because MCP install is daemon-first and not pinned to a source"
        );
    }
    Ok(())
}

#[derive(Clone, Copy)]
enum HandshakeFraming {
    LineDelimited,
    ContentLength,
}

impl HandshakeFraming {
    fn label(self) -> &'static str {
        match self {
            HandshakeFraming::LineDelimited => "newline",
            HandshakeFraming::ContentLength => "Content-Length",
        }
    }
}

fn report_mcp_handshake(config: &McpConfig, framing: HandshakeFraming) {
    match mcp_handshake_smoke(config, framing) {
        Ok(elapsed) => println!(
            "MCP handshake ({}): passed ({} ms)",
            framing.label(),
            elapsed.as_millis()
        ),
        Err(error) => println!("MCP handshake ({}): failed ({error})", framing.label()),
    }
}

fn mcp_handshake_smoke(config: &McpConfig, framing: HandshakeFraming) -> Result<Duration> {
    let input = mcp_initialize_probe(framing)?;
    let started = Instant::now();
    let output = run_mcp_probe(config, &input, Duration::from_secs(5))?;
    let elapsed = started.elapsed();
    if !output.status.success() {
        bail!(
            "server exited with status {}: {}",
            output.status,
            String::from_utf8_lossy(&output.stderr).trim()
        );
    }
    validate_mcp_initialize_response(framing, &output.stdout)?;
    Ok(elapsed)
}

fn mcp_initialize_probe(framing: HandshakeFraming) -> Result<Vec<u8>> {
    let body = serde_json::to_vec(&json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "initialize",
        "params": {
            "protocolVersion": "2024-11-05",
            "capabilities": {},
            "clientInfo": {"name": "sifs-doctor", "version": env!("CARGO_PKG_VERSION")}
        }
    }))?;
    Ok(match framing {
        HandshakeFraming::LineDelimited => {
            let mut input = body;
            input.push(b'\n');
            input
        }
        HandshakeFraming::ContentLength => {
            let mut input = format!("Content-Length: {}\r\n\r\n", body.len()).into_bytes();
            input.extend(body);
            input
        }
    })
}

fn run_mcp_probe(config: &McpConfig, input: &[u8], timeout: Duration) -> Result<Output> {
    let mut child = ProcessCommand::new(&config.sifs_path)
        .args(mcp_args(config))
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .with_context(|| {
            format!(
                "launch MCP command {}",
                display_command(&mcp_server_command(config))
            )
        })?;

    child
        .stdin
        .as_mut()
        .context("open MCP probe stdin")?
        .write_all(input)
        .context("write MCP initialize probe")?;
    drop(child.stdin.take());

    let deadline = Instant::now() + timeout;
    loop {
        if child.try_wait()?.is_some() {
            return Ok(child.wait_with_output()?);
        }
        if Instant::now() >= deadline {
            let _ = child.kill();
            let output = child.wait_with_output()?;
            bail!(
                "timed out after {} ms; stderr: {}",
                timeout.as_millis(),
                String::from_utf8_lossy(&output.stderr).trim()
            );
        }
        thread::sleep(Duration::from_millis(20));
    }
}

fn validate_mcp_initialize_response(framing: HandshakeFraming, stdout: &[u8]) -> Result<()> {
    let value = match framing {
        HandshakeFraming::LineDelimited => {
            let line = std::str::from_utf8(stdout)
                .context("decode newline MCP response")?
                .lines()
                .next()
                .context("missing newline MCP response")?;
            serde_json::from_str::<Value>(line).context("parse newline MCP response")?
        }
        HandshakeFraming::ContentLength => parse_content_length_response(stdout)?,
    };
    if value.get("id").and_then(Value::as_i64) != Some(1) {
        bail!("initialize response id mismatch: {value}");
    }
    if value
        .pointer("/result/protocolVersion")
        .and_then(Value::as_str)
        .is_none()
    {
        bail!("initialize response missing protocolVersion: {value}");
    }
    Ok(())
}

fn parse_content_length_response(stdout: &[u8]) -> Result<Value> {
    let separator = b"\r\n\r\n";
    let header_end = stdout
        .windows(separator.len())
        .position(|window| window == separator)
        .context("missing Content-Length response separator")?;
    let header = std::str::from_utf8(&stdout[..header_end]).context("decode MCP headers")?;
    let length = header
        .strip_prefix("Content-Length: ")
        .context("missing Content-Length response header")?
        .parse::<usize>()
        .context("parse response Content-Length")?;
    let body_start = header_end + separator.len();
    let body_end = body_start + length;
    if stdout.len() < body_end {
        bail!("short Content-Length response body");
    }
    serde_json::from_slice(&stdout[body_start..body_end]).context("parse Content-Length body")
}

fn install_codex(name: &str, config: &McpConfig, force: bool, dry_run: bool) -> Result<()> {
    let add_args = codex_add_args(name, config);
    println!("Codex MCP:");
    println!(
        "  {}",
        display_command(&prepend_command("codex", &add_args))
    );
    if dry_run {
        println!(
            "\nCodex config.toml fallback:\n{}",
            codex_toml(name, config)
        );
        return Ok(());
    }
    if command_path("codex").is_none() {
        println!("codex was not found on PATH. Add this to ~/.codex/config.toml instead:");
        println!("{}", codex_toml(name, config));
        return Ok(());
    }
    if mcp_server_exists("codex", name, None)? {
        if !force {
            bail!("Codex MCP server {name:?} already exists. Re-run with --force to replace it.");
        }
        run_checked("codex", &["mcp", "remove", name])?;
    }
    run_checked_owned("codex", &add_args)?;
    println!("Configured Codex MCP server {name:?}.");
    Ok(())
}

fn install_claude(
    name: &str,
    scope: Option<McpScopeArg>,
    config: &McpConfig,
    force: bool,
    dry_run: bool,
) -> Result<()> {
    let scope = scope.unwrap_or(McpScopeArg::Local);
    let add_args = claude_add_args(name, scope, config)?;
    println!("Claude Code MCP:");
    println!(
        "  {}",
        display_command(&prepend_command("claude", &add_args))
    );
    if dry_run {
        println!(
            "\nClaude .mcp.json fallback:\n{}",
            claude_project_json(name, config)?
        );
        return Ok(());
    }
    if command_path("claude").is_none() {
        if matches!(scope, McpScopeArg::Project) {
            bail!(
                "claude was not found on PATH. Project-scoped .mcp.json should be written through Claude Code; install Claude or use --dry-run to print the fallback config."
            );
        }
        println!(
            "claude was not found on PATH. Add this project config only in trusted repositories:"
        );
        println!("{}", claude_project_json(name, config)?);
        return Ok(());
    }
    if mcp_server_exists("claude", name, Some(scope))? {
        if !force {
            bail!("Claude MCP server {name:?} already exists. Re-run with --force to replace it.");
        }
        let mut remove_args = vec!["mcp".to_owned(), "remove".to_owned(), name.to_owned()];
        remove_args.push("--scope".to_owned());
        remove_args.push(scope_arg(scope).to_owned());
        run_checked_owned("claude", &remove_args)?;
    }
    run_checked_owned("claude", &add_args)?;
    println!(
        "Configured Claude Code MCP server {name:?} at {} scope.",
        scope_arg(scope)
    );
    Ok(())
}

fn resolve_mcp_source(source: Option<&str>, offline: bool) -> Result<String> {
    let source = match source {
        Some(source) => source.to_owned(),
        None => std::env::current_dir()
            .context("resolve current directory")?
            .to_string_lossy()
            .into_owned(),
    };
    if is_git_url(&source) {
        if offline {
            bail!("--offline does not allow remote Git sources");
        }
        return Ok(source);
    }
    let path = PathBuf::from(source);
    if !path.exists() {
        bail!("local MCP source does not exist: {}", path.display());
    }
    if !path.is_dir() {
        bail!("local MCP source is not a directory: {}", path.display());
    }
    Ok(path
        .canonicalize()
        .with_context(|| format!("canonicalize MCP source {}", path.display()))?
        .to_string_lossy()
        .into_owned())
}

fn mcp_args(config: &McpConfig) -> Vec<String> {
    let mut args = vec!["mcp".to_owned()];
    if let Some(source) = &config.source {
        args.push(source.clone());
    }
    if let Some(ref_name) = &config.ref_name {
        args.push("--ref".to_owned());
        args.push(ref_name.clone());
    }
    if config.offline {
        args.push("--offline".to_owned());
    }
    if config.no_download {
        args.push("--no-download".to_owned());
    }
    if let Some(cache_dir) = &config.cache_dir {
        args.push("--cache-dir".to_owned());
        args.push(cache_dir.to_string_lossy().into_owned());
    }
    if config.no_cache {
        args.push("--no-cache".to_owned());
    }
    if config.project_cache {
        args.push("--project-cache".to_owned());
    }
    args
}

fn mcp_server_command(config: &McpConfig) -> Vec<String> {
    let mut command = vec![config.sifs_path.to_string_lossy().into_owned()];
    command.extend(mcp_args(config));
    command
}

fn codex_add_args(name: &str, config: &McpConfig) -> Vec<String> {
    let mut args = vec![
        "mcp".to_owned(),
        "add".to_owned(),
        name.to_owned(),
        "--".to_owned(),
    ];
    args.extend(mcp_server_command(config));
    args
}

fn claude_add_args(name: &str, scope: McpScopeArg, config: &McpConfig) -> Result<Vec<String>> {
    Ok(vec![
        "mcp".to_owned(),
        "add-json".to_owned(),
        name.to_owned(),
        claude_server_json(config)?,
        "--scope".to_owned(),
        scope_arg(scope).to_owned(),
    ])
}

fn codex_toml(name: &str, config: &McpConfig) -> String {
    let args = mcp_args(config)
        .iter()
        .map(|arg| serde_json::to_string(arg).unwrap())
        .collect::<Vec<_>>()
        .join(", ");
    format!(
        "[mcp_servers.{name}]\ncommand = {}\nargs = [{args}]\nstartup_timeout_sec = 20\ntool_timeout_sec = 60\n",
        toml_string(&config.sifs_path.to_string_lossy())
    )
}

fn claude_server_json(config: &McpConfig) -> Result<String> {
    Ok(serde_json::to_string(&json!({
        "type": "stdio",
        "command": config.sifs_path.to_string_lossy(),
        "args": mcp_args(config),
        "env": {},
    }))?)
}

fn claude_project_json(name: &str, config: &McpConfig) -> Result<String> {
    Ok(serde_json::to_string_pretty(&json!({
        "mcpServers": {
            name: {
                "type": "stdio",
                "command": config.sifs_path.to_string_lossy(),
                "args": mcp_args(config),
                "env": {},
            }
        }
    }))?)
}

fn toml_string(value: &str) -> String {
    serde_json::to_string(value).unwrap()
}

fn scope_arg(scope: McpScopeArg) -> &'static str {
    match scope {
        McpScopeArg::Local => "local",
        McpScopeArg::Project => "project",
        McpScopeArg::User => "user",
    }
}

fn mcp_server_exists(command: &str, name: &str, _scope: Option<McpScopeArg>) -> Result<bool> {
    let args = vec!["mcp".to_owned(), "get".to_owned(), name.to_owned()];
    let output = ProcessCommand::new(command)
        .args(&args)
        .output()
        .with_context(|| format!("run {command} {}", args.join(" ")))?;
    Ok(output.status.success())
}

fn run_checked(command: &str, args: &[&str]) -> Result<()> {
    let output = ProcessCommand::new(command)
        .args(args)
        .output()
        .with_context(|| format!("run {command} {}", args.join(" ")))?;
    if !output.status.success() {
        bail!(
            "{} failed: {}",
            display_command(&prepend_command(
                command,
                &args.iter().map(|arg| arg.to_string()).collect::<Vec<_>>()
            )),
            String::from_utf8_lossy(&output.stderr).trim()
        );
    }
    Ok(())
}

fn run_checked_owned(command: &str, args: &[String]) -> Result<()> {
    let output = ProcessCommand::new(command)
        .args(args)
        .output()
        .with_context(|| format!("run {command} {}", args.join(" ")))?;
    if !output.status.success() {
        bail!(
            "{} failed: {}",
            display_command(&prepend_command(command, args)),
            String::from_utf8_lossy(&output.stderr).trim()
        );
    }
    Ok(())
}

fn prepend_command(command: &str, args: &[String]) -> Vec<String> {
    let mut full = vec![command.to_owned()];
    full.extend(args.iter().cloned());
    full
}

fn display_command(parts: &[String]) -> String {
    parts
        .iter()
        .map(|part| shell_quote(part))
        .collect::<Vec<_>>()
        .join(" ")
}

fn shell_quote(value: &str) -> String {
    if value
        .chars()
        .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '/' | '.' | '_' | '-' | ':' | '='))
    {
        value.to_owned()
    } else {
        format!("'{}'", value.replace('\'', "'\\''"))
    }
}

fn command_status(command: &str) -> String {
    command_path(command)
        .map(|path| format!("found at {}", path.display()))
        .unwrap_or_else(|| "not found on PATH".to_owned())
}

fn command_path(command: &str) -> Option<PathBuf> {
    let path_var = std::env::var_os("PATH")?;
    std::env::split_paths(&path_var)
        .map(|dir| dir.join(command))
        .find(|candidate| candidate.is_file())
}

fn stable_binary_path(path: &Path) -> &'static str {
    if path.components().any(|component| {
        matches!(
            component.as_os_str().to_str(),
            Some("target" | "debug" | "release")
        )
    }) {
        "no"
    } else {
        "yes"
    }
}

fn warn_if_development_binary(path: &Path) {
    if stable_binary_path(path) == "no" {
        eprintln!(
            "Warning: sifs is running from {}. Install with `cargo install --locked sifs` or Homebrew before creating durable MCP config.",
            path.display()
        );
    }
}

fn run_cache(command: CacheCommand) -> Result<()> {
    match command {
        CacheCommand::Status { cache_dir, json } => {
            let root = cache_dir.unwrap_or(platform_cache_root()?);
            let summary = cache_summary(&root);
            if json {
                println!(
                    "{}",
                    serde_json::to_string_pretty(&serde_json::json!({
                        "root": summary.root,
                        "exists": summary.exists,
                        "entries": summary.entries,
                        "files": summary.files,
                        "bytes": summary.bytes,
                    }))?
                );
            } else {
                println!("Cache root: {}", summary.root.display());
                println!("Exists: {}", summary.exists);
                println!("Entries: {}", summary.entries);
                println!("Files: {}", summary.files);
                println!("Bytes: {}", summary.bytes);
            }
        }
        CacheCommand::Clean { cache_dir, dry_run } => {
            let root = cache_dir.unwrap_or(platform_cache_root()?);
            let summary = cache_summary(&root);
            if dry_run {
                println!(
                    "Would remove {} files ({} bytes) from {}.",
                    summary.files,
                    summary.bytes,
                    summary.root.display()
                );
            } else {
                remove_cache_root(&root)?;
                println!("Removed cache: {}", root.display());
            }
        }
    }
    Ok(())
}

fn remove_cache_root(root: &Path) -> Result<()> {
    if root.exists() {
        fs::remove_dir_all(root)?;
    }
    Ok(())
}

fn run_init(force: bool) -> Result<()> {
    let dest = PathBuf::from(".claude")
        .join("agents")
        .join("sifs-search.md");
    if dest.exists() && !force {
        eprintln!(
            "{} already exists. Run with --force to overwrite.",
            dest.display()
        );
        std::process::exit(1);
    }
    fs::create_dir_all(dest.parent().unwrap())?;
    fs::write(&dest, include_str!("agents/sifs-search.md"))?;
    println!("Created {}", dest.display());
    Ok(())
}

fn print_capabilities() {
    println!(
        "{}",
        [
            "SIFS capabilities:",
            "- Search local directories and Git URLs with hybrid, semantic, or BM25 ranking.",
            "- Print structured CLI output with `--json`, `--jsonl`, or `--format compact`.",
            "- Inspect indexes with `sifs files`, `sifs status`, `sifs get`, and `sifs clean`.",
            "- Find related code from a known file and one-based line.",
            "- Run `sifs mcp` as an MCP server with search, find_related, index_status, refresh_index, clear_index, list_indexed_files, get_chunk, and init_agent tools.",
            "- Run BM25 search without loading or downloading an embedding model.",
            "- Manage embedding models with `sifs model pull` and `sifs model status`.",
            "- Generate a Claude agent file with `sifs init`.",
            "- Use `sifs-benchmark` for quality and latency benchmarks.",
            "- Use `sifs-embed` for embedding diagnostics.",
            "",
            "Discovery:",
            "- `sifs --help` and subcommand help show CLI usage.",
            "- MCP clients can call `tools/list` and read `sifs://server/context`.",
        ]
        .join("\n")
    );
}