trusty-search 0.3.14

Machine-wide hybrid code search service: BM25 + vector + KG, zero cold-start, MCP server
Documentation
//! Handler for `trusty-search start` — boots the HTTP daemon.

use anyhow::Result;
use colored::Colorize;

/// Build the `FastEmbedder` and return it as a shared `Arc<dyn Embedder>` for
/// every index registered during the daemon's lifetime.
///
/// Why (Bug A fix): without this, `create_index_handler` constructs a BM25-only
/// `CodeIndexer` and the HNSW lane silently contributes nothing — the symptom
/// seen in the 115k-chunk benchmark where every result returned
/// `match_reason: "bm25"`.
///
/// Why (visible init): previously a failure here returned `None` and nothing
/// was logged, so operators only noticed when every search returned
/// `match_reason: "bm25"` and an entire 17k-file repo "indexed" in 12 seconds
/// (no ONNX work happened). Now: success is logged at INFO with the embedding
/// dimension and execution provider so operators can confirm the model
/// loaded; failure is logged at ERROR and returned to the caller. What happens
/// after a failure is the caller's decision — this function only reports it.
///
/// Why (`{e:#}` in the returned error): the alternate format keeps the full
/// cause chain inside the propagated message. With plain `{e}` only the
/// outermost message survived, so whatever the caller printed had lost the
/// root cause that the ERROR log line above it still showed.
///
/// # Errors
/// Returns an error whose message is `FastEmbedder init failed: <cause chain>`
/// when `FastEmbedder::new()` fails.
///
/// Test: run `trusty-search start` with `RUST_LOG=info` — the log must contain
/// a line starting with `embedder initialized:` that reports `dim=` and
/// `provider=`. Force a failure (e.g. delete the model cache while offline)
/// and the ERROR line plus the returned error must both name the root cause.
async fn build_embedder() -> Result<std::sync::Arc<dyn crate::core::Embedder>> {
    let embedder = crate::core::FastEmbedder::new().await.map_err(|e| {
        tracing::error!("FastEmbedder init failed: {e:#}");
        // Alternate formatting preserves the cause chain for the caller.
        anyhow::anyhow!("FastEmbedder init failed: {e:#}")
    })?;
    // Fully-qualified call: resolve `dimension` through the `Embedder` trait
    // rather than any inherent method on `FastEmbedder`.
    let dim = <crate::core::FastEmbedder as crate::core::Embedder>::dimension(&embedder);
    let provider = embedder.provider();
    // Human-readable accelerator hint appended to the provider name in the log.
    let metal_hint = match provider {
        trusty_embedder::ExecutionProvider::CoreML => " (Metal GPU / ANE)",
        trusty_embedder::ExecutionProvider::Cuda => " (CUDA GPU)",
        trusty_embedder::ExecutionProvider::Cpu => "",
    };
    tracing::info!(
        "embedder initialized: model=AllMiniLML6V2(Q) dim={dim} provider={provider}{metal_hint}"
    );
    Ok(std::sync::Arc::new(embedder))
}

/// Entry point for `trusty-search start`: boots the HTTP daemon on `port`.
///
/// Why: extracted from `main()`. The boot sequence (running-daemon probe, app
/// state, deferred embedder load, daemon hand-off) is intricate enough to be
/// its own unit. Facts storage moved to trusty-analyzer (issue #40).
/// What: returns `Ok(())` immediately when `running_daemon_pid()` reports a
/// daemon. Otherwise it builds `SearchAppState` from the user config, spawns
/// the embedder load on a background task, and awaits `run_daemon`.
/// `DaemonError::AlreadyRunning` is reported and treated as success; any other
/// daemon error is printed and the process exits with status 1.
/// Test: run twice in a row — the second invocation must print the
/// "daemon already running" message and exit 0.
pub async fn handle_start(port: u16, foreground: bool) -> Result<()> {
    // `foreground` does nothing today: `run_daemon` runs inline and never
    // forks. The flag exists so launchd/systemd unit files can state the
    // supervised contract explicitly in ProgramArguments
    // (see ~/Library/LaunchAgents/com.bobmatnyc.trusty-search.plist).
    // Any future background-fork path should be gated on `!foreground`.
    let _ = foreground;

    // Fast-path: check for a running daemon before touching the 86 MB
    // embedding model. The probe costs ~1 ms, while FastEmbedder::new() can
    // take several seconds on first run.
    //
    // launchd crash-loop fix: when a live daemon is found we return Ok (exit
    // 0). launchd treats any non-zero exit as a crash and re-spawns after
    // ThrottleInterval, which would loop forever while the daemon is up.
    let running = crate::service::running_daemon_pid();
    if let Some(pid) = running {
        tracing::info!("daemon already running (pid {pid}), exiting cleanly");
        eprintln!(
            "{} trusty-search daemon already running (pid {pid}); nothing to do",
            "".green()
        );
        return Ok(());
    }
    // No live PID was reported. That covers both a stale lock and the rare
    // race where the lockfile holds garbage or is mid-write by a sibling
    // launch. Either way we continue to `run_daemon`, whose `acquire_lock`
    // removes stale files and otherwise returns AlreadyRunning (handled at
    // the bottom of this function).

    // v0.3.12 fix — deferred embedder init: awaiting `build_embedder()` before
    // the HTTP listener bound kept the port closed for 15–30 s on first run
    // while ONNX/CoreML loaded the model. That exceeded the 10 s readiness
    // budget in `daemon_guard.rs`, so `trusty-search index` concluded the
    // daemon had failed to start. The state is therefore built right away and
    // the model loads in the background; handlers that need the embedder
    // return `503 Service Unavailable` until `state.install_embedder()` flips
    // the watch channel.
    let cfg = crate::service::load_user_config();
    let registry = crate::core::registry::IndexRegistry::new();
    let state = crate::service::SearchAppState::new(registry)
        .with_local_model(cfg.local_model)
        .with_openrouter_model(cfg.openrouter_model)
        .with_openrouter_api_key(cfg.openrouter_api_key);

    // Background embedder load, running in parallel with the HTTP server.
    // Success installs the embedder into the shared state. Failure is
    // reported loudly but the daemon stays up in BM25-only mode — operators
    // can `/health`-check `embedder: "unavailable"` and intervene. Exiting
    // the process from here would race the HTTP server's shutdown path.
    let install_state = state.clone();
    tokio::spawn(async move {
        let embedder = match build_embedder().await {
            Ok(embedder) => embedder,
            Err(e) => {
                tracing::error!(
                    "embedder failed to initialize: {e:#} — daemon will continue in BM25-only mode"
                );
                eprintln!(
                    "{} embedder failed to initialize: {e}\n\
                     Daemon is up but running BM25-only. Check the model cache at \
                     ~/Library/Caches/trusty-search/models/ and network access.",
                    "".red()
                );
                return;
            }
        };
        install_state.install_embedder(embedder).await;
        tracing::info!("embedder ready — vector lane online");
    });

    // Hand off to the daemon; this await lasts for the daemon's lifetime.
    match crate::service::run_daemon(state, port).await {
        Ok(()) => Ok(()),
        Err(crate::service::DaemonError::AlreadyRunning(lock_path)) => {
            // `acquire_lock` only reports AlreadyRunning after confirming the
            // recorded PID is alive (stale lockfiles are removed for us), so
            // this is not a failure. Returning Ok keeps the exit code at 0
            // and stops launchd from re-spawning us.
            tracing::info!(
                "daemon already running (lock at {}), exiting cleanly",
                lock_path.display()
            );
            eprintln!(
                "{} trusty-search daemon already running (lock at {}); nothing to do",
                "".green(),
                lock_path.display()
            );
            Ok(())
        }
        Err(e) => {
            // Any other daemon error is fatal: report it and exit non-zero.
            eprintln!("{} daemon failed: {e}", "".red());
            std::process::exit(1)
        }
    }
}