difflore-cli 0.1.0

Your AI coding agent, taught by your team's PR reviews — a local-first, open-source MCP server that turns past review comments into rules your agent follows automatically.
Documentation
//! Doctor data-probing for the default table view.
//!
//! Every call into `difflore_core::*` / `crate::mcp_install` / the
//! `CommandContext` that the default `difflore doctor` table needs is
//! collected here, decoded into the plain [`Findings`] struct, and
//! handed to the renderer in `table.rs`. The split keeps the renderer
//! free of any live data source so it can be exercised against a
//! hand-built `Findings` value without mocking core.
//!
//! Nothing in this module renders: it returns decoded scalars, options
//! and small plain enums only. The severity / status / hint strings —
//! i.e. how those findings are presented — live entirely in `table.rs`.

use std::collections::HashMap;
use std::path::Path;

use super::memory_snapshot::{self, MemorySnapshot};
use crate::mcp_install;

/// Everything the readiness table needs, already fetched and decoded.
/// Construct one with [`gather`]; the renderer (`table.rs`) turns it
/// into rows without touching any data source.
pub(crate) struct Findings {
    /// Version of this binary, taken from `CARGO_PKG_VERSION` at compile
    /// time.
    pub(crate) binary_version: String,
    /// Rule / import counts and detected repo names read from the
    /// project DB.
    pub(crate) project_db: ProjectDbProbe,
    /// Pre-loaded "what the AI has learned" snapshot. Defaulted (and so
    /// rendered as the empty string) whenever the current repo has no
    /// ready memory, so no load is issued in that case.
    pub(crate) memory_snapshot: MemorySnapshot,
    /// MCP install status as returned by
    /// `mcp_install::collect_status_snapshot_with_runtime_probe`.
    pub(crate) mcp: mcp_install::McpStatusSnapshot,
    /// Outcome of listing the configured providers.
    pub(crate) provider: ProviderProbe,
    /// Cloud login state, plus plan / team name when logged in.
    pub(crate) cloud: CloudProbe,
    /// Raw embedder inputs; the renderer decides the final row.
    pub(crate) embedder: EmbedderProbe,
    /// Classification of the repository's `pre-commit` hook.
    pub(crate) git_hooks: GitHookState,
    /// Daemon state, observed after the stale-pid cleanup attempt.
    pub(crate) daemon: DaemonProbe,
}

/// Decoded project-DB counts. `db_available == false` means the pool
/// failed to open and every numeric field is meaningless.
pub(crate) struct ProjectDbProbe {
    /// `false` only when the probe was handed no pool; every other field
    /// is then zero / `None`.
    pub(crate) db_available: bool,
    /// Total reported by `difflore_core::skills::stats`; 0 when that
    /// query fails.
    pub(crate) total_rules: i64,
    /// Row count reported for the `review_items` table; 0 when the count
    /// fails.
    pub(crate) prs_imported: i64,
    /// First entry of the detected (and alias-expanded) GitHub repo
    /// names for the project directory. Always `None` when
    /// `total_rules == 0`, because repo detection is skipped for an
    /// empty corpus.
    pub(crate) repo_full_name: Option<String>,
    /// Second entry of that same list, if there is one.
    /// NOTE(review): presumably the review-source alias of the repo —
    /// confirm against `expand_repo_scopes_with_source_aliases`.
    pub(crate) review_source_repo_full_name: Option<String>,
    /// Number of listed rules whose recorded source repo matches
    /// `repo_full_name` (case-insensitive, `.git` suffix ignored).
    pub(crate) scoped_active_rules: i64,
    /// Same count, matched against `review_source_repo_full_name`.
    pub(crate) review_source_active_rules: i64,
}

/// Decoded provider-list outcome.
pub(crate) enum ProviderProbe {
    /// No DB pool was available, so providers could not be listed.
    DbUnavailable,
    /// Provider config was unreadable; carries the error text.
    Error(String),
    /// No providers configured at all.
    NoneConfigured,
    /// Providers exist; the resolved active provider's name. When no
    /// provider is flagged active, the probe falls back to the first
    /// configured one.
    Active(String),
    /// Providers exist but none is active and none could be defaulted.
    /// NOTE(review): because of the first-provider fallback, the probe in
    /// this module cannot currently produce this for a non-empty list.
    NoneActive,
}

/// Decoded cloud-login outcome.
pub(crate) enum CloudProbe {
    /// The cloud client reports no active login; no status is fetched.
    NotLoggedIn,
    /// Logged in; fields come from the fetched cloud status.
    LoggedIn {
        /// Plan name from the cloud status, or `"free"` when the status
        /// carries none.
        plan: String,
        /// Team name from the cloud status, if any.
        team_name: Option<String>,
    },
}

/// Raw embedder inputs. The `table.rs` shapers (`embedder_row_from_kind`
/// / `embedder_row_from_diagnostics`) decode the activity tail and pick
/// the final row, so this carries only fetched values.
pub(crate) struct EmbedderProbe {
    /// Result of `probe_active_embedder`, i.e. the embedder the runtime
    /// resolver would pick.
    pub(crate) kind: difflore_core::context::embedding::ActiveEmbedderKind,
    /// The events returned by `activity_stream::tail(200)`, undecoded.
    pub(crate) activity_tail: Vec<difflore_core::activity_stream::ActivityEvent>,
    /// `None` when the per-project index DB could not be opened.
    pub(crate) diagnostics: Option<difflore_core::context::EmbeddingDiagnostics>,
}

/// Decoded daemon state, after the stale-pid cleanup attempt.
pub(crate) enum DaemonProbe {
    /// The daemon reported itself as running.
    Running,
    /// Stale pid whose cleanup attempt failed (locked file etc.).
    StaleCleanupFailed,
    /// No daemon is running. Also reported when a stale pid file was
    /// found and successfully removed by the probe.
    NotRunning,
}

/// Classification of the repository's `pre-commit` hook, as seen from
/// the current project root.
pub(crate) enum GitHookState {
    /// `git rev-parse` could not be run, failed, or printed nothing.
    NotARepo,
    /// The repository has no `pre-commit` hook file.
    None,
    /// A `pre-commit` hook exists and its text mentions `difflore`.
    Installed,
    /// A `pre-commit` hook exists but does not mention `difflore`.
    OtherHook,
    /// Hook file exists on disk but we couldn't read it (permissions,
    /// IO error). Distinct from `OtherHook` — we don't actually know
    /// what's in there, and saying "another tool installed it" would
    /// be misleading (the file might be `DiffLore`'s own hook in a
    /// state we just can't open).
    Unreadable(String),
}

/// Probe every data source the default doctor table reads and return a
/// fully decoded [`Findings`]. The single entry point for `table.rs`.
pub(crate) async fn gather(ctx: &crate::runtime::CommandContext) -> Findings {
    let pool = Some(&ctx.db);
    // Keep probe side effects in display order; daemon stale-pid cleanup
    // should happen near the row that reports it.
    let project_db = probe_project_db(pool, &ctx.project).await;
    let binary_version = env!("CARGO_PKG_VERSION").to_owned();
    let mcp = mcp_install::collect_status_snapshot_with_runtime_probe();
    let provider = probe_provider(pool).await;
    let cloud = probe_cloud(ctx).await;
    let embedder = probe_embedder().await;
    let git_hooks = probe_git_hook_state();
    let daemon = probe_daemon();
    // Snapshot of "what the AI has actually learned". Loaded best-effort
    // — `ctx.db` always exists, but the corpus may be empty, so the load
    // is only issued when the current repo has ready memory; otherwise
    // we hand the renderer a default snapshot (which renders to "").
    let memory_snapshot = if project_db.repo_memory_ready {
        memory_snapshot::load_for_repo(&ctx.db, &project_db.repo_aliases).await
    } else {
        MemorySnapshot::default()
    };
    Findings {
        binary_version,
        project_db: project_db.probe,
        memory_snapshot,
        mcp,
        provider,
        cloud,
        embedder,
        git_hooks,
        daemon,
    }
}

/// Internal carrier so the snapshot load (a probe concern) can be gated
/// on repo readiness without leaking `repo_memory_ready` / `repo_aliases`
/// into the renderer.
struct ProjectDbResult {
    /// The renderer-facing part; the only field that ends up in
    /// [`Findings`].
    probe: ProjectDbProbe,
    /// `true` when a repo name was detected and at least one rule is
    /// scoped to it or to its review-source alias.
    repo_memory_ready: bool,
    /// Detected repo names after alias expansion; empty on the early
    /// exits (no pool, or an empty corpus).
    repo_aliases: Vec<String>,
}

/// Read rule / import counts from the project DB and work out which
/// repo the project directory belongs to.
///
/// * `pool` — the project DB; `None` yields a result with
///   `db_available == false` and every count zeroed.
/// * `project` — project directory used for GitHub remote detection.
///
/// Query failures are tolerated: a failed count is reported as 0 and a
/// failed alias expansion falls back to the plainly detected remotes.
/// Repo detection is skipped entirely when the corpus has no rules.
async fn probe_project_db(
    pool: Option<&difflore_core::SqlitePool>,
    project: &Path,
) -> ProjectDbResult {
    /// Result for the two early exits, where no repo is resolved and
    /// memory is never ready.
    fn without_memory(db_available: bool, prs_imported: i64) -> ProjectDbResult {
        ProjectDbResult {
            probe: ProjectDbProbe {
                db_available,
                total_rules: 0,
                prs_imported,
                repo_full_name: None,
                review_source_repo_full_name: None,
                scoped_active_rules: 0,
                review_source_active_rules: 0,
            },
            repo_memory_ready: false,
            repo_aliases: Vec::new(),
        }
    }

    let Some(pool) = pool else {
        return without_memory(false, 0);
    };

    let total_rules = difflore_core::skills::stats(pool)
        .await
        .map_or(0, |stats| stats.total);

    // Only one table is requested; the last successful count wins and a
    // failed count leaves the value at 0.
    let prs_imported: i64 = difflore_core::db::table_counts(pool, &["review_items"])
        .await
        .into_iter()
        .filter_map(|(_, outcome)| outcome.ok())
        .last()
        .unwrap_or(0);

    if total_rules == 0 {
        // Empty corpus blocks recall: there's nothing to retrieve.
        return without_memory(true, prs_imported);
    }

    let detected =
        difflore_core::git::detect_github_repo_full_names(&project.to_string_lossy());
    let repo_aliases =
        difflore_core::skills::expand_repo_scopes_with_source_aliases(pool, &detected)
            .await
            .unwrap_or(detected);

    // The first alias is the repo itself, the second (if present) its
    // review source.
    let mut alias_iter = repo_aliases.iter();
    let repo_full_name = alias_iter.next().cloned();
    let review_source_repo_full_name = alias_iter.next().cloned();

    let active_rules = difflore_core::skills::list(pool).await.unwrap_or_default();
    let source_repos = difflore_core::skills::list_source_repos(pool)
        .await
        .unwrap_or_default();

    let scoped_active_rules =
        count_rules_for_repo(&active_rules, &source_repos, repo_full_name.as_deref());
    let review_source_active_rules = count_rules_for_repo(
        &active_rules,
        &source_repos,
        review_source_repo_full_name.as_deref(),
    );

    let has_scoped_rules = scoped_active_rules > 0 || review_source_active_rules > 0;
    let repo_memory_ready = repo_full_name.is_some() && has_scoped_rules;

    ProjectDbResult {
        probe: ProjectDbProbe {
            db_available: true,
            total_rules,
            prs_imported,
            repo_full_name,
            review_source_repo_full_name,
            scoped_active_rules,
            review_source_active_rules,
        },
        repo_memory_ready,
        repo_aliases,
    }
}

fn count_rules_for_repo(
    rules: &[difflore_core::models::SkillRecord],
    source_repos: &HashMap<String, Option<String>>,
    repo: Option<&str>,
) -> i64 {
    let Some(repo) = repo.map(normalize_repo).filter(|repo| !repo.is_empty()) else {
        return 0;
    };

    rules
        .iter()
        .filter(|rule| {
            source_repos
                .get(&rule.id)
                .and_then(|repo| repo.as_deref())
                .map(normalize_repo)
                .as_deref()
                == Some(repo.as_str())
        })
        .count() as i64
}

/// Canonical form of a repo name for comparisons: surrounding
/// whitespace removed, ASCII-lower-cased, trailing `.git` stripped.
///
/// Lower-casing happens *before* the suffix is stripped, so the suffix
/// match is case-insensitive as well (`Owner/Repo.GIT` and
/// `owner/repo` normalize to the same string).
fn normalize_repo(repo: &str) -> String {
    let mut name = repo.trim().to_ascii_lowercase();
    // `trim_end_matches` returns a prefix of `name`, so its length is a
    // valid char boundary to truncate at.
    let kept = name.trim_end_matches(".git").len();
    name.truncate(kept);
    name
}

async fn probe_provider(pool: Option<&difflore_core::SqlitePool>) -> ProviderProbe {
    let Some(pool) = pool else {
        return ProviderProbe::DbUnavailable;
    };
    match difflore_core::providers::list(pool).await {
        Ok(providers) if providers.is_empty() => ProviderProbe::NoneConfigured,
        Ok(providers) => {
            let active = providers
                .iter()
                .find(|p| p.is_active)
                .or_else(|| providers.first());
            match active {
                Some(p) => ProviderProbe::Active(p.name.clone()),
                None => ProviderProbe::NoneActive,
            }
        }
        Err(e) => ProviderProbe::Error(e.to_string()),
    }
}

/// Decode the cloud login state. The cloud status is fetched only when
/// the client reports a login; a status without a plan is reported as
/// the `"free"` plan.
async fn probe_cloud(ctx: &crate::runtime::CommandContext) -> CloudProbe {
    let cloud_client = ctx.cloud().await;
    if !cloud_client.is_logged_in() {
        return CloudProbe::NotLoggedIn;
    }

    let status = difflore_core::cloud::sync::fetch_cloud_status(cloud_client).await;
    let plan = status.plan.as_deref().unwrap_or("free").to_owned();
    CloudProbe::LoggedIn {
        plan,
        team_name: status.team_name,
    }
}

/// Fetch the raw inputs for the embedder row.
///
/// The active embedder comes from `probe_active_embedder`, the same
/// resolver the runtime uses, so doctor cannot disagree with it. No
/// live embed call is made: the probe stays cheap and network failures
/// never leak into `doctor` output. (The local keyword fallback is
/// deterministic and offline, but less semantic than cloud-managed or
/// BYOK embeddings.)
///
/// The cheap per-project embedding profile diagnostic is gathered too,
/// so the default table does not show green over a dead vector lane;
/// it is `None` when the per-project index DB cannot be opened. The
/// measured self-recall number belongs to `doctor --report`.
async fn probe_embedder() -> EmbedderProbe {
    let kind = difflore_core::context::embedding::probe_active_embedder().await;
    let activity_tail = difflore_core::activity_stream::tail(200);

    let diagnostics =
        if let Ok(index_pool) = difflore_core::context::index_db::get_pool_for_cwd().await {
            let gathered =
                difflore_core::context::gather_embedding_diagnostics_with_activity(&index_pool)
                    .await;
            Some(gathered)
        } else {
            None
        };

    EmbedderProbe {
        kind,
        activity_tail,
        diagnostics,
    }
}

/// Decode the daemon state, removing a stale pid file on the way.
///
/// A stale pid is only the lock file a dead process left behind; the
/// user has nothing to debug. It is therefore deleted on read and
/// reported as "not running", rather than surfacing a permanent warning
/// that teaches users to ignore optional hints. Only a failed cleanup
/// (pid path unavailable, or the file could not be removed) is reported
/// as `StaleCleanupFailed`.
fn probe_daemon() -> DaemonProbe {
    use difflore_core::daemon::DaemonStatus;

    match difflore_core::daemon::status() {
        DaemonStatus::Running { .. } => DaemonProbe::Running,
        DaemonStatus::NotRunning => DaemonProbe::NotRunning,
        DaemonStatus::Stale { .. } => {
            let cleaned = difflore_core::daemon::pid_path()
                .is_ok_and(|pid_path| std::fs::remove_file(&pid_path).is_ok());
            if cleaned {
                DaemonProbe::NotRunning
            } else {
                // Locked file etc.: the stale pid is still there.
                DaemonProbe::StaleCleanupFailed
            }
        }
    }
}

/// Locate the repository's `pre-commit` hook and classify it.
///
/// Returns
/// * `NotARepo` — `git` could not be run, `git rev-parse` failed, or it
///   printed nothing;
/// * `None` — no file exists at the resolved hook path;
/// * `Unreadable(err)` — the file exists but could not be read;
/// * `Installed` — the hook's contents mention `difflore`;
/// * `OtherHook` — any other hook, including non-text (binary) ones.
fn probe_git_hook_state() -> GitHookState {
    let cwd = difflore_core::paths::current_project_root();
    // Let git resolve the hook path itself. In a linked worktree `.git`
    // is a *file*, and `--git-dir` answers with the worktree-private
    // directory (`<main>/.git/worktrees/<name>`), which has no `hooks/`
    // of its own: hooks are shared through the common git dir, or
    // relocated by `core.hooksPath`. `--git-path` applies those
    // relocations, so this is the file git would actually execute.
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--git-path", "hooks/pre-commit"])
        .current_dir(&cwd)
        .output();
    let hook_path = match output {
        Ok(o) if o.status.success() => {
            let raw = String::from_utf8_lossy(&o.stdout).trim().to_owned();
            if raw.is_empty() {
                return GitHookState::NotARepo;
            }
            // git may answer relative to the directory it ran in.
            let p = std::path::PathBuf::from(&raw);
            if p.is_absolute() { p } else { cwd.join(p) }
        }
        _ => return GitHookState::NotARepo,
    };
    if !hook_path.exists() {
        return GitHookState::None;
    }
    // Read bytes rather than a `String`: a hook may be a compiled binary,
    // and invalid UTF-8 must not be mistaken for an unreadable file.
    let body = match std::fs::read(&hook_path) {
        Ok(bytes) => bytes,
        Err(e) => return GitHookState::Unreadable(e.to_string()),
    };
    if String::from_utf8_lossy(&body).contains("difflore") {
        GitHookState::Installed
    } else {
        GitHookState::OtherHook
    }
}