kaizen-cli 0.1.30

Distributable agent observability: real-time-tailable sessions, agile-style retros, and repo-level improvement (Cursor, Claude Code, Codex). Data is stored in SQLite and redacted before any sync you enable.
Documentation
// SPDX-License-Identifier: AGPL-3.0-or-later
//! Build repo snapshot facts and sidecar.

use crate::core::repo::{binding_for_session, dirty_fingerprint, repo_head, tracked_files};
use crate::metrics::analyze::analyzer_for;
use crate::metrics::codegraph::{rebuild_sidecar, symbol_id};
use crate::metrics::git::load_history;
use crate::metrics::types::{FileFact, RepoAnalysis, RepoEdge, RepoSnapshotRecord, SymbolFact};
use crate::store::Store;
use anyhow::Result;
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};

/// Version tag of the analysis pipeline.
///
/// It is stored on every `RepoSnapshotRecord` (`analyzer_version`) and is also
/// part of the input hashed by `snapshot_id`, so changing this string changes
/// every snapshot id and makes `ensure_indexed` re-index instead of reusing a
/// stored snapshot.
const ANALYZER_VERSION: &str = "smart-metrics-v1";

/// Ensures `workspace` has an up-to-date snapshot and returns its record.
///
/// The snapshot id is derived from the workspace path, the current head commit
/// and a dirty fingerprint. Unless `force` is set, the latest stored snapshot is
/// returned as-is when its id matches. Otherwise every file is re-analyzed, the
/// code-graph sidecar is rebuilt and the snapshot is saved to `store`.
///
/// # Errors
///
/// Propagates failures from head/fingerprint lookup, file listing, history
/// loading, directory creation, the sidecar rebuild and the store. Files that
/// fail to read or analyze are NOT errors; they are silently left out.
pub fn ensure_indexed(store: &Store, workspace: &Path, force: bool) -> Result<RepoSnapshotRecord> {
    let now = now_ms();
    let workspace_str = workspace.to_string_lossy().to_string();
    // A workspace counts as git-backed when it contains a `.git` entry.
    let git_backed = workspace.join(".git").exists();
    // NOTE(review): `repo_head` (and `load_history` below) run even when the
    // workspace is not git-backed — presumably they tolerate that; confirm.
    let head_commit = repo_head(workspace)?;
    let dirty = git_backed
        && binding_for_session(workspace, now, Some(now))
            .dirty_end
            .unwrap_or(false);
    // Without git, fall back to a fingerprint built from file metadata.
    let dirty_fp = if git_backed {
        dirty_fingerprint(workspace)?
    } else {
        filesystem_fingerprint(workspace)?
    };
    let snapshot_id = snapshot_id(&workspace_str, head_commit.as_deref(), &dirty_fp);
    // Fast path: reuse the stored snapshot when nothing that feeds the id changed.
    if !force
        && let Some(existing) = store.latest_repo_snapshot(&workspace_str)?
        && existing.id == snapshot_id
    {
        return Ok(existing);
    }

    let tracked = if git_backed {
        tracked_files(workspace)?
    } else {
        workspace_files(workspace)?
    };
    let (history, mut edges) = load_history(workspace, now)?;
    // Best effort: a file that cannot be read as UTF-8 text or analyzed is dropped.
    let mut analyses = tracked
        .iter()
        .filter_map(|rel| analyze_one(workspace, rel).ok())
        .collect::<Vec<_>>();
    let deps = resolve_dependencies(&analyses);
    edges.extend(deps.clone());
    // Fan-in/fan-out are counted from the import edges only.
    let fan = fan_counts(&deps);
    let mut symbols = vec![];
    // The sidecar lives in the project data dir; if that cannot be resolved it
    // falls back to `codegraph.db` inside the workspace itself.
    let graph_path = crate::core::paths::project_data_dir(workspace)
        .map(|d| d.join("codegraph.db"))
        .unwrap_or_else(|_| workspace.join("codegraph.db"));
    let snapshot = RepoSnapshotRecord {
        id: snapshot_id,
        workspace: workspace_str.clone(),
        head_commit,
        dirty_fingerprint: dirty_fp,
        analyzer_version: ANALYZER_VERSION.into(),
        indexed_at_ms: now,
        dirty,
        graph_path: graph_path.to_string_lossy().to_string(),
    };
    // One `FileFact` per analyzed file; as a side effect this also stamps each
    // symbol with its file path and gathers all symbols into `symbols`.
    let facts = analyses
        .iter_mut()
        .map(|analysis| {
            for symbol in &mut analysis.symbols {
                symbol.path = analysis.path.clone();
            }
            symbols.extend(analysis.symbols.clone());
            // Files with no history entry get default history values.
            let history = history.get(&analysis.path).cloned().unwrap_or_default();
            let (fan_in, fan_out) = fan.get(&analysis.path).copied().unwrap_or((0, 0));
            FileFact {
                snapshot_id: snapshot.id.clone(),
                path: analysis.path.clone(),
                language: analysis.language.clone(),
                bytes: analysis.bytes,
                loc: analysis.loc,
                sloc: analysis.sloc,
                complexity_total: analysis.complexity_total,
                max_fn_complexity: analysis.max_fn_complexity,
                symbol_count: analysis.symbols.len() as u32,
                import_count: analysis.imports.len() as u32,
                fan_in,
                fan_out,
                churn_30d: history.churn_30d,
                churn_90d: history.churn_90d,
                authors_90d: history.authors_90d,
                last_changed_ms: history.last_changed_ms,
            }
        })
        .collect::<Vec<_>>();
    // Call edges need the complete symbol list, so they are added last.
    edges.extend(call_edges(&symbols));
    if let Some(parent) = graph_path.parent() {
        fs::create_dir_all(parent)?;
    }
    // The sidecar is rebuilt before the snapshot is saved to the store.
    rebuild_sidecar(&graph_path, &snapshot, &facts, &symbols, &edges)?;
    store.save_repo_snapshot(&snapshot, &facts, &edges)?;
    Ok(snapshot)
}

/// Reads the file at `workspace/rel` as UTF-8 text and runs the analyzer
/// selected for that path over it.
///
/// # Errors
///
/// Fails when the file cannot be read as UTF-8 text, or when the analyzer
/// itself reports an error.
fn analyze_one(workspace: &Path, rel: &str) -> Result<RepoAnalysis> {
    let absolute = workspace.join(rel);
    let text = fs::read_to_string(&absolute)?;
    // The analyzer is picked from the full path but reports under `rel`.
    analyzer_for(&absolute).analyze(rel, &text)
}

/// Lists every non-skipped file under `workspace` as a `/`-separated path
/// relative to `workspace`, sorted so the result is stable across runs.
///
/// # Errors
///
/// Propagates any error from the directory walk.
fn workspace_files(workspace: &Path) -> Result<Vec<String>> {
    let mut files = vec![];
    collect_files(workspace, workspace, &mut files)?;
    // Equal strings are indistinguishable, so an unstable sort yields the same order.
    files.sort_unstable();
    Ok(files)
}

/// Recursively appends the files below `dir` to `out`, as `/`-separated paths
/// relative to `root`.
///
/// Entries rejected by `should_skip` are ignored together with everything
/// beneath them. Symlinked directories are NOT descended into: following them
/// can loop forever on a link cycle or walk outside the workspace. A symlink
/// that points at a regular file is still listed.
///
/// # Errors
///
/// Fails when a directory cannot be listed or one of its entries cannot be read.
fn collect_files(root: &Path, dir: &Path, out: &mut Vec<String>) -> Result<()> {
    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let path = entry.path();
        if should_skip(&path) {
            continue;
        }
        // `DirEntry::file_type` does not follow symlinks, unlike `Path::is_dir`.
        // An entry whose type cannot be determined (e.g. it vanished mid-walk)
        // is skipped rather than failing the whole walk.
        let file_type = match entry.file_type() {
            Ok(file_type) => file_type,
            Err(_) => continue,
        };
        if file_type.is_dir() {
            collect_files(root, &path, out)?;
            continue;
        }
        // For symlinks, `Path::is_file` follows the link, so links to regular
        // files are kept while links to directories are dropped.
        let is_regular_file = file_type.is_file() || (file_type.is_symlink() && path.is_file());
        if !is_regular_file {
            continue;
        }
        if let Ok(rel) = path.strip_prefix(root) {
            out.push(rel.to_string_lossy().replace('\\', "/"));
        }
    }
    Ok(())
}

/// Returns `true` when the final component of `path` is a dot-entry
/// (name starts with `.`) or is exactly `target` or `node_modules`.
///
/// Paths without a final component, or whose name is not valid UTF-8, are
/// never skipped.
fn should_skip(path: &Path) -> bool {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name.starts_with('.') || name == "target" || name == "node_modules",
        None => false,
    }
}

/// Fingerprints a workspace that is not git-backed.
///
/// Every file returned by `workspace_files` is fed, in sorted order, into a
/// single BLAKE3 hasher via `hash_file_meta`; the digest is returned hex-encoded.
///
/// # Errors
///
/// Fails when the workspace cannot be walked or a file's metadata cannot be read.
fn filesystem_fingerprint(workspace: &Path) -> Result<String> {
    let mut digest = blake3::Hasher::new();
    let files = workspace_files(workspace)?;
    files
        .iter()
        .try_for_each(|rel| hash_file_meta(workspace.join(rel), rel, &mut digest))?;
    Ok(hex::encode(digest.finalize().as_bytes()))
}

/// Mixes one file's identity and cheap change indicators into `hasher`.
///
/// Hashed, in order: the byte length of `rel`, `rel` itself, the file size, and
/// the modification time in nanoseconds since the Unix epoch. The modification
/// time matters because an edit that keeps the file size unchanged would
/// otherwise leave the fingerprint untouched and a stale snapshot would be
/// reused. The length prefix keeps the path and the numbers after it from
/// running together ambiguously.
///
/// # Errors
///
/// Fails when the metadata of `path` cannot be read.
fn hash_file_meta(path: PathBuf, rel: &str, hasher: &mut blake3::Hasher) -> Result<()> {
    let meta = fs::metadata(path)?;
    // Not every platform exposes a modification time; fall back to 0 so the
    // fingerprint then degrades to path + size instead of failing.
    let modified_nanos = meta
        .modified()
        .ok()
        .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok())
        .map_or(0, |since_epoch| since_epoch.as_nanos());
    hasher.update(&(rel.len() as u64).to_le_bytes());
    hasher.update(rel.as_bytes());
    hasher.update(&meta.len().to_le_bytes());
    hasher.update(&modified_nanos.to_le_bytes());
    Ok(())
}

/// Turns the raw imports of every analyzed file into `DEPENDS_ON` edges.
///
/// First builds an index from file stem (file name without extension, `""` when
/// there is none) to all analyzed paths with that stem. Then every import of
/// every file goes through `resolve_import`; each import that resolves yields
/// one edge of weight 1 from the importing file to the resolved path. Edges keep
/// the order of `analyses` and, within a file, the order of its imports.
fn resolve_dependencies(analyses: &[RepoAnalysis]) -> Vec<RepoEdge> {
    let mut stem_index: HashMap<String, Vec<String>> = HashMap::new();
    for analysis in analyses {
        let stem = Path::new(&analysis.path)
            .file_stem()
            .and_then(|stem| stem.to_str())
            .unwrap_or("")
            .to_string();
        stem_index
            .entry(stem)
            .or_default()
            .push(analysis.path.clone());
    }

    let mut edges = Vec::new();
    for analysis in analyses {
        for raw in analysis.imports.iter() {
            if let Some(to_path) = resolve_import(&analysis.path, raw, &stem_index) {
                edges.push(RepoEdge {
                    from_path: analysis.path.clone(),
                    to_path,
                    kind: "DEPENDS_ON".into(),
                    weight: 1,
                });
            }
        }
    }
    edges
}

/// Resolves one raw import string found in `current_path` to a file path.
///
/// * Relative imports (`./…`, `../…`) are normalized against the directory of
///   `current_path`. The usual spellings of the target are then probed against
///   `stem_index` (the path itself, the path plus a known source extension, the
///   path with its extension swapped, and directory entry points such as
///   `mod.rs` / `index.ts`). The first indexed candidate wins; when none is
///   indexed, the normalized path is returned so the import still yields an edge.
/// * Any other import is resolved by its last non-empty segment (split on `/`,
///   `:` and `.`, after stripping leading `crate::` / `self::` / `super::`),
///   which must match the stem of exactly one indexed file. Ambiguous or unknown
///   stems resolve to `None`.
///
/// `stem_index` maps a file stem to every indexed path that has that stem.
fn resolve_import(
    current_path: &str,
    raw: &str,
    stem_index: &HashMap<String, Vec<String>>,
) -> Option<String> {
    if raw.starts_with("./") || raw.starts_with("../") {
        let base = relative_target(current_path, raw)?;
        let known = relative_candidates(&base)
            .into_iter()
            .find(|candidate| is_indexed(candidate, stem_index));
        return Some(known.unwrap_or(base));
    }
    let normalized = raw
        .trim_start_matches("crate::")
        .trim_start_matches("self::")
        .trim_start_matches("super::");
    let tail = normalized
        .split(['/', ':', '.'])
        .rfind(|part| !part.is_empty())
        .unwrap_or("");
    // Only an unambiguous stem is trusted.
    match stem_index.get(tail).map(Vec::as_slice) {
        Some([only]) => Some(only.clone()),
        _ => None,
    }
}

/// Joins the relative import `raw` onto the directory of `current_path` and
/// normalizes the result lexically: `.` segments and empty segments are
/// dropped, `..` removes the preceding segment, and separators become `/`.
///
/// Leading `..` segments that would climb above the start of the path are kept.
/// Returns `None` when `current_path` has no parent or nothing is left after
/// normalization. The file system is never consulted.
fn relative_target(current_path: &str, raw: &str) -> Option<String> {
    let base = Path::new(current_path).parent()?;
    let joined = base.join(raw);
    let text = joined.to_str()?;
    let mut parts: Vec<&str> = Vec::new();
    for part in text.split(['/', '\\']) {
        match part {
            "" | "." => {}
            ".." => match parts.last() {
                Some(&last) if last != ".." => {
                    parts.pop();
                }
                _ => parts.push(".."),
            },
            name => parts.push(name),
        }
    }
    if parts.is_empty() {
        return None;
    }
    let normalized = parts.join("/");
    Some(if text.starts_with('/') {
        format!("/{normalized}")
    } else {
        normalized
    })
}

/// Lists, in priority order, the file paths a normalized relative import
/// `base` may refer to.
fn relative_candidates(base: &str) -> Vec<String> {
    const EXTENSIONS: [&str; 6] = ["rs", "ts", "js", "py", "go", "java"];
    const INDEX_FILES: [&str; 3] = ["mod.rs", "index.ts", "index.js"];
    let mut out = vec![base.to_string()];
    // `./utils` -> `utils.ts`
    out.extend(EXTENSIONS.iter().map(|ext| format!("{base}.{ext}")));
    // `./utils.js` -> `utils.ts` (specifier names the compiled file)
    out.extend(EXTENSIONS.iter().filter_map(|ext| {
        Path::new(base)
            .with_extension(ext)
            .to_str()
            .map(str::to_owned)
    }));
    // `./lib` -> `lib/index.ts`
    out.extend(INDEX_FILES.iter().map(|file| format!("{base}/{file}")));
    out
}

/// Returns `true` when `path` is one of the paths recorded in `stem_index`.
fn is_indexed(path: &str, stem_index: &HashMap<String, Vec<String>>) -> bool {
    Path::new(path)
        .file_stem()
        .and_then(|stem| stem.to_str())
        .and_then(|stem| stem_index.get(stem))
        .is_some_and(|paths| paths.iter().any(|known| known == path))
}

/// Counts incoming and outgoing `DEPENDS_ON` edges per path.
///
/// The returned map holds `(fan_in, fan_out)` for every path that appears as an
/// endpoint of at least one `DEPENDS_ON` edge; edges of any other kind are
/// ignored.
fn fan_counts(edges: &[RepoEdge]) -> HashMap<String, (u32, u32)> {
    let mut counts: HashMap<String, (u32, u32)> = HashMap::new();
    for edge in edges {
        if edge.kind != "DEPENDS_ON" {
            continue;
        }
        // The source gains one outgoing edge...
        let (_, fan_out) = counts.entry(edge.from_path.clone()).or_default();
        *fan_out += 1;
        // ...and the target gains one incoming edge.
        let (fan_in, _) = counts.entry(edge.to_path.clone()).or_default();
        *fan_in += 1;
    }
    counts
}

/// Derives `CALLS` edges between symbols by matching call names to symbol names.
///
/// A call yields an edge of weight 1 only when exactly one symbol in `symbols`
/// has the called name; unknown and ambiguous names are ignored. Edge endpoints
/// are the `symbol_id` of the caller and of the callee.
fn call_edges(symbols: &[SymbolFact]) -> Vec<RepoEdge> {
    let mut index: HashMap<String, Vec<&SymbolFact>> = HashMap::new();
    for symbol in symbols {
        index.entry(symbol.name.clone()).or_default().push(symbol);
    }

    symbols
        .iter()
        .flat_map(|caller| {
            let index = &index;
            caller.calls.iter().filter_map(move |callee| {
                match index.get(callee).map(Vec::as_slice) {
                    // Exactly one definition carries this name: unambiguous target.
                    Some([target]) => Some(RepoEdge {
                        from_path: symbol_id(caller),
                        to_path: symbol_id(*target),
                        kind: "CALLS".into(),
                        weight: 1,
                    }),
                    _ => None,
                }
            })
        })
        .collect()
}

/// Derives the snapshot id for a workspace state.
///
/// The id is a salted hash (fixed salt of 32 bytes, each `7`) over
/// `"{workspace}:{head:?}:{dirty_fp}:{ANALYZER_VERSION}"`, where `head` is
/// rendered with its `Debug` form, i.e. `Some("…")` or `None`.
fn snapshot_id(workspace: &str, head: Option<&str>, dirty_fp: &str) -> String {
    const SALT: [u8; 32] = [7u8; 32];
    let material = format!("{workspace}:{head:?}:{dirty_fp}:{ANALYZER_VERSION}");
    crate::sync::hash_with_salt(&SALT, material.as_bytes())
}

/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Returns 0 when the system clock reports a time before the epoch.
fn now_ms() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}