// lean-ctx 3.5.5 - Docs.rs

use std::collections::{BTreeMap, BTreeSet};
use std::path::Path;

use serde::Serialize;

use crate::core::artifacts::ResolvedArtifact;
use crate::core::tokens::count_tokens;

/// Impact-analysis depth used when the caller does not supply one.
const DEFAULT_IMPACT_DEPTH: usize = 3;
/// Maximum number of changed files kept for analysis; extra entries are
/// dropped and a warning is recorded.
const MAX_CHANGED_FILES_SHOWN: usize = 200;
/// Maximum size, in bytes, of caller-supplied diff text; longer input is
/// truncated and a warning is recorded.
const MAX_DIFF_BYTES: usize = 1_048_576; // 1 MiB

/// One changed file, as parsed from diff text or `git diff --name-status` output.
#[derive(Debug, Clone, Serialize)]
struct ChangedFile {
    /// Path of the changed file (the new path for renames).
    path: String,
    /// Single-letter change status: `"R"` for renames, `"M"` for entries that
    /// came from a unified diff or a bare path list, otherwise the first
    /// character of the status token found in the input.
    status: String,
    /// Previous path for renames; left out of the JSON output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    old_path: Option<String>,
}

/// Impact-analysis result for a single changed file.
#[derive(Debug, Clone, Serialize)]
struct ImpactEntry {
    /// Path of the changed file that was analysed.
    file: String,
    /// Sorted, de-duplicated entries parsed from the `ctx_impact` text output.
    affected_files: Vec<String>,
}

/// JSON representation of a PR context pack (emitted for `format == "json"`).
#[derive(Debug, Serialize)]
struct PrPackJson {
    /// Payload discriminator; `handle_pr` sets it to `"leanctx.pr_pack"`.
    kind: &'static str,
    /// Project root exactly as passed by the caller.
    project_root: String,
    /// Base revision the changes are compared against.
    base: String,
    /// Depth passed to the impact analysis (always at least 1).
    impact_depth: usize,
    /// Changed files, capped at `MAX_CHANGED_FILES_SHOWN`.
    changed_files: Vec<ChangedFile>,
    /// Sorted, de-duplicated test paths related to the changed files.
    related_tests: Vec<String>,
    /// One entry per changed file that is not marked deleted (`"D"`).
    impacts: Vec<ImpactEntry>,
    /// Existing context artifacts whose path matches a changed file.
    context_artifacts: Vec<ResolvedArtifact>,
    /// Non-fatal problems collected while building the pack.
    warnings: Vec<String>,
    /// Token count of the pretty-printed JSON, measured while this field was still 0.
    tokens: u64,
}

/// Entry point for the pack tool: dispatches on `action`.
///
/// Only `"pr"` is supported; it forwards every remaining argument to
/// `handle_pr`. Any other action yields a short usage message.
pub fn handle(
    action: &str,
    project_root: &str,
    base: Option<&str>,
    format: Option<&str>,
    depth: Option<usize>,
    diff: Option<&str>,
) -> String {
    if action == "pr" {
        return handle_pr(project_root, base, format, depth, diff);
    }
    String::from("Unknown action. Use: pr")
}

/// Builds a PR context pack for `project_root` and renders it as text.
///
/// * `base` – revision to compare against; when `None`, `detect_default_base`
///   is consulted and `"HEAD~1"` is the final fallback.
/// * `format` – `"json"` selects the JSON payload; any other value (or `None`)
///   selects Markdown.
/// * `depth` – impact-analysis depth; defaults to `DEFAULT_IMPACT_DEPTH` and is
///   clamped to a minimum of 1.
/// * `diff` – optional change description supplied by the caller. When present
///   it is parsed instead of running `git diff`. Input larger than
///   `MAX_DIFF_BYTES` is truncated to whole lines and a warning is recorded.
///
/// The changed-file list is capped at `MAX_CHANGED_FILES_SHOWN` before related
/// tests, impacts and context artifacts are collected.
fn handle_pr(
    project_root: &str,
    base: Option<&str>,
    format: Option<&str>,
    depth: Option<usize>,
    diff: Option<&str>,
) -> String {
    let root = project_root.to_string();
    let base = base.map_or_else(
        || detect_default_base(&root).unwrap_or_else(|| "HEAD~1".to_string()),
        ToString::to_string,
    );
    let impact_depth = depth.unwrap_or(DEFAULT_IMPACT_DEPTH).max(1);

    let mut warnings: Vec<String> = Vec::new();
    let mut changed = if let Some(d) = diff {
        if d.len() > MAX_DIFF_BYTES {
            warnings.push(format!(
                "Diff input too large ({} bytes, limit {MAX_DIFF_BYTES}). Truncating at char boundary.",
                d.len()
            ));
        }
        // No-op for input within the limit; otherwise keeps only complete lines
        // so a half-cut final line is never parsed as a file path.
        parse_changes_from_input(truncate_to_complete_lines(d, MAX_DIFF_BYTES))
    } else {
        git_diff_name_status(&root, &base, &mut warnings)
    };

    if changed.len() > MAX_CHANGED_FILES_SHOWN {
        warnings.push(format!(
            "Too many changed files ({}). Truncating to {MAX_CHANGED_FILES_SHOWN}.",
            changed.len()
        ));
        changed.truncate(MAX_CHANGED_FILES_SHOWN);
    }

    let related_tests = collect_related_tests(&changed, &root);
    let impacts = collect_impacts(&changed, &root, impact_depth);
    let context_artifacts = collect_relevant_artifacts(&changed, &root, &mut warnings);

    let format = format.unwrap_or("markdown");
    match format {
        "json" => {
            let mut json = PrPackJson {
                kind: "leanctx.pr_pack",
                project_root: root,
                base,
                impact_depth,
                changed_files: changed,
                related_tests,
                impacts,
                context_artifacts,
                warnings,
                tokens: 0,
            };
            // Serialize once to measure the payload, then again with the
            // measured token count filled in.
            match serde_json::to_string_pretty(&json) {
                Ok(s) => {
                    json.tokens = count_tokens(&s) as u64;
                    serde_json::to_string_pretty(&json)
                        .unwrap_or_else(|e| format!("{{\"error\": \"serialization failed: {e}\"}}"))
                }
                Err(e) => format!("{{\"error\": \"serialization failed: {e}\"}}"),
            }
        }
        _ => format_markdown(
            project_root,
            &base,
            impact_depth,
            &changed,
            &related_tests,
            &impacts,
            &context_artifacts,
            &warnings,
        ),
    }
}

/// Returns the longest prefix of `input` that fits in `max_bytes`, ends on a
/// UTF-8 character boundary and, where possible, on a line boundary.
///
/// If the prefix contains no newline at all it is returned as-is.
fn truncate_to_complete_lines(input: &str, max_bytes: usize) -> &str {
    if input.len() <= max_bytes {
        return input;
    }
    let mut boundary = max_bytes;
    while boundary > 0 && !input.is_char_boundary(boundary) {
        boundary -= 1;
    }
    let head = &input[..boundary];
    let rest = &input[boundary..];
    // The cut landed exactly at the end of a line: nothing partial to drop.
    if rest.starts_with('\n') || rest.starts_with("\r\n") {
        return head;
    }
    match head.rfind('\n') {
        Some(line_end) => &head[..line_end],
        None => head,
    }
}

/// Renders the PR context pack as a Markdown document.
///
/// Sections are written in a fixed order: header, warnings, changed files,
/// context artifacts, related tests, impact. Every section except the header
/// and "Changed files" is omitted when its input slice is empty. The document
/// ends with a footer line carrying the token count of everything before it.
fn format_markdown(
    project_root: &str,
    base: &str,
    impact_depth: usize,
    changed: &[ChangedFile],
    related_tests: &[String],
    impacts: &[ImpactEntry],
    artifacts: &[ResolvedArtifact],
    warnings: &[String],
) -> String {
    // Affected files listed per impact entry before the list is elided.
    const MAX_AFFECTED_LISTED: usize = 30;

    let mut md = String::from("# PR Context Pack\n\n");
    md += &format!("- Project root: `{project_root}`\n");
    md += &format!("- Base: `{base}`\n");
    md += &format!("- Impact depth: `{impact_depth}`\n\n");

    if !warnings.is_empty() {
        md += "## Warnings\n";
        for warning in warnings {
            md += &format!("- {warning}\n");
        }
        md.push('\n');
    }

    md += "## Changed files\n";
    for file in changed {
        let entry = if let Some(previous) = &file.old_path {
            format!("- `{}` ({}) ← `{previous}`\n", file.path, file.status)
        } else {
            format!("- `{}` ({})\n", file.path, file.status)
        };
        md += &entry;
    }
    md.push('\n');

    if !artifacts.is_empty() {
        md += "## Context artifacts\n";
        for artifact in artifacts {
            let kind = match artifact.is_dir {
                true => "dir",
                false => "file",
            };
            let exists = match artifact.exists {
                true => "exists",
                false => "missing",
            };
            md += &format!(
                "- `{}` ({kind}, {exists}) — {}\n",
                artifact.path, artifact.description
            );
        }
        md.push('\n');
    }

    if !related_tests.is_empty() {
        md += "## Related tests\n";
        for test_path in related_tests {
            md += &format!("- `{test_path}`\n");
        }
        md.push('\n');
    }

    if !impacts.is_empty() {
        md += "## Impact (property graph)\n";
        for impact in impacts {
            let total = impact.affected_files.len();
            md += &format!("- `{}`: {} affected files\n", impact.file, total);
            for affected in impact.affected_files.iter().take(MAX_AFFECTED_LISTED) {
                md += &format!("  - `{affected}`\n");
            }
            if total > MAX_AFFECTED_LISTED {
                md += "  - ...\n";
            }
        }
        md.push('\n');
    }

    // The footer reports the size of the document body, excluding the footer.
    let tokens = count_tokens(&md);
    md += &format!("[ctx_pack pr: {tokens} tok]\n");
    md
}

/// Gathers the tests related to every changed file.
///
/// Results from `ctx_review::find_related_tests` are merged into a sorted,
/// duplicate-free list.
fn collect_related_tests(changed: &[ChangedFile], project_root: &str) -> Vec<String> {
    let unique: BTreeSet<String> = changed
        .iter()
        .flat_map(|file| crate::tools::ctx_review::find_related_tests(&file.path, project_root))
        .collect();
    unique.into_iter().collect()
}

/// Runs the impact analysis for each changed file that is not marked deleted.
///
/// Files with status `"D"` are skipped. For every other file the `ctx_impact`
/// `"analyze"` action is invoked with `depth`, and its text output is parsed
/// into the list of affected files. Entries keep the order of `changed`.
fn collect_impacts(changed: &[ChangedFile], project_root: &str, depth: usize) -> Vec<ImpactEntry> {
    changed
        .iter()
        .filter(|file| file.status != "D")
        .map(|file| {
            let report = crate::tools::ctx_impact::handle(
                "analyze",
                Some(&file.path),
                project_root,
                Some(depth),
                None,
            );
            ImpactEntry {
                file: file.path.clone(),
                affected_files: parse_ctx_impact_output(&report),
            }
        })
        .collect()
}

/// Extracts the affected-file entries from the text output of `ctx_impact`.
///
/// Only lines indented by at least two spaces are considered. Each one is
/// trimmed, and entries starting with `...` (elision markers) are dropped.
/// The result is sorted and free of duplicates.
fn parse_ctx_impact_output(raw: &str) -> Vec<String> {
    let entries: BTreeSet<String> = raw
        .lines()
        .filter_map(|line| line.trim_end().strip_prefix("  "))
        .map(str::trim)
        .filter(|entry| !entry.starts_with("...") && !entry.is_empty())
        .map(str::to_string)
        .collect();
    // A BTreeSet iterates in ascending order, matching sort + dedup.
    entries.into_iter().collect()
}

/// Loads the project's resolved artifacts and keeps those touched by the change set.
///
/// Warnings produced while loading are appended to `warnings`. Artifacts that
/// do not exist, or that no changed file matches, are discarded. The survivors
/// are ordered by path, then by name.
fn collect_relevant_artifacts(
    changed: &[ChangedFile],
    project_root: &str,
    warnings: &mut Vec<String>,
) -> Vec<ResolvedArtifact> {
    let resolved = crate::core::artifacts::load_resolved(Path::new(project_root));
    warnings.extend(resolved.warnings);

    let mut relevant: Vec<ResolvedArtifact> = resolved
        .artifacts
        .into_iter()
        .filter(|artifact| artifact.exists && is_artifact_relevant(artifact, changed))
        .collect();
    relevant.sort_by(|lhs, rhs| {
        lhs.path
            .cmp(&rhs.path)
            .then_with(|| lhs.name.cmp(&rhs.name))
    });
    relevant
}

/// Reports whether artifact `a` is touched by any changed file.
///
/// An artifact with an empty path never matches. A directory artifact matches
/// any changed path located beneath it; a file artifact matches only a changed
/// path that is exactly equal to its own.
fn is_artifact_relevant(a: &ResolvedArtifact, changed: &[ChangedFile]) -> bool {
    if a.path.is_empty() {
        return false;
    }
    if !a.is_dir {
        return changed.iter().any(|file| file.path == a.path);
    }
    // Ensure a trailing slash so `docs` does not match `docs-old/...`.
    let mut dir_prefix = a.path.clone();
    if !dir_prefix.ends_with('/') {
        dir_prefix.push('/');
    }
    changed
        .iter()
        .any(|file| file.path.starts_with(&dir_prefix))
}

/// Parses a textual change description into a de-duplicated list of changed files.
///
/// Two input shapes are recognised:
///
/// * **Unified diff** (contains `diff --git` or a `+++ ` header line): every
///   path found by `parse_unified_diff_paths` is reported with status `"M"`.
/// * **Name/status list**, one entry per line, e.g. the output of
///   `git diff --name-status`:
///   - `M<TAB>src/lib.rs` – status followed by a path,
///   - `R100<TAB>old.rs<TAB>new.rs` – rename; `path` is the new name and
///     `old_path` the previous one,
///   - `C75<TAB>src.rs<TAB>copy.rs` – copy; `path` is the destination and
///     `old_path` the source,
///   - `src/lib.rs` – a bare path, reported with status `"M"`.
///
/// Lines containing a tab are split on tabs only (git's native separator), so
/// paths with embedded spaces stay intact. Lines without a tab fall back to
/// splitting on any whitespace.
fn parse_changes_from_input(input: &str) -> Vec<ChangedFile> {
    /// True for a copy status token: `C` optionally followed by a numeric score.
    fn is_copy_status(status: &str) -> bool {
        status
            .strip_prefix('C')
            .is_some_and(|score| score.bytes().all(|b| b.is_ascii_digit()))
    }

    if input.contains("diff --git") || input.contains("\n+++ ") {
        let changes = parse_unified_diff_paths(input)
            .into_iter()
            .map(|path| ChangedFile {
                path,
                status: "M".to_string(),
                old_path: None,
            })
            .collect();
        return dedup_changes(changes);
    }

    let mut out = Vec::new();
    for line in input.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        // `git diff --name-status` separates fields with tabs; honouring that
        // keeps paths with embedded spaces in one piece.
        let fields: Vec<&str> = if trimmed.contains('\t') {
            trimmed
                .split('\t')
                .map(str::trim)
                .filter(|field| !field.is_empty())
                .collect()
        } else {
            trimmed.split_whitespace().collect()
        };

        if fields.len() < 2 {
            // A bare path without a status column.
            out.push(ChangedFile {
                path: trimmed.to_string(),
                status: "M".to_string(),
                old_path: None,
            });
            continue;
        }

        let status = fields[0];
        // Similarity scores such as `R100` collapse to the leading letter.
        let status_letter = status.chars().next().unwrap_or('M').to_string();
        let has_source_path = status.starts_with('R') || is_copy_status(status);
        if has_source_path && fields.len() >= 3 {
            // Renames and copies list the source first and the destination second.
            out.push(ChangedFile {
                path: fields[2].to_string(),
                status: status_letter,
                old_path: Some(fields[1].to_string()),
            });
        } else {
            out.push(ChangedFile {
                path: fields[1].to_string(),
                status: status_letter,
                old_path: None,
            });
        }
    }
    dedup_changes(out)
}

/// Collects the file paths named in the `--- a/` and `+++ b/` headers of a unified diff.
///
/// Paths are trimmed; empty paths and the literal `/dev/null` are ignored.
/// The result is sorted and contains each path once.
fn parse_unified_diff_paths(diff: &str) -> Vec<String> {
    let paths: BTreeSet<String> = diff
        .lines()
        .filter_map(|line| {
            line.strip_prefix("+++ b/")
                .or_else(|| line.strip_prefix("--- a/"))
        })
        .map(str::trim)
        .filter(|path| !path.is_empty() && *path != "/dev/null")
        .map(str::to_string)
        .collect();
    paths.into_iter().collect()
}

/// Lists the files changed in the range `{base}...HEAD` via `git diff --name-status`.
///
/// The command runs inside `project_root`. Failures are never fatal: a message
/// is appended to `warnings` and an empty list is returned when
///
/// * `base` starts with `-` (git would parse it as a command-line option
///   rather than a revision),
/// * the `git` process cannot be started, or
/// * git exits with a non-zero status (its stderr is included in the warning).
fn git_diff_name_status(
    project_root: &str,
    base: &str,
    warnings: &mut Vec<String>,
) -> Vec<ChangedFile> {
    // `base` is caller-supplied. A leading dash would turn the range argument
    // into an option such as `--output=<file>`, so refuse it before spawning git.
    if base.starts_with('-') {
        warnings.push(format!(
            "Invalid base revision `{base}`: must not start with '-'"
        ));
        return Vec::new();
    }

    let range = format!("{base}...HEAD");
    let out = std::process::Command::new("git")
        .args(["diff", "--name-status", range.as_str()])
        .current_dir(project_root)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .output();
    let Ok(o) = out else {
        warnings.push("Failed to execute git diff".to_string());
        return Vec::new();
    };
    if !o.status.success() {
        let stderr = String::from_utf8_lossy(&o.stderr);
        warnings.push(format!("git diff failed: {}", stderr.trim()));
        return Vec::new();
    }
    let s = String::from_utf8_lossy(&o.stdout);
    parse_changes_from_input(&s)
}

/// Picks the first revision that `git rev-parse --verify` accepts in `project_root`.
///
/// Candidates are tried in order: `origin/main`, `origin/master`, `main`,
/// `master`. Returns `None` when none verifies, including when git cannot be run.
fn detect_default_base(project_root: &str) -> Option<String> {
    const CANDIDATES: [&str; 4] = ["origin/main", "origin/master", "main", "master"];

    CANDIDATES
        .iter()
        .copied()
        .find(|&candidate| {
            let status = std::process::Command::new("git")
                .args(["rev-parse", "--verify", candidate])
                .current_dir(project_root)
                .stdout(std::process::Stdio::null())
                .stderr(std::process::Stdio::null())
                .status();
            // A spawn failure counts the same as an unknown revision.
            matches!(status, Ok(exit) if exit.success())
        })
        .map(str::to_string)
}

/// Collapses entries that share the same `path`.
///
/// The first occurrence of a path fixes its position in the output; a later
/// occurrence replaces the stored entry in place, so the last entry's data wins.
fn dedup_changes(changes: Vec<ChangedFile>) -> Vec<ChangedFile> {
    let mut slot_by_path: BTreeMap<String, usize> = BTreeMap::new();
    let mut unique: Vec<ChangedFile> = Vec::new();
    for change in changes {
        match slot_by_path.get(&change.path).copied() {
            Some(slot) => unique[slot] = change,
            None => {
                slot_by_path.insert(change.path.clone(), unique.len());
                unique.push(change);
            }
        }
    }
    unique
}