Skip to main content

lean_ctx/tools/
ctx_pack.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::path::Path;
3
4use serde::Serialize;
5
6use crate::core::artifacts::ResolvedArtifact;
7use crate::core::tokens::count_tokens;
8
/// Impact-analysis depth used by `handle_pr` when the caller passes no `depth`.
const DEFAULT_IMPACT_DEPTH: usize = 3;
/// Upper bound on the number of changed files kept in a pack; `handle_pr`
/// truncates the list to this length and records a warning.
const MAX_CHANGED_FILES_SHOWN: usize = 200;
/// Upper bound, in bytes, on caller-supplied diff text; longer input is
/// truncated by `handle_pr` before parsing.
const MAX_DIFF_BYTES: usize = 1_048_576; // 1 MiB
12
/// One changed path, as parsed from a diff or a name-status listing.
#[derive(Debug, Clone, Serialize)]
struct ChangedFile {
    /// Path of the file as it appeared in the parsed input.
    path: String,
    /// Short status code for the change, e.g. `"M"`, `"R"` or `"D"`.
    status: String,
    /// Previous path when the parser recorded one (renames); left out of the
    /// serialized JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    old_path: Option<String>,
}
20
/// Impact-analysis result for a single changed file.
#[derive(Debug, Clone, Serialize)]
struct ImpactEntry {
    /// The changed file the analysis was run for.
    file: String,
    /// Entries extracted from the `ctx_impact` report for `file`
    /// (sorted and de-duplicated by `parse_ctx_impact_output`).
    affected_files: Vec<String>,
}
26
/// JSON payload returned by `handle_pr` when `format` is `"json"`.
///
/// Field order is the serialization order, so it is part of the output format.
#[derive(Debug, Serialize)]
struct PrPackJson {
    /// Payload discriminator; `handle_pr` always sets it to `"leanctx.pr_pack"`.
    kind: &'static str,
    /// Project root exactly as passed in by the caller.
    project_root: String,
    /// Base revision that was supplied or auto-detected.
    base: String,
    /// Effective impact depth (never below 1).
    impact_depth: usize,
    /// Changed files, capped at `MAX_CHANGED_FILES_SHOWN`.
    changed_files: Vec<ChangedFile>,
    /// Sorted, de-duplicated test paths related to the changed files.
    related_tests: Vec<String>,
    /// One impact entry per changed file whose status is not `"D"`.
    impacts: Vec<ImpactEntry>,
    /// Existing artifacts that match at least one changed path.
    context_artifacts: Vec<ResolvedArtifact>,
    /// Human-readable warnings collected while building the pack.
    warnings: Vec<String>,
    /// Token count of the payload as serialized with this field still `0`.
    tokens: u64,
}
40
41pub fn handle(
42    action: &str,
43    project_root: &str,
44    base: Option<&str>,
45    format: Option<&str>,
46    depth: Option<usize>,
47    diff: Option<&str>,
48) -> String {
49    match action {
50        "pr" => handle_pr(project_root, base, format, depth, diff),
51        _ => "Unknown action. Use: pr".to_string(),
52    }
53}
54
55fn handle_pr(
56    project_root: &str,
57    base: Option<&str>,
58    format: Option<&str>,
59    depth: Option<usize>,
60    diff: Option<&str>,
61) -> String {
62    let root = project_root.to_string();
63    let base = base.map_or_else(
64        || detect_default_base(&root).unwrap_or_else(|| "HEAD~1".to_string()),
65        ToString::to_string,
66    );
67    let impact_depth = depth.unwrap_or(DEFAULT_IMPACT_DEPTH).max(1);
68
69    let mut warnings: Vec<String> = Vec::new();
70    let mut changed = if let Some(d) = diff {
71        if d.len() > MAX_DIFF_BYTES {
72            warnings.push(format!(
73                "Diff input too large ({} bytes, limit {MAX_DIFF_BYTES}). Truncating at char boundary.",
74                d.len()
75            ));
76            let mut boundary = MAX_DIFF_BYTES;
77            while boundary > 0 && !d.is_char_boundary(boundary) {
78                boundary -= 1;
79            }
80            let truncated = &d[..boundary];
81            parse_changes_from_input(truncated)
82        } else {
83            parse_changes_from_input(d)
84        }
85    } else {
86        git_diff_name_status(&root, &base, &mut warnings)
87    };
88
89    if changed.len() > MAX_CHANGED_FILES_SHOWN {
90        warnings.push(format!(
91            "Too many changed files ({}). Truncating to {MAX_CHANGED_FILES_SHOWN}.",
92            changed.len()
93        ));
94        changed.truncate(MAX_CHANGED_FILES_SHOWN);
95    }
96
97    let related_tests = collect_related_tests(&changed, &root);
98    let impacts = collect_impacts(&changed, &root, impact_depth);
99    let context_artifacts = collect_relevant_artifacts(&changed, &root, &mut warnings);
100
101    let format = format.unwrap_or("markdown");
102    match format {
103        "json" => {
104            let mut json = PrPackJson {
105                kind: "leanctx.pr_pack",
106                project_root: root,
107                base,
108                impact_depth,
109                changed_files: changed,
110                related_tests,
111                impacts,
112                context_artifacts,
113                warnings,
114                tokens: 0,
115            };
116            match serde_json::to_string_pretty(&json) {
117                Ok(s) => {
118                    json.tokens = count_tokens(&s) as u64;
119                    serde_json::to_string_pretty(&json)
120                        .unwrap_or_else(|e| format!("{{\"error\": \"serialization failed: {e}\"}}"))
121                }
122                Err(e) => format!("{{\"error\": \"serialization failed: {e}\"}}"),
123            }
124        }
125        _ => format_markdown(
126            project_root,
127            &base,
128            impact_depth,
129            &changed,
130            &related_tests,
131            &impacts,
132            &context_artifacts,
133            &warnings,
134        ),
135    }
136}
137
138fn format_markdown(
139    project_root: &str,
140    base: &str,
141    impact_depth: usize,
142    changed: &[ChangedFile],
143    related_tests: &[String],
144    impacts: &[ImpactEntry],
145    artifacts: &[ResolvedArtifact],
146    warnings: &[String],
147) -> String {
148    let mut out = String::new();
149    out.push_str("# PR Context Pack\n\n");
150    out.push_str(&format!("- Project root: `{project_root}`\n"));
151    out.push_str(&format!("- Base: `{base}`\n"));
152    out.push_str(&format!("- Impact depth: `{impact_depth}`\n\n"));
153
154    if !warnings.is_empty() {
155        out.push_str("## Warnings\n");
156        for w in warnings {
157            out.push_str(&format!("- {w}\n"));
158        }
159        out.push('\n');
160    }
161
162    out.push_str("## Changed files\n");
163    for c in changed {
164        match &c.old_path {
165            Some(old) => out.push_str(&format!("- `{}` ({}) ← `{old}`\n", c.path, c.status)),
166            None => out.push_str(&format!("- `{}` ({})\n", c.path, c.status)),
167        }
168    }
169    out.push('\n');
170
171    if !artifacts.is_empty() {
172        out.push_str("## Context artifacts\n");
173        for a in artifacts {
174            let kind = if a.is_dir { "dir" } else { "file" };
175            let exists = if a.exists { "exists" } else { "missing" };
176            out.push_str(&format!(
177                "- `{}` ({kind}, {exists}) — {}\n",
178                a.path, a.description
179            ));
180        }
181        out.push('\n');
182    }
183
184    if !related_tests.is_empty() {
185        out.push_str("## Related tests\n");
186        for t in related_tests {
187            out.push_str(&format!("- `{t}`\n"));
188        }
189        out.push('\n');
190    }
191
192    if !impacts.is_empty() {
193        out.push_str("## Impact (property graph)\n");
194        for imp in impacts {
195            out.push_str(&format!(
196                "- `{}`: {} affected files\n",
197                imp.file,
198                imp.affected_files.len()
199            ));
200            for f in imp.affected_files.iter().take(30) {
201                out.push_str(&format!("  - `{f}`\n"));
202            }
203            if imp.affected_files.len() > 30 {
204                out.push_str("  - ...\n");
205            }
206        }
207        out.push('\n');
208    }
209
210    let tokens = count_tokens(&out);
211    out.push_str(&format!("[ctx_pack pr: {tokens} tok]\n"));
212    out
213}
214
215fn collect_related_tests(changed: &[ChangedFile], project_root: &str) -> Vec<String> {
216    let mut all: BTreeSet<String> = BTreeSet::new();
217    for c in changed {
218        for t in crate::tools::ctx_review::find_related_tests(&c.path, project_root) {
219            all.insert(t);
220        }
221    }
222    all.into_iter().collect()
223}
224
225fn collect_impacts(changed: &[ChangedFile], project_root: &str, depth: usize) -> Vec<ImpactEntry> {
226    let mut out = Vec::new();
227    for c in changed {
228        if c.status == "D" {
229            continue;
230        }
231        let raw =
232            crate::tools::ctx_impact::handle("analyze", Some(&c.path), project_root, Some(depth));
233        let affected = parse_ctx_impact_output(&raw);
234        out.push(ImpactEntry {
235            file: c.path.clone(),
236            affected_files: affected,
237        });
238    }
239    out
240}
241
/// Extracts the indented entries from a textual `ctx_impact` report.
///
/// A line counts as an entry when, after dropping trailing whitespace, it
/// starts with two spaces. The entry is the rest of the line with surrounding
/// whitespace removed; blank entries are ignored. The result is sorted and
/// de-duplicated.
fn parse_ctx_impact_output(raw: &str) -> Vec<String> {
    let mut affected: Vec<String> = raw
        .lines()
        .filter_map(|line| line.trim_end().strip_prefix("  "))
        .map(str::trim)
        .filter(|entry| !entry.is_empty())
        .map(str::to_owned)
        .collect();
    affected.sort();
    affected.dedup();
    affected
}
257
258fn collect_relevant_artifacts(
259    changed: &[ChangedFile],
260    project_root: &str,
261    warnings: &mut Vec<String>,
262) -> Vec<ResolvedArtifact> {
263    let root = Path::new(project_root);
264    let resolved = crate::core::artifacts::load_resolved(root);
265    warnings.extend(resolved.warnings);
266
267    let mut out: Vec<ResolvedArtifact> = Vec::new();
268    for a in resolved.artifacts {
269        if !a.exists {
270            continue;
271        }
272        if is_artifact_relevant(&a, changed) {
273            out.push(a);
274        }
275    }
276    out.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.name.cmp(&b.name)));
277    out
278}
279
280fn is_artifact_relevant(a: &ResolvedArtifact, changed: &[ChangedFile]) -> bool {
281    if a.path.is_empty() {
282        return false;
283    }
284    if a.is_dir {
285        let prefix = if a.path.ends_with('/') {
286            a.path.clone()
287        } else {
288            format!("{}/", a.path)
289        };
290        return changed.iter().any(|c| c.path.starts_with(&prefix));
291    }
292    changed.iter().any(|c| c.path == a.path)
293}
294
295fn parse_changes_from_input(input: &str) -> Vec<ChangedFile> {
296    if input.contains("diff --git") || input.contains("\n+++ ") || input.starts_with("diff --git") {
297        let paths = parse_unified_diff_paths(input);
298        let mut out = Vec::new();
299        for p in paths {
300            out.push(ChangedFile {
301                path: p,
302                status: "M".to_string(),
303                old_path: None,
304            });
305        }
306        return dedup_changes(out);
307    }
308
309    let mut out = Vec::new();
310    for line in input.lines() {
311        let trimmed = line.trim();
312        if trimmed.is_empty() {
313            continue;
314        }
315        let parts: Vec<&str> = trimmed.split_whitespace().collect();
316        if parts.len() >= 2 {
317            let status = parts[0].to_string();
318            if status.starts_with('R') && parts.len() >= 3 {
319                out.push(ChangedFile {
320                    path: parts[2].to_string(),
321                    status: "R".to_string(),
322                    old_path: Some(parts[1].to_string()),
323                });
324            } else {
325                out.push(ChangedFile {
326                    path: parts[1].to_string(),
327                    status: status.chars().next().unwrap_or('M').to_string(),
328                    old_path: None,
329                });
330            }
331        } else {
332            out.push(ChangedFile {
333                path: trimmed.to_string(),
334                status: "M".to_string(),
335                old_path: None,
336            });
337        }
338    }
339    dedup_changes(out)
340}
341
/// Collects the file paths named in the headers of a unified diff.
///
/// Only `+++ b/<path>` and `--- a/<path>` lines are considered. The path is
/// trimmed, and empty paths as well as a literal `/dev/null` are ignored. The
/// result is sorted and contains each path once.
fn parse_unified_diff_paths(diff: &str) -> Vec<String> {
    let unique: BTreeSet<String> = diff
        .lines()
        // A line can carry at most one of the two header prefixes.
        .filter_map(|line| {
            line.strip_prefix("+++ b/")
                .or_else(|| line.strip_prefix("--- a/"))
        })
        .map(str::trim)
        .filter(|path| !path.is_empty() && *path != "/dev/null")
        .map(str::to_owned)
        .collect();
    unique.into_iter().collect()
}
360
361fn git_diff_name_status(
362    project_root: &str,
363    base: &str,
364    warnings: &mut Vec<String>,
365) -> Vec<ChangedFile> {
366    let out = std::process::Command::new("git")
367        .args(["diff", "--name-status", &format!("{base}...HEAD")])
368        .current_dir(project_root)
369        .stdout(std::process::Stdio::piped())
370        .stderr(std::process::Stdio::piped())
371        .output();
372    let Ok(o) = out else {
373        warnings.push("Failed to execute git diff".to_string());
374        return Vec::new();
375    };
376    if !o.status.success() {
377        let stderr = String::from_utf8_lossy(&o.stderr);
378        warnings.push(format!("git diff failed: {}", stderr.trim()));
379        return Vec::new();
380    }
381    let s = String::from_utf8_lossy(&o.stdout);
382    parse_changes_from_input(&s)
383}
384
/// Picks a default base revision for the repository at `project_root`.
///
/// Tries `origin/main`, `origin/master`, `main` and `master` in that order and
/// returns the first one that `git rev-parse --verify` accepts. Returns `None`
/// when none verifies, including when git cannot be run at all.
fn detect_default_base(project_root: &str) -> Option<String> {
    const CANDIDATES: [&str; 4] = ["origin/main", "origin/master", "main", "master"];

    // True when git runs and confirms that `reference` names a revision.
    let resolves = |reference: &str| -> bool {
        std::process::Command::new("git")
            .args(["rev-parse", "--verify", reference])
            .current_dir(project_root)
            .stdout(std::process::Stdio::null())
            .stderr(std::process::Stdio::null())
            .status()
            .map(|exit| exit.success())
            .unwrap_or(false)
    };

    CANDIDATES
        .iter()
        .copied()
        .find(|&cand| resolves(cand))
        .map(String::from)
}
401
402fn dedup_changes(mut changes: Vec<ChangedFile>) -> Vec<ChangedFile> {
403    let mut seen: BTreeMap<String, usize> = BTreeMap::new();
404    let mut out: Vec<ChangedFile> = Vec::new();
405    for c in changes.drain(..) {
406        let key = c.path.clone();
407        if let Some(i) = seen.get(&key) {
408            out[*i] = c;
409            continue;
410        }
411        seen.insert(key, out.len());
412        out.push(c);
413    }
414    out
415}