Skip to main content

lean_ctx/tools/
ctx_pack.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::path::Path;
3
4use serde::Serialize;
5
6use crate::core::artifacts::ResolvedArtifact;
7use crate::core::tokens::count_tokens;
8
/// Impact-analysis depth used by `handle_pr` when the caller passes no `depth`.
const DEFAULT_IMPACT_DEPTH: usize = 3;
/// Maximum number of changed files kept in a pack; `handle_pr` drops the rest
/// and records a warning.
const MAX_CHANGED_FILES_SHOWN: usize = 200;
/// Largest caller-supplied diff, in bytes, that is parsed in full (1 MiB).
const MAX_DIFF_BYTES: usize = 1 << 20;
12
13#[derive(Debug, Clone, Serialize)]
14struct ChangedFile {
15    path: String,
16    status: String,
17    #[serde(skip_serializing_if = "Option::is_none")]
18    old_path: Option<String>,
19}
20
21#[derive(Debug, Clone, Serialize)]
22struct ImpactEntry {
23    file: String,
24    affected_files: Vec<String>,
25}
26
27#[derive(Debug, Serialize)]
28struct PrPackJson {
29    kind: &'static str,
30    project_root: String,
31    base: String,
32    impact_depth: usize,
33    changed_files: Vec<ChangedFile>,
34    related_tests: Vec<String>,
35    impacts: Vec<ImpactEntry>,
36    context_artifacts: Vec<ResolvedArtifact>,
37    warnings: Vec<String>,
38    tokens: u64,
39}
40
41pub fn handle(
42    action: &str,
43    project_root: &str,
44    base: Option<&str>,
45    format: Option<&str>,
46    depth: Option<usize>,
47    diff: Option<&str>,
48) -> String {
49    match action {
50        "pr" => handle_pr(project_root, base, format, depth, diff),
51        _ => "Unknown action. Use: pr".to_string(),
52    }
53}
54
55fn handle_pr(
56    project_root: &str,
57    base: Option<&str>,
58    format: Option<&str>,
59    depth: Option<usize>,
60    diff: Option<&str>,
61) -> String {
62    let root = project_root.to_string();
63    let base = base.map_or_else(
64        || detect_default_base(&root).unwrap_or_else(|| "HEAD~1".to_string()),
65        ToString::to_string,
66    );
67    let impact_depth = depth.unwrap_or(DEFAULT_IMPACT_DEPTH).max(1);
68
69    let mut warnings: Vec<String> = Vec::new();
70    let mut changed = if let Some(d) = diff {
71        if d.len() > MAX_DIFF_BYTES {
72            warnings.push(format!(
73                "Diff input too large ({} bytes, limit {MAX_DIFF_BYTES}). Truncating at char boundary.",
74                d.len()
75            ));
76            let mut boundary = MAX_DIFF_BYTES;
77            while boundary > 0 && !d.is_char_boundary(boundary) {
78                boundary -= 1;
79            }
80            let truncated = &d[..boundary];
81            parse_changes_from_input(truncated)
82        } else {
83            parse_changes_from_input(d)
84        }
85    } else {
86        git_diff_name_status(&root, &base, &mut warnings)
87    };
88
89    if changed.len() > MAX_CHANGED_FILES_SHOWN {
90        warnings.push(format!(
91            "Too many changed files ({}). Truncating to {MAX_CHANGED_FILES_SHOWN}.",
92            changed.len()
93        ));
94        changed.truncate(MAX_CHANGED_FILES_SHOWN);
95    }
96
97    let related_tests = collect_related_tests(&changed, &root);
98    let impacts = collect_impacts(&changed, &root, impact_depth);
99    let context_artifacts = collect_relevant_artifacts(&changed, &root, &mut warnings);
100
101    let format = format.unwrap_or("markdown");
102    match format {
103        "json" => {
104            let mut json = PrPackJson {
105                kind: "leanctx.pr_pack",
106                project_root: root,
107                base,
108                impact_depth,
109                changed_files: changed,
110                related_tests,
111                impacts,
112                context_artifacts,
113                warnings,
114                tokens: 0,
115            };
116            match serde_json::to_string_pretty(&json) {
117                Ok(s) => {
118                    json.tokens = count_tokens(&s) as u64;
119                    serde_json::to_string_pretty(&json)
120                        .unwrap_or_else(|e| format!("{{\"error\": \"serialization failed: {e}\"}}"))
121                }
122                Err(e) => format!("{{\"error\": \"serialization failed: {e}\"}}"),
123            }
124        }
125        _ => format_markdown(
126            project_root,
127            &base,
128            impact_depth,
129            &changed,
130            &related_tests,
131            &impacts,
132            &context_artifacts,
133            &warnings,
134        ),
135    }
136}
137
138fn format_markdown(
139    project_root: &str,
140    base: &str,
141    impact_depth: usize,
142    changed: &[ChangedFile],
143    related_tests: &[String],
144    impacts: &[ImpactEntry],
145    artifacts: &[ResolvedArtifact],
146    warnings: &[String],
147) -> String {
148    let mut out = String::new();
149    out.push_str("# PR Context Pack\n\n");
150    out.push_str(&format!("- Project root: `{project_root}`\n"));
151    out.push_str(&format!("- Base: `{base}`\n"));
152    out.push_str(&format!("- Impact depth: `{impact_depth}`\n\n"));
153
154    if !warnings.is_empty() {
155        out.push_str("## Warnings\n");
156        for w in warnings {
157            out.push_str(&format!("- {w}\n"));
158        }
159        out.push('\n');
160    }
161
162    out.push_str("## Changed files\n");
163    for c in changed {
164        match &c.old_path {
165            Some(old) => out.push_str(&format!("- `{}` ({}) ← `{old}`\n", c.path, c.status)),
166            None => out.push_str(&format!("- `{}` ({})\n", c.path, c.status)),
167        }
168    }
169    out.push('\n');
170
171    if !artifacts.is_empty() {
172        out.push_str("## Context artifacts\n");
173        for a in artifacts {
174            let kind = if a.is_dir { "dir" } else { "file" };
175            let exists = if a.exists { "exists" } else { "missing" };
176            out.push_str(&format!(
177                "- `{}` ({kind}, {exists}) — {}\n",
178                a.path, a.description
179            ));
180        }
181        out.push('\n');
182    }
183
184    if !related_tests.is_empty() {
185        out.push_str("## Related tests\n");
186        for t in related_tests {
187            out.push_str(&format!("- `{t}`\n"));
188        }
189        out.push('\n');
190    }
191
192    if !impacts.is_empty() {
193        out.push_str("## Impact (property graph)\n");
194        for imp in impacts {
195            out.push_str(&format!(
196                "- `{}`: {} affected files\n",
197                imp.file,
198                imp.affected_files.len()
199            ));
200            for f in imp.affected_files.iter().take(30) {
201                out.push_str(&format!("  - `{f}`\n"));
202            }
203            if imp.affected_files.len() > 30 {
204                out.push_str("  - ...\n");
205            }
206        }
207        out.push('\n');
208    }
209
210    let tokens = count_tokens(&out);
211    out.push_str(&format!("[ctx_pack pr: {tokens} tok]\n"));
212    out
213}
214
215fn collect_related_tests(changed: &[ChangedFile], project_root: &str) -> Vec<String> {
216    let mut all: BTreeSet<String> = BTreeSet::new();
217    for c in changed {
218        for t in crate::tools::ctx_review::find_related_tests(&c.path, project_root) {
219            all.insert(t);
220        }
221    }
222    all.into_iter().collect()
223}
224
225fn collect_impacts(changed: &[ChangedFile], project_root: &str, depth: usize) -> Vec<ImpactEntry> {
226    let mut out = Vec::new();
227    for c in changed {
228        if c.status == "D" {
229            continue;
230        }
231        let raw = crate::tools::ctx_impact::handle(
232            "analyze",
233            Some(&c.path),
234            project_root,
235            Some(depth),
236            None,
237        );
238        let affected = parse_ctx_impact_output(&raw);
239        out.push(ImpactEntry {
240            file: c.path.clone(),
241            affected_files: affected,
242        });
243    }
244    out
245}
246
/// Extracts the listed entries from a textual `ctx_impact` report.
///
/// An entry is any line indented by at least two spaces. Its surrounding
/// whitespace is removed; empty entries and entries starting with "..." are
/// skipped. The result is sorted and free of duplicates.
fn parse_ctx_impact_output(raw: &str) -> Vec<String> {
    let mut entries: Vec<String> = raw
        .lines()
        .filter_map(|line| line.trim_end().strip_prefix("  "))
        .map(str::trim)
        .filter(|entry| !entry.is_empty() && !entry.starts_with("..."))
        .map(str::to_string)
        .collect();
    entries.sort_unstable();
    entries.dedup();
    entries
}
265
266fn collect_relevant_artifacts(
267    changed: &[ChangedFile],
268    project_root: &str,
269    warnings: &mut Vec<String>,
270) -> Vec<ResolvedArtifact> {
271    let root = Path::new(project_root);
272    let resolved = crate::core::artifacts::load_resolved(root);
273    warnings.extend(resolved.warnings);
274
275    let mut out: Vec<ResolvedArtifact> = Vec::new();
276    for a in resolved.artifacts {
277        if !a.exists {
278            continue;
279        }
280        if is_artifact_relevant(&a, changed) {
281            out.push(a);
282        }
283    }
284    out.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.name.cmp(&b.name)));
285    out
286}
287
288fn is_artifact_relevant(a: &ResolvedArtifact, changed: &[ChangedFile]) -> bool {
289    if a.path.is_empty() {
290        return false;
291    }
292    if a.is_dir {
293        let prefix = if a.path.ends_with('/') {
294            a.path.clone()
295        } else {
296            format!("{}/", a.path)
297        };
298        return changed.iter().any(|c| c.path.starts_with(&prefix));
299    }
300    changed.iter().any(|c| c.path == a.path)
301}
302
303fn parse_changes_from_input(input: &str) -> Vec<ChangedFile> {
304    if input.contains("diff --git") || input.contains("\n+++ ") || input.starts_with("diff --git") {
305        let paths = parse_unified_diff_paths(input);
306        let mut out = Vec::new();
307        for p in paths {
308            out.push(ChangedFile {
309                path: p,
310                status: "M".to_string(),
311                old_path: None,
312            });
313        }
314        return dedup_changes(out);
315    }
316
317    let mut out = Vec::new();
318    for line in input.lines() {
319        let trimmed = line.trim();
320        if trimmed.is_empty() {
321            continue;
322        }
323        let parts: Vec<&str> = trimmed.split_whitespace().collect();
324        if parts.len() >= 2 {
325            let status = parts[0].to_string();
326            if status.starts_with('R') && parts.len() >= 3 {
327                out.push(ChangedFile {
328                    path: parts[2].to_string(),
329                    status: "R".to_string(),
330                    old_path: Some(parts[1].to_string()),
331                });
332            } else {
333                out.push(ChangedFile {
334                    path: parts[1].to_string(),
335                    status: status.chars().next().unwrap_or('M').to_string(),
336                    old_path: None,
337                });
338            }
339        } else {
340            out.push(ChangedFile {
341                path: trimmed.to_string(),
342                status: "M".to_string(),
343                old_path: None,
344            });
345        }
346    }
347    dedup_changes(out)
348}
349
/// Collects the file paths named by the `--- a/<path>` and `+++ b/<path>`
/// headers of a unified diff, sorted and without duplicates.
///
/// Hunk bodies are skipped using the line counts from their `@@` header, so a
/// removed or added content line that merely looks like a file header (for
/// example a deleted line reading `-- a/x`) is not mistaken for one. Anything
/// after a TAB on a header line (such as the timestamp written by `diff -u`)
/// is not part of the path. `/dev/null` sides are ignored.
///
/// Limitation: a hunk that is shorter than its header claims hides a following
/// file header unless a non-body line such as `diff --git` comes first.
fn parse_unified_diff_paths(diff: &str) -> Vec<String> {
    let mut paths: BTreeSet<String> = BTreeSet::new();
    // Body lines the current hunk still owes: (old side, new side).
    let mut pending: (usize, usize) = (0, 0);

    for line in diff.lines() {
        if pending != (0, 0) {
            match line.as_bytes().first() {
                Some(b'-') => {
                    pending.0 = pending.0.saturating_sub(1);
                    continue;
                }
                Some(b'+') => {
                    pending.1 = pending.1.saturating_sub(1);
                    continue;
                }
                // Context line; some tools strip the single space off blank ones.
                Some(b' ') | None => {
                    pending = (pending.0.saturating_sub(1), pending.1.saturating_sub(1));
                    continue;
                }
                // "\ No newline at end of file" counts for neither side.
                Some(b'\\') => continue,
                // Not a hunk body line: the hunk was shorter than announced.
                Some(_) => pending = (0, 0),
            }
        }

        if let Some(counts) = unified_hunk_line_counts(line) {
            pending = counts;
            continue;
        }

        let header = line
            .strip_prefix("+++ b/")
            .or_else(|| line.strip_prefix("--- a/"));
        if let Some(rest) = header {
            let path = rest.split('\t').next().unwrap_or(rest).trim();
            if !path.is_empty() && path != "/dev/null" {
                paths.insert(path.to_string());
            }
        }
    }
    paths.into_iter().collect()
}

/// Parses a `@@ -<start>[,<len>] +<start>[,<len>] @@` hunk header and returns
/// the (old, new) body line counts, or `None` if `line` is not such a header.
fn unified_hunk_line_counts(line: &str) -> Option<(usize, usize)> {
    let rest = line.strip_prefix("@@ -")?;
    let (ranges, _) = rest.split_once(" @@")?;
    let (old, new) = ranges.split_once(" +")?;
    Some((unified_range_len(old)?, unified_range_len(new)?))
}

/// Length of one `<start>[,<len>]` range; the length defaults to 1 when omitted.
fn unified_range_len(range: &str) -> Option<usize> {
    match range.split_once(',') {
        Some((start, len)) => {
            start.parse::<usize>().ok()?;
            len.parse().ok()
        }
        None => {
            range.parse::<usize>().ok()?;
            Some(1)
        }
    }
}
368
369fn git_diff_name_status(
370    project_root: &str,
371    base: &str,
372    warnings: &mut Vec<String>,
373) -> Vec<ChangedFile> {
374    let out = std::process::Command::new("git")
375        .args(["diff", "--name-status", &format!("{base}...HEAD")])
376        .current_dir(project_root)
377        .stdout(std::process::Stdio::piped())
378        .stderr(std::process::Stdio::piped())
379        .output();
380    let Ok(o) = out else {
381        warnings.push("Failed to execute git diff".to_string());
382        return Vec::new();
383    };
384    if !o.status.success() {
385        let stderr = String::from_utf8_lossy(&o.stderr);
386        warnings.push(format!("git diff failed: {}", stderr.trim()));
387        return Vec::new();
388    }
389    let s = String::from_utf8_lossy(&o.stdout);
390    parse_changes_from_input(&s)
391}
392
/// Picks the first of `origin/main`, `origin/master`, `main`, `master` that
/// `git rev-parse --verify` accepts inside `project_root`.
///
/// Returns `None` when none of them resolves or git cannot be run.
fn detect_default_base(project_root: &str) -> Option<String> {
    const CANDIDATES: [&str; 4] = ["origin/main", "origin/master", "main", "master"];

    // Only the exit status matters, so git's output is discarded.
    let resolves = |rev: &str| {
        let status = std::process::Command::new("git")
            .args(["rev-parse", "--verify", rev])
            .current_dir(project_root)
            .stdout(std::process::Stdio::null())
            .stderr(std::process::Stdio::null())
            .status();
        matches!(status, Ok(exit) if exit.success())
    };

    CANDIDATES
        .iter()
        .find(|rev| resolves(rev))
        .map(|rev| rev.to_string())
}
409
410fn dedup_changes(mut changes: Vec<ChangedFile>) -> Vec<ChangedFile> {
411    let mut seen: BTreeMap<String, usize> = BTreeMap::new();
412    let mut out: Vec<ChangedFile> = Vec::new();
413    for c in changes.drain(..) {
414        let key = c.path.clone();
415        if let Some(i) = seen.get(&key) {
416            out[*i] = c;
417            continue;
418        }
419        seen.insert(key, out.len());
420        out.push(c);
421    }
422    out
423}