Skip to main content

lean_ctx/tools/
ctx_fill.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::signatures;
5use crate::core::tokens::count_tokens;
6use crate::tools::CrpMode;
7
/// A readable input file together with its relevance score and the token
/// cost of each representation it can be emitted in.
#[derive(Debug, Clone, PartialEq)]
struct FileCandidate {
    /// Path exactly as supplied by the caller.
    path: String,
    /// Relevance score; higher-scoring candidates are considered first.
    score: f64,
    /// Token count of the complete file content.
    tokens_full: usize,
    /// Token count of the "map" rendering (deps, exports, key signatures).
    tokens_map: usize,
    /// Token count of the compact signature listing.
    tokens_sig: usize,
}
15
16pub fn handle(
17    cache: &mut SessionCache,
18    paths: &[String],
19    budget: usize,
20    crp_mode: CrpMode,
21    task: Option<&str>,
22) -> String {
23    if paths.is_empty() {
24        return "No files specified.".to_string();
25    }
26
27    let pagerank_scores = load_pagerank_scores(paths);
28    let mut candidates: Vec<FileCandidate> = Vec::new();
29
30    for path in paths {
31        let Ok(content) = std::fs::read_to_string(path) else {
32            continue;
33        };
34
35        let ext = Path::new(path)
36            .extension()
37            .and_then(|e| e.to_str())
38            .unwrap_or("");
39        let tokens_full = count_tokens(&content);
40        let sigs = signatures::extract_signatures(&content, ext);
41        let sig_text: String = sigs
42            .iter()
43            .map(super::super::core::signatures::Signature::to_compact)
44            .collect::<Vec<_>>()
45            .join("\n");
46        let tokens_sig = count_tokens(&sig_text);
47
48        let map_text = format_map(&content, ext, &sigs);
49        let tokens_map = count_tokens(&map_text);
50
51        let mut score = compute_relevance_score(path, &content);
52        if let Some(pr_boost) = pagerank_scores.get(path) {
53            score *= 1.0 + pr_boost * 5.0;
54        }
55
56        candidates.push(FileCandidate {
57            path: path.clone(),
58            score,
59            tokens_full,
60            tokens_map,
61            tokens_sig,
62        });
63    }
64
65    candidates.sort_by(|a, b| {
66        b.score
67            .partial_cmp(&a.score)
68            .unwrap_or(std::cmp::Ordering::Equal)
69    });
70
71    let mut pop_lines: Vec<String> = Vec::new();
72    if let Some(t) = task {
73        if let Some(root) = paths
74            .first()
75            .and_then(|p| crate::core::protocol::detect_project_root(p))
76        {
77            let rs: Vec<crate::core::task_relevance::RelevanceScore> = candidates
78                .iter()
79                .map(|c| crate::core::task_relevance::RelevanceScore {
80                    path: c.path.clone(),
81                    score: c.score,
82                    recommended_mode: "signatures",
83                })
84                .collect();
85            let refs: Vec<&crate::core::task_relevance::RelevanceScore> = rs.iter().collect();
86            let pop = crate::core::pop_pruning::decide_for_candidates(t, &root, &refs);
87            if !pop.excluded_modules.is_empty() {
88                let excluded: std::collections::BTreeSet<&str> = pop
89                    .excluded_modules
90                    .iter()
91                    .map(|e| e.module.as_str())
92                    .collect();
93                candidates.retain(|c| {
94                    let m = crate::core::pop_pruning::module_for_path(&c.path, &root);
95                    !excluded.contains(m.as_str())
96                });
97                pop_lines.push("POP:".to_string());
98                for ex in &pop.excluded_modules {
99                    pop_lines.push(format!(
100                        "  - exclude {}/ ({} candidates) — {}",
101                        ex.module, ex.candidate_files, ex.reason
102                    ));
103                }
104            }
105        }
106    }
107
108    let mut used_tokens = 0usize;
109    let mut selections: Vec<(String, String)> = Vec::new();
110
111    for candidate in &candidates {
112        if used_tokens >= budget {
113            break;
114        }
115
116        if crate::tools::ctx_read::is_instruction_file(&candidate.path) {
117            selections.push((candidate.path.clone(), "full".to_string()));
118            used_tokens += candidate.tokens_full;
119            continue;
120        }
121
122        let remaining = budget - used_tokens;
123        let (mode, cost) = select_best_fit(candidate, remaining);
124
125        if cost > remaining {
126            let sig_cost = candidate.tokens_sig;
127            if sig_cost <= remaining {
128                selections.push((candidate.path.clone(), "signatures".to_string()));
129                used_tokens += sig_cost;
130            }
131            continue;
132        }
133
134        selections.push((candidate.path.clone(), mode));
135        used_tokens += cost;
136    }
137
138    let mut output_parts = Vec::new();
139    output_parts.push(format!(
140        "ctx_fill: {budget} token budget, {} files analyzed, {} selected",
141        candidates.len(),
142        selections.len()
143    ));
144    if !pop_lines.is_empty() {
145        output_parts.push(pop_lines.join("\n"));
146    }
147    output_parts.push(String::new());
148
149    for (path, mode) in &selections {
150        let result = crate::tools::ctx_read::handle(cache, path, mode, crp_mode);
151        output_parts.push(result);
152        output_parts.push("---".to_string());
153    }
154
155    let skipped = candidates.len() - selections.len();
156    if skipped > 0 {
157        output_parts.push(format!("{skipped} files skipped (budget exhausted)"));
158    }
159    output_parts.push(format!("\nUsed: {used_tokens}/{budget} tokens"));
160
161    output_parts.join("\n")
162}
163
164fn select_best_fit(candidate: &FileCandidate, remaining: usize) -> (String, usize) {
165    if candidate.tokens_full <= remaining {
166        return ("full".to_string(), candidate.tokens_full);
167    }
168    if candidate.tokens_map <= remaining {
169        return ("map".to_string(), candidate.tokens_map);
170    }
171    if candidate.tokens_sig <= remaining {
172        return ("signatures".to_string(), candidate.tokens_sig);
173    }
174    ("signatures".to_string(), candidate.tokens_sig)
175}
176
/// Heuristic relevance score for a file, derived from its name, extension,
/// length and the number of lines that look like public declarations.
///
/// The score starts at 1.0 and is multiplied by a series of factors:
/// - 0.5 if the file name contains "test" or "spec";
/// - 1.3 if the file name contains "config", "types" or "schema";
/// - 1.5 for `mod.rs`, `index.ts`, `index.js` and `__init__.py`;
/// - 1.2 for the extensions `rs`, `ts`, `py`, `go` and `java`;
/// - 0.8 for more than 500 lines, 1.1 for fewer than 50 lines;
/// - `1.0 + min(0.02 * n, 0.5)` where `n` counts lines containing
///   `"pub "`, `"export "` or `"def "`.
fn compute_relevance_score(path: &str, content: &str) -> f64 {
    /// True when `name` contains at least one of `needles`.
    fn name_has_any(name: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| name.contains(*needle))
    }

    let file = Path::new(path);
    let file_name = file.file_name().and_then(|n| n.to_str()).unwrap_or("");
    let extension = file.extension().and_then(|e| e.to_str()).unwrap_or("");

    // One pass over the content yields both the line total and the number of
    // export-like lines.
    let (line_count, export_count) =
        content
            .lines()
            .fold((0usize, 0usize), |(total, exported), line| {
                let looks_exported = ["pub ", "export ", "def "]
                    .iter()
                    .any(|marker| line.contains(*marker));
                (total + 1, exported + usize::from(looks_exported))
            });

    let size_factor = if line_count > 500 {
        0.8
    } else if line_count < 50 {
        1.1
    } else {
        1.0
    };

    // Factors are listed in application order; 1.0 marks "rule not triggered".
    let factors: [f64; 6] = [
        if name_has_any(file_name, &["test", "spec"]) {
            0.5
        } else {
            1.0
        },
        if name_has_any(file_name, &["config", "types", "schema"]) {
            1.3
        } else {
            1.0
        },
        if matches!(
            file_name,
            "mod.rs" | "index.ts" | "index.js" | "__init__.py"
        ) {
            1.5
        } else {
            1.0
        },
        if matches!(extension, "rs" | "ts" | "py" | "go" | "java") {
            1.2
        } else {
            1.0
        },
        size_factor,
        1.0 + (export_count as f64 * 0.02).min(0.5),
    ];

    factors.iter().fold(1.0, |score, factor| score * factor)
}
218
219fn load_pagerank_scores(paths: &[String]) -> std::collections::HashMap<String, f64> {
220    let root = paths
221        .first()
222        .and_then(|p| crate::core::protocol::detect_project_root(p));
223
224    let Some(root) = root else {
225        return std::collections::HashMap::new();
226    };
227
228    let Ok(graph) = crate::core::property_graph::CodeGraph::open(&root) else {
229        return std::collections::HashMap::new();
230    };
231
232    if graph.node_count().unwrap_or(0) == 0 {
233        return std::collections::HashMap::new();
234    }
235
236    let top = crate::core::pagerank::top_files(graph.connection(), 200);
237    top.into_iter().collect()
238}
239
240fn format_map(content: &str, ext: &str, sigs: &[crate::core::signatures::Signature]) -> String {
241    let deps = crate::core::deps::extract_deps(content, ext);
242    let mut parts = Vec::new();
243    if !deps.imports.is_empty() {
244        parts.push(format!("deps: {}", deps.imports.join(", ")));
245    }
246    if !deps.exports.is_empty() {
247        parts.push(format!("exports: {}", deps.exports.join(", ")));
248    }
249    let key_sigs: Vec<_> = sigs
250        .iter()
251        .filter(|s| s.is_exported || s.indent == 0)
252        .collect();
253    for sig in &key_sigs {
254        parts.push(sig.to_compact());
255    }
256    parts.join("\n")
257}