lean_ctx/tools/
ctx_fill.rs1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::signatures;
5use crate::core::tokens::count_tokens;
6use crate::tools::CrpMode;
7
/// A readable input file together with the measurements used to decide
/// whether, and at what level of detail, it fits into the token budget.
struct FileCandidate {
    /// Path exactly as supplied by the caller.
    path: String,
    /// Relevance score; candidates are ranked by this value, highest first.
    score: f64,
    /// Token count of the complete file content.
    tokens_full: usize,
    /// Token count of the "map" rendering of the file.
    tokens_map: usize,
    /// Token count of the compact signature listing of the file.
    tokens_sig: usize,
}
15
16pub fn handle(
17 cache: &mut SessionCache,
18 paths: &[String],
19 budget: usize,
20 crp_mode: CrpMode,
21 task: Option<&str>,
22) -> String {
23 if paths.is_empty() {
24 return "No files specified.".to_string();
25 }
26
27 let pagerank_scores = load_pagerank_scores(paths);
28 let mut candidates: Vec<FileCandidate> = Vec::new();
29
30 for path in paths {
31 let Ok(content) = std::fs::read_to_string(path) else {
32 continue;
33 };
34
35 let ext = Path::new(path)
36 .extension()
37 .and_then(|e| e.to_str())
38 .unwrap_or("");
39 let tokens_full = count_tokens(&content);
40 let sigs = signatures::extract_signatures(&content, ext);
41 let sig_text: String = sigs
42 .iter()
43 .map(super::super::core::signatures::Signature::to_compact)
44 .collect::<Vec<_>>()
45 .join("\n");
46 let tokens_sig = count_tokens(&sig_text);
47
48 let map_text = format_map(&content, ext, &sigs);
49 let tokens_map = count_tokens(&map_text);
50
51 let mut score = compute_relevance_score(path, &content);
52 if let Some(pr_boost) = pagerank_scores.get(path) {
53 score *= 1.0 + pr_boost * 5.0;
54 }
55
56 candidates.push(FileCandidate {
57 path: path.clone(),
58 score,
59 tokens_full,
60 tokens_map,
61 tokens_sig,
62 });
63 }
64
65 candidates.sort_by(|a, b| {
66 b.score
67 .partial_cmp(&a.score)
68 .unwrap_or(std::cmp::Ordering::Equal)
69 });
70
71 let mut pop_lines: Vec<String> = Vec::new();
72 if let Some(t) = task {
73 if let Some(root) = paths
74 .first()
75 .and_then(|p| crate::core::protocol::detect_project_root(p))
76 {
77 let rs: Vec<crate::core::task_relevance::RelevanceScore> = candidates
78 .iter()
79 .map(|c| crate::core::task_relevance::RelevanceScore {
80 path: c.path.clone(),
81 score: c.score,
82 recommended_mode: "signatures",
83 })
84 .collect();
85 let refs: Vec<&crate::core::task_relevance::RelevanceScore> = rs.iter().collect();
86 let pop = crate::core::pop_pruning::decide_for_candidates(t, &root, &refs);
87 if !pop.excluded_modules.is_empty() {
88 let excluded: std::collections::BTreeSet<&str> = pop
89 .excluded_modules
90 .iter()
91 .map(|e| e.module.as_str())
92 .collect();
93 candidates.retain(|c| {
94 let m = crate::core::pop_pruning::module_for_path(&c.path, &root);
95 !excluded.contains(m.as_str())
96 });
97 pop_lines.push("POP:".to_string());
98 for ex in &pop.excluded_modules {
99 pop_lines.push(format!(
100 " - exclude {}/ ({} candidates) — {}",
101 ex.module, ex.candidate_files, ex.reason
102 ));
103 }
104 }
105 }
106 }
107
108 let mut used_tokens = 0usize;
109 let mut selections: Vec<(String, String)> = Vec::new();
110
111 for candidate in &candidates {
112 if used_tokens >= budget {
113 break;
114 }
115
116 if crate::tools::ctx_read::is_instruction_file(&candidate.path) {
117 selections.push((candidate.path.clone(), "full".to_string()));
118 used_tokens += candidate.tokens_full;
119 continue;
120 }
121
122 let remaining = budget - used_tokens;
123 let (mode, cost) = select_best_fit(candidate, remaining);
124
125 if cost > remaining {
126 let sig_cost = candidate.tokens_sig;
127 if sig_cost <= remaining {
128 selections.push((candidate.path.clone(), "signatures".to_string()));
129 used_tokens += sig_cost;
130 }
131 continue;
132 }
133
134 selections.push((candidate.path.clone(), mode));
135 used_tokens += cost;
136 }
137
138 let mut output_parts = Vec::new();
139 output_parts.push(format!(
140 "ctx_fill: {budget} token budget, {} files analyzed, {} selected",
141 candidates.len(),
142 selections.len()
143 ));
144 if !pop_lines.is_empty() {
145 output_parts.push(pop_lines.join("\n"));
146 }
147 output_parts.push(String::new());
148
149 for (path, mode) in &selections {
150 let result = crate::tools::ctx_read::handle(cache, path, mode, crp_mode);
151 output_parts.push(result);
152 output_parts.push("---".to_string());
153 }
154
155 let skipped = candidates.len() - selections.len();
156 if skipped > 0 {
157 output_parts.push(format!("{skipped} files skipped (budget exhausted)"));
158 }
159 output_parts.push(format!("\nUsed: {used_tokens}/{budget} tokens"));
160
161 output_parts.join("\n")
162}
163
164fn select_best_fit(candidate: &FileCandidate, remaining: usize) -> (String, usize) {
165 if candidate.tokens_full <= remaining {
166 return ("full".to_string(), candidate.tokens_full);
167 }
168 if candidate.tokens_map <= remaining {
169 return ("map".to_string(), candidate.tokens_map);
170 }
171 if candidate.tokens_sig <= remaining {
172 return ("signatures".to_string(), candidate.tokens_sig);
173 }
174 ("signatures".to_string(), candidate.tokens_sig)
175}
176
/// Computes a heuristic, task-independent relevance score for a file.
///
/// Starting from `1.0`, the score is multiplied by a factor for each rule
/// that applies:
/// - file name contains `test` or `spec`: `0.5`
/// - file name contains `config`, `types` or `schema`: `1.3`
/// - file name is `mod.rs`, `index.ts`, `index.js` or `__init__.py`: `1.5`
/// - extension is `rs`, `ts`, `py`, `go` or `java`: `1.2`
/// - more than 500 lines: `0.8`; fewer than 50 lines: `1.1`
/// - `1.0 + 0.02` per line containing `pub `, `export ` or `def `, with the
///   bonus capped at `0.5`
///
/// Name checks are case-sensitive substring matches on the final path
/// component only.
fn compute_relevance_score(path: &str, content: &str) -> f64 {
    let file = Path::new(path);
    let name = file.file_name().and_then(|n| n.to_str()).unwrap_or("");
    let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");

    // Gather both line statistics in a single pass over the content.
    let mut line_count = 0usize;
    let mut export_count = 0usize;
    for line in content.lines() {
        line_count += 1;
        if ["pub ", "export ", "def "]
            .iter()
            .any(|marker| line.contains(*marker))
        {
            export_count += 1;
        }
    }

    let name_has = |needles: &[&str]| needles.iter().any(|needle| name.contains(*needle));

    // Applied strictly in this order so the floating-point result does not
    // depend on how the rules are evaluated.
    let adjustments = [
        (name_has(&["test", "spec"]), 0.5),
        (name_has(&["config", "types", "schema"]), 1.3),
        (
            matches!(name, "mod.rs" | "index.ts" | "index.js" | "__init__.py"),
            1.5,
        ),
        (matches!(ext, "rs" | "ts" | "py" | "go" | "java"), 1.2),
        (line_count > 500, 0.8),
        (line_count < 50, 1.1),
    ];

    let mut score = 1.0_f64;
    for &(applies, factor) in &adjustments {
        if applies {
            score *= factor;
        }
    }

    let export_bonus = (export_count as f64 * 0.02).min(0.5);
    score * (1.0 + export_bonus)
}
218
219fn load_pagerank_scores(paths: &[String]) -> std::collections::HashMap<String, f64> {
220 let root = paths
221 .first()
222 .and_then(|p| crate::core::protocol::detect_project_root(p));
223
224 let Some(root) = root else {
225 return std::collections::HashMap::new();
226 };
227
228 let Ok(graph) = crate::core::property_graph::CodeGraph::open(&root) else {
229 return std::collections::HashMap::new();
230 };
231
232 if graph.node_count().unwrap_or(0) == 0 {
233 return std::collections::HashMap::new();
234 }
235
236 let top = crate::core::pagerank::top_files(graph.connection(), 200);
237 top.into_iter().collect()
238}
239
240fn format_map(content: &str, ext: &str, sigs: &[crate::core::signatures::Signature]) -> String {
241 let deps = crate::core::deps::extract_deps(content, ext);
242 let mut parts = Vec::new();
243 if !deps.imports.is_empty() {
244 parts.push(format!("deps: {}", deps.imports.join(", ")));
245 }
246 if !deps.exports.is_empty() {
247 parts.push(format!("exports: {}", deps.exports.join(", ")));
248 }
249 let key_sigs: Vec<_> = sigs
250 .iter()
251 .filter(|s| s.is_exported || s.indent == 0)
252 .collect();
253 for sig in &key_sigs {
254 parts.push(sig.to_compact());
255 }
256 parts.join("\n")
257}