1use std::collections::HashSet;
16
/// A contiguous run of source lines grouped by semantic role
/// (imports, type definition, function body, or free-standing logic).
#[derive(Debug, Clone)]
pub struct SemanticChunk {
    /// Raw source lines of the chunk (no trailing newlines).
    pub lines: Vec<String>,
    /// Semantic classification of the chunk.
    pub kind: ChunkKind,
    /// Attention score; 0.0 until filled in by `order_for_attention`.
    pub relevance: f64,
    /// Zero-based index of the chunk's first line in the original content.
    pub start_line: usize,
    /// Declared name (fn/struct/... identifier) when the chunk opens a
    /// recognized definition; `None` otherwise.
    pub identifier: Option<String>,
}
25
/// Semantic category assigned to a line/chunk by `classify_line`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChunkKind {
    /// Import/include/use statements.
    Imports,
    /// struct/enum/trait/type/class/interface definitions.
    TypeDefinition,
    /// Function or method definitions.
    FunctionDef,
    /// Any other non-blank code.
    Logic,
    /// Blank line; also the initial accumulator state in `detect_chunks`
    /// (emitted chunks presumably never carry this kind, since only
    /// non-blank lines are accumulated).
    Empty,
}
34
/// Splits `content` into semantic chunks.
///
/// Brace-delimited definitions (functions and type definitions) are kept
/// together as single chunks by tracking net `{`/`}` depth; all other lines
/// are grouped into runs of consecutive lines sharing the same
/// `classify_line` kind. Blank lines act as chunk boundaries and are not
/// included in any chunk. `relevance` is left at 0.0 for later scoring.
pub fn detect_chunks(content: &str) -> Vec<SemanticChunk> {
    let lines: Vec<&str> = content.lines().collect();
    if lines.is_empty() {
        return Vec::new();
    }

    let mut chunks: Vec<SemanticChunk> = Vec::new();
    // Lines accumulated for the chunk currently being built.
    let mut current_lines: Vec<String> = Vec::new();
    let mut current_kind = ChunkKind::Empty;
    let mut current_start = 0;
    let mut current_ident: Option<String> = None;
    // Net brace depth while inside a tracked block; i32 because a stray
    // `}` on malformed input can drive the count below zero.
    let mut brace_depth: i32 = 0;
    let mut in_block = false;

    for (i, &line) in lines.iter().enumerate() {
        let trimmed = line.trim();
        let line_kind = classify_line(trimmed);

        // Purely textual brace counting: braces inside strings or comments
        // are counted too — accepted here as a heuristic.
        let opens = trimmed.matches('{').count() as i32;
        let closes = trimmed.matches('}').count() as i32;

        if !in_block && is_block_start(trimmed) {
            // A new fn/type definition starts: flush whatever run was
            // accumulating before it.
            if !current_lines.is_empty() {
                chunks.push(SemanticChunk {
                    lines: current_lines.clone(),
                    kind: current_kind,
                    relevance: 0.0,
                    start_line: current_start,
                    identifier: current_ident.take(),
                });
                current_lines.clear();
            }
            current_start = i;
            current_kind = line_kind;
            current_ident = extract_identifier(trimmed);
            // Enter brace-tracking mode only when this line leaves an
            // unclosed `{` (one-liners like `fn f() {}` fall through to
            // ordinary run grouping).
            // NOTE(review): a signature whose `{` sits on a later line is
            // not tracked as a block and gets grouped line-by-line —
            // confirm this is the intended heuristic.
            in_block = opens > closes;
            brace_depth = opens - closes;
            current_lines.push(line.to_string());
            continue;
        }

        if in_block {
            brace_depth += opens - closes;
            current_lines.push(line.to_string());
            // Depth back to zero (or below, for unbalanced input): the
            // block is complete — emit it as one chunk.
            if brace_depth <= 0 {
                in_block = false;
                chunks.push(SemanticChunk {
                    lines: current_lines.clone(),
                    kind: current_kind,
                    relevance: 0.0,
                    start_line: current_start,
                    identifier: current_ident.take(),
                });
                current_lines.clear();
            }
            continue;
        }

        // Outside any block: a blank line or a change of line kind ends
        // the current run.
        let is_boundary =
            trimmed.is_empty() || (line_kind != current_kind && !current_lines.is_empty());

        if is_boundary && !current_lines.is_empty() {
            chunks.push(SemanticChunk {
                lines: current_lines.clone(),
                kind: current_kind,
                relevance: 0.0,
                start_line: current_start,
                identifier: current_ident.take(),
            });
            current_lines.clear();
        }

        // Non-blank lines start or extend the current run; blank lines
        // are dropped entirely.
        if !trimmed.is_empty() {
            if current_lines.is_empty() {
                current_start = i;
                current_kind = line_kind;
            }
            current_lines.push(line.to_string());
        }
    }

    // Flush the trailing chunk (including a block left unterminated at EOF).
    if !current_lines.is_empty() {
        chunks.push(SemanticChunk {
            lines: current_lines,
            kind: current_kind,
            relevance: 0.0,
            start_line: current_start,
            identifier: current_ident,
        });
    }

    chunks
}
130
131pub fn order_for_attention(
133 mut chunks: Vec<SemanticChunk>,
134 task_keywords: &[String],
135) -> Vec<SemanticChunk> {
136 if chunks.is_empty() {
137 return chunks;
138 }
139
140 let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
141
142 for chunk in &mut chunks {
144 let text = chunk.lines.join(" ").to_lowercase();
145 let keyword_score: f64 = kw_lower
146 .iter()
147 .filter(|kw| text.contains(kw.as_str()))
148 .count() as f64;
149
150 let kind_weight = match chunk.kind {
151 ChunkKind::FunctionDef => 2.0,
152 ChunkKind::TypeDefinition => 1.8,
153 ChunkKind::Imports => 1.0,
154 ChunkKind::Logic => 0.8,
155 ChunkKind::Empty => 0.1,
156 };
157
158 let size_factor = (chunk.lines.len() as f64 / 5.0).min(1.5);
159
160 chunk.relevance = keyword_score * 2.0 + kind_weight + size_factor * 0.3;
161 }
162
163 chunks.sort_by(|a, b| {
165 b.relevance
166 .partial_cmp(&a.relevance)
167 .unwrap_or(std::cmp::Ordering::Equal)
168 });
169
170 if chunks.len() <= 2 {
171 return chunks;
172 }
173
174 let primary = &chunks[0];
176 let primary_tokens: HashSet<String> = primary
177 .lines
178 .iter()
179 .flat_map(|l| l.split_whitespace().map(|w| w.to_lowercase()))
180 .collect();
181
182 let (mut deps, mut rest): (Vec<_>, Vec<_>) = chunks[1..].iter().cloned().partition(|chunk| {
183 if chunk.kind == ChunkKind::Imports || chunk.kind == ChunkKind::TypeDefinition {
184 let chunk_tokens: HashSet<String> = chunk
185 .lines
186 .iter()
187 .flat_map(|l| l.split_whitespace().map(|w| w.to_lowercase()))
188 .collect();
189 let overlap = primary_tokens.intersection(&chunk_tokens).count();
190 overlap >= 2
191 } else {
192 false
193 }
194 });
195
196 deps.sort_by(|a, b| {
197 b.relevance
198 .partial_cmp(&a.relevance)
199 .unwrap_or(std::cmp::Ordering::Equal)
200 });
201 rest.sort_by(|a, b| {
202 b.relevance
203 .partial_cmp(&a.relevance)
204 .unwrap_or(std::cmp::Ordering::Equal)
205 });
206
207 let mut ordered = Vec::with_capacity(chunks.len());
208 ordered.push(chunks[0].clone());
209 ordered.extend(deps);
210 ordered.extend(rest);
211
212 ordered
213}
214
215pub fn render_with_bridges(chunks: &[SemanticChunk]) -> String {
217 if chunks.is_empty() {
218 return String::new();
219 }
220
221 let mut output = Vec::new();
222
223 for (i, chunk) in chunks.iter().enumerate() {
224 if i > 0 {
225 output.push(String::new());
226 }
227 for line in &chunk.lines {
228 output.push(line.clone());
229 }
230 }
231
232 if chunks.len() > 2 {
234 if let Some(ref ident) = chunks[0].identifier {
235 output.push(String::new());
236 output.push(format!("[primary: {ident}]"));
237 }
238 }
239
240 output.join("\n")
241}
242
243fn classify_line(trimmed: &str) -> ChunkKind {
244 if trimmed.is_empty() {
245 return ChunkKind::Empty;
246 }
247 if is_import(trimmed) {
248 return ChunkKind::Imports;
249 }
250 if is_type_def(trimmed) {
251 return ChunkKind::TypeDefinition;
252 }
253 if is_fn_start(trimmed) {
254 return ChunkKind::FunctionDef;
255 }
256 ChunkKind::Logic
257}
258
259fn is_block_start(trimmed: &str) -> bool {
260 is_fn_start(trimmed) || is_type_def(trimmed)
261}
262
/// True when a trimmed line begins a function definition in Rust,
/// JavaScript/TypeScript, Python, or Go. Matches on prefixes only;
/// the trailing space in each pattern avoids false hits like `fnord`.
fn is_fn_start(line: &str) -> bool {
    [
        "fn ",
        "pub fn ",
        "async fn ",
        "pub async fn ",
        "function ",
        "export function ",
        "async function ",
        "def ",
        "async def ",
        "func ",
        "pub(crate) fn ",
        "pub(super) fn ",
    ]
    .into_iter()
    .any(|prefix| line.starts_with(prefix))
}
280
/// True when a trimmed line begins a type-level definition: Rust
/// struct/enum/trait/type, TS interface, or JS/TS/Python class.
/// Prefix-only match; the trailing space avoids hits like `structural`.
fn is_type_def(line: &str) -> bool {
    [
        "struct ",
        "pub struct ",
        "enum ",
        "pub enum ",
        "trait ",
        "pub trait ",
        "type ",
        "pub type ",
        "interface ",
        "export interface ",
        "class ",
        "export class ",
    ]
    .into_iter()
    .any(|prefix| line.starts_with(prefix))
}
298
/// True when a trimmed line is an import-like statement: Rust `use`,
/// JS/Python `import`, Python `from ... import`, or C/C++ `#include`.
fn is_import(line: &str) -> bool {
    ["use ", "import ", "from ", "#include"]
        .into_iter()
        .any(|prefix| line.starts_with(prefix))
}
305
/// Extracts the declared name from a definition line, e.g.
/// `"pub fn validate_token() {"` -> `Some("validate_token")`.
///
/// Leading modifiers (`pub`, `pub(crate)`, `pub(super)`, `async`,
/// `export`, in any stacking order) are stripped before matching a
/// definition keyword. Returns `None` when no recognized keyword
/// follows the modifiers or when no identifier character follows it.
fn extract_identifier(line: &str) -> Option<String> {
    let mut rest = line.trim();

    // Strip the run of leading modifiers; looping handles stacked forms
    // like `pub async fn` or `export async function`. Prefix-stripping
    // replaces the old `str::replace` approach, which also removed
    // matches later in the line, and adds `pub(crate)`/`pub(super)`
    // support so lines that `is_fn_start` accepts also yield a name.
    loop {
        let before = rest;
        for modifier in ["pub(crate) ", "pub(super) ", "pub ", "async ", "export "] {
            if let Some(stripped) = rest.strip_prefix(modifier) {
                rest = stripped.trim_start();
            }
        }
        if rest == before {
            break;
        }
    }

    // Definition keywords across the languages detect_chunks targets.
    for keyword in [
        "fn ",
        "struct ",
        "enum ",
        "trait ",
        "type ",
        "class ",
        "interface ",
        "function ",
        "def ",
        "func ",
    ] {
        if let Some(after) = rest.strip_prefix(keyword) {
            // Identifier = maximal run of [A-Za-z0-9_] after the keyword.
            let name: String = after
                .chars()
                .take_while(|c| c.is_alphanumeric() || *c == '_')
                .collect();
            if !name.is_empty() {
                return Some(name);
            }
        }
    }
    None
}
337
#[cfg(test)]
mod tests {
    use super::*;

    // Two import lines, a blank separator, then two functions should
    // produce at least an import chunk plus function chunks.
    #[test]
    fn detect_chunks_basic() {
        let content = "use std::io;\nuse std::fs;\n\nfn main() {\n let x = 1;\n}\n\nfn helper() {\n let y = 2;\n}";
        let chunks = detect_chunks(content);
        assert!(
            chunks.len() >= 2,
            "should detect multiple chunks, got {}",
            chunks.len()
        );
    }

    // A brace-delimited fn must be classified as FunctionDef.
    #[test]
    fn detect_chunks_identifies_functions() {
        let content = "fn main() {\n println!(\"hello\");\n}";
        let chunks = detect_chunks(content);
        assert!(
            chunks.iter().any(|c| c.kind == ChunkKind::FunctionDef),
            "should detect function definition"
        );
    }

    // The chunk containing the task keyword should be sorted to the front,
    // ahead of an otherwise-equivalent unrelated function.
    #[test]
    fn order_puts_relevant_first() {
        let content =
            "fn unrelated() {\n let x = 1;\n}\n\nfn validate_token() {\n check();\n}";
        let chunks = detect_chunks(content);
        let ordered = order_for_attention(chunks, &["validate".to_string()]);
        assert!(
            ordered[0].identifier.as_deref() == Some("validate_token"),
            "most relevant chunk should be first"
        );
    }

    // With more than two chunks and a named primary chunk, rendering must
    // append the "[primary: ...]" tail anchor.
    #[test]
    fn render_with_bridges_adds_anchor() {
        let chunks = vec![
            SemanticChunk {
                lines: vec!["fn main() {".into(), " let x = 1;".into(), "}".into()],
                kind: ChunkKind::FunctionDef,
                relevance: 5.0,
                start_line: 0,
                identifier: Some("main".into()),
            },
            SemanticChunk {
                lines: vec!["use std::io;".into()],
                kind: ChunkKind::Imports,
                relevance: 1.0,
                start_line: 5,
                identifier: None,
            },
            SemanticChunk {
                lines: vec!["fn helper() {".into(), "}".into()],
                kind: ChunkKind::FunctionDef,
                relevance: 0.5,
                start_line: 8,
                identifier: Some("helper".into()),
            },
        ];
        let result = render_with_bridges(&chunks);
        assert!(
            result.contains("[primary: main]"),
            "should have tail anchor"
        );
    }

    // Identifier extraction: fn and struct names come out; a plain
    // statement yields None.
    #[test]
    fn extract_identifier_fn() {
        assert_eq!(
            extract_identifier("pub fn validate_token() {"),
            Some("validate_token".into())
        );
        assert_eq!(extract_identifier("struct Config {"), Some("Config".into()));
        assert_eq!(extract_identifier("let x = 1;"), None);
    }
}