1use anyhow::{Context, Result};
2use dashmap::DashMap;
3use once_cell::sync::Lazy;
4use std::collections::hash_map::DefaultHasher;
5use std::collections::{HashMap, HashSet};
6use std::hash::{Hash, Hasher};
7use tree_sitter::{Node, Parser as TSParser};
8
9use probe_code::language::factory::get_language_impl;
10use probe_code::language::language_trait::LanguageImpl;
11use probe_code::language::tree_cache;
12use probe_code::models::CodeBlock;
13
/// Global cache of line maps, stored in a lazily initialised `DashMap`.
///
/// `parse_file_for_code_blocks` looks entries up under a key of the form
/// `"{extension}_{content_hash}_{allow_tests}"`. Each value is indexed by
/// zero-based line number: a slot is `Some(CachedNodeInfo)` when node metadata
/// was recorded for that line and `None` otherwise.
static LINE_MAP_CACHE: Lazy<DashMap<String, Vec<Option<CachedNodeInfo>>>> = Lazy::new(DashMap::new);
16
/// Returns a 64-bit fingerprint of `content`.
///
/// The value is produced by feeding the string through a freshly created
/// [`DefaultHasher`], so equal inputs always yield equal fingerprints within
/// one build of the program.
fn calculate_content_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
23
/// Owned snapshot of the metadata recorded for one line of a parsed file.
///
/// Unlike [`NodeInfo`], this type holds no references into a syntax tree, so it
/// can be stored in the line-map cache. Rows are zero-based.
#[derive(Clone, Debug)]
struct CachedNodeInfo {
    /// Byte offset at which the original node starts.
    start_byte: usize,
    /// Byte offset at which the original node ends.
    end_byte: usize,
    /// Row on which the original node starts.
    start_row: usize,
    /// Row on which the original node ends.
    end_row: usize,
    /// Grammar kind of the original node.
    node_kind: String,
    /// Whether the original node was classified as a comment.
    is_comment: bool,
    /// Test flag copied from the live node info.
    is_test: bool,
    /// Whether the language implementation accepts the original node as a parent.
    original_node_is_acceptable: bool,
    /// `(start_byte, end_byte)` of the context node, when one exists.
    context_node_bytes: Option<(usize, usize)>,
    /// `(start_row, end_row)` of the context node, when one exists.
    context_node_rows: Option<(usize, usize)>,
    /// Grammar kind of the context node, when one exists.
    context_node_kind: Option<String>,
    /// Whether the context node is a test node, when one exists.
    context_node_is_test: Option<bool>,
    /// Kind of the enclosing function found for a `struct_type` node, if any.
    parent_node_type: Option<String>,
    /// Start row of that enclosing function, if any.
    parent_start_row: Option<usize>,
    /// End row of that enclosing function, if any.
    parent_end_row: Option<usize>,
}
54
55impl CachedNodeInfo {
56 fn from_node_info(
58 info: &NodeInfo<'_>,
59 language_impl: &dyn LanguageImpl,
60 content: &[u8],
61 allow_tests: bool,
62 ) -> Self {
63 let mut rep_node = info.node; if info.is_comment {
68 if let Some(ctx) = info.context_node {
69 if !allow_tests && !language_impl.is_test_node(&ctx, content) {
70 rep_node = ctx; }
73 }
74 } else if !info.is_test {
75 if let Some(ctx) = info.context_node {
76 if !allow_tests && !language_impl.is_test_node(&ctx, content) {
77 rep_node = ctx; }
79 }
80 }
81 let parent_info = if rep_node.kind() == "struct_type" {
85 language_impl
86 .find_parent_function(rep_node)
87 .map(|parent_node| {
88 let parent_type = parent_node.kind().to_string();
89 let parent_start = parent_node.start_position().row;
90 let parent_end = parent_node.end_position().row;
91 (parent_type, parent_start, parent_end)
92 })
93 } else {
94 None
95 };
96
97 let original_acceptable = language_impl.is_acceptable_parent(&info.node);
99
100 let context_test = info
102 .context_node
103 .map(|ctx| language_impl.is_test_node(&ctx, content));
104
105 CachedNodeInfo {
106 start_byte: info.node.start_byte(),
108 end_byte: info.node.end_byte(),
109 start_row: info.node.start_position().row,
110 end_row: info.node.end_position().row,
111 node_kind: info.node.kind().to_string(),
112 is_comment: info.is_comment,
113 is_test: info.is_test, original_node_is_acceptable: original_acceptable,
115 context_node_bytes: info.context_node.map(|n| (n.start_byte(), n.end_byte())),
117 context_node_rows: info
118 .context_node
119 .map(|n| (n.start_position().row, n.end_position().row)),
120 context_node_kind: info.context_node.map(|n| n.kind().to_string()),
121 context_node_is_test: context_test, parent_node_type: parent_info.as_ref().map(|(t, _, _)| t.clone()),
125 parent_start_row: parent_info.as_ref().map(|(_, s, _)| *s),
126 parent_end_row: parent_info.as_ref().map(|(_, _, e)| *e),
127 }
135 }
136}
137
138#[derive(Clone, Copy)]
140struct NodeInfo<'a> {
141 node: Node<'a>,
142 is_comment: bool,
143 context_node: Option<Node<'a>>, is_test: bool,
145 specificity: usize,
146}
147
148fn should_update_line_map<'a>(
150 line_map: &[Option<NodeInfo<'a>>],
151 line: usize,
152 node: Node<'a>,
153 is_comment: bool,
154 context_node: Option<Node<'a>>,
155 specificity: usize,
156) -> bool {
157 match &line_map[line] {
158 None => true, Some(current) => {
160 if current.is_comment && current.context_node.is_some() {
163 if let Some(ctx) = current.context_node {
164 if ctx.id() == node.id() {
165 return false;
166 }
167 }
168 }
169
170 if is_comment && context_node.is_some() {
173 if let Some(ctx) = context_node {
174 if ctx.id() == current.node.id() {
175 return true;
176 }
177 }
178 }
179
180 specificity < current.specificity
182 }
183 }
184}
185
186fn find_prev_sibling(node: Node<'_>) -> Option<Node<'_>> {
188 let parent = node.parent()?;
189
190 let mut cursor = parent.walk();
191 let mut prev_child = None;
192
193 for child in parent.children(&mut cursor) {
194 if child.id() == node.id() {
195 return prev_child;
196 }
197 prev_child = Some(child);
198 }
199
200 None }
202
203fn find_acceptable_child<'a>(node: Node<'a>, language_impl: &dyn LanguageImpl) -> Option<Node<'a>> {
205 let mut cursor = node.walk();
206 for child in node.children(&mut cursor) {
207 if language_impl.is_acceptable_parent(&child) {
208 return Some(child);
209 }
210
211 if let Some(acceptable) = find_acceptable_child(child, language_impl) {
213 return Some(acceptable);
214 }
215 }
216
217 None }
219
220fn find_immediate_next_node(node: Node<'_>) -> Option<Node<'_>> {
222 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
223
224 if let Some(next) = node.next_sibling() {
226 if debug_mode {
227 println!(
228 "DEBUG: Found immediate next sibling: type='{}', lines={}-{}",
229 next.kind(),
230 next.start_position().row + 1,
231 next.end_position().row + 1
232 );
233 }
234 return Some(next);
235 }
236
237 if let Some(parent) = node.parent() {
239 if let Some(next_parent) = parent.next_sibling() {
240 if debug_mode {
241 println!(
242 "DEBUG: Found parent's next sibling: type='{}', lines={}-{}",
243 next_parent.kind(),
244 next_parent.start_position().row + 1,
245 next_parent.end_position().row + 1
246 );
247 }
248 return Some(next_parent);
249 }
250 }
251
252 if debug_mode {
253 println!("DEBUG: No immediate next node found");
254 }
255 None
256}
257
258fn find_comment_context_node<'a>(
261 comment_node: Node<'a>,
262 language_impl: &dyn LanguageImpl,
263 debug_mode: bool,
264) -> Option<Node<'a>> {
265 let start_row = comment_node.start_position().row;
266
267 if debug_mode {
268 println!(
269 "DEBUG: Finding context for comment at lines {}-{}: {}",
270 comment_node.start_position().row + 1,
271 comment_node.end_position().row + 1,
272 comment_node.kind()
273 );
274 }
275
276 let mut current_sibling = comment_node.next_sibling();
278
279 while let Some(sibling) = current_sibling {
281 if sibling.kind() == "comment"
282 || sibling.kind() == "line_comment"
283 || sibling.kind() == "block_comment"
284 || sibling.kind() == "doc_comment"
285 || sibling.kind() == "//"
286 {
287 current_sibling = sibling.next_sibling();
289 continue;
290 }
291
292 if language_impl.is_acceptable_parent(&sibling) {
294 if debug_mode {
295 println!(
296 "DEBUG: Found next non-comment sibling for comment at line {}: type='{}', lines={}-{}",
297 start_row + 1,
298 sibling.kind(),
299 sibling.start_position().row + 1,
300 sibling.end_position().row + 1
301 );
302 }
303 return Some(sibling);
304 } else {
305 if let Some(child) = find_acceptable_child(sibling, language_impl) {
307 if debug_mode {
308 println!(
309 "DEBUG: Found acceptable child in next non-comment sibling for comment at line {}: type='{}', lines={}-{}",
310 start_row + 1,
311 child.kind(),
312 child.start_position().row + 1,
313 child.end_position().row + 1
314 );
315 }
316 return Some(child);
317 }
318 }
319
320 current_sibling = sibling.next_sibling();
322 }
323
324 let has_next_sibling = comment_node.next_sibling().is_some();
328
329 if !has_next_sibling {
330 if let Some(prev_sibling) = find_prev_sibling(comment_node) {
331 if language_impl.is_acceptable_parent(&prev_sibling) {
332 if debug_mode {
333 println!(
334 "DEBUG: Found previous sibling for comment at line {}: type='{}', lines={}-{}",
335 start_row + 1,
336 prev_sibling.kind(),
337 prev_sibling.start_position().row + 1,
338 prev_sibling.end_position().row + 1
339 );
340 }
341 return Some(prev_sibling);
342 } else {
343 if let Some(child) = find_acceptable_child(prev_sibling, language_impl) {
345 if debug_mode {
346 println!(
347 "DEBUG: Found acceptable child in previous sibling for comment at line {}: type='{}', lines={}-{}",
348 start_row + 1,
349 child.kind(),
350 child.start_position().row + 1,
351 child.end_position().row + 1
352 );
353 }
354 return Some(child);
355 }
356 }
357 }
358 }
359
360 let mut current = comment_node;
362 while let Some(parent) = current.parent() {
363 if language_impl.is_acceptable_parent(&parent) {
364 if debug_mode {
365 println!(
366 "DEBUG: Found parent for comment at line {}: type='{}', lines={}-{}",
367 start_row + 1,
368 parent.kind(),
369 parent.start_position().row + 1,
370 parent.end_position().row + 1
371 );
372 }
373 return Some(parent);
374 }
375 current = parent;
376 }
377
378 if let Some(next_node) = find_immediate_next_node(comment_node) {
380 if language_impl.is_acceptable_parent(&next_node) {
381 if debug_mode {
382 println!(
383 "DEBUG: Using immediate next acceptable node: type='{}', lines={}-{}",
384 next_node.kind(),
385 next_node.start_position().row + 1,
386 next_node.end_position().row + 1
387 );
388 }
389 return Some(next_node);
390 }
391
392 if let Some(child) = find_acceptable_child(next_node, language_impl) {
394 if debug_mode {
395 println!(
396 "DEBUG: Found acceptable child in next node: type='{}', lines={}-{}",
397 child.kind(),
398 child.start_position().row + 1,
399 child.end_position().row + 1
400 );
401 }
402 return Some(child);
403 }
404 }
405
406 if debug_mode {
407 println!("DEBUG: No related node found for the comment");
408 }
409 None
410}
411
412#[allow(clippy::too_many_arguments)]
415fn process_node<'a>(
416 node: Node<'a>,
417 line_map: &mut Vec<Option<NodeInfo<'a>>>,
418 _extension: &str, language_impl: &dyn LanguageImpl,
420 content: &[u8],
421 allow_tests: bool,
422 debug_mode: bool,
423 current_ancestor: Option<Node<'a>>, ) {
425 let start_row = node.start_position().row;
426 let end_row = node.end_position().row;
427
428 if start_row >= line_map.len() {
430 return;
431 }
432
433 let is_comment = node.kind() == "comment"
435 || node.kind() == "line_comment"
436 || node.kind() == "block_comment"
437 || node.kind() == "doc_comment"
438 || node.kind() == "//"; let is_test = !allow_tests && language_impl.is_test_node(&node, content);
442
443 let line_coverage = end_row.saturating_sub(start_row) + 1;
445 let byte_coverage = node.end_byte().saturating_sub(node.start_byte());
446 let specificity = line_coverage * 1000 + (byte_coverage / 100); let context_node = if is_comment {
450 find_comment_context_node(node, language_impl, debug_mode)
453 } else {
454 if !language_impl.is_acceptable_parent(&node) {
457 current_ancestor
458 } else {
459 None }
461 };
462
463 let effective_end_row = std::cmp::min(end_row, line_map.len().saturating_sub(1));
466 for line in start_row..=effective_end_row {
467 let should_update =
469 should_update_line_map(line_map, line, node, is_comment, context_node, specificity);
470
471 if should_update {
472 line_map[line] = Some(NodeInfo {
474 node,
475 is_comment,
476 context_node, is_test, specificity,
479 });
480 }
481 }
482
483 let next_ancestor = if language_impl.is_acceptable_parent(&node) {
485 Some(node)
487 } else {
488 current_ancestor
490 };
491
492 let mut cursor = node.walk();
494 for child in node.children(&mut cursor) {
495 process_node(
496 child,
497 line_map,
498 _extension,
499 language_impl,
500 content,
501 allow_tests,
502 debug_mode,
503 next_ancestor, );
505 }
506}
507
/// Builds the list of code blocks for `line_numbers` from a cached line map,
/// using only the stored metadata (no syntax tree is consulted).
///
/// * `cached_line_map` - one optional entry per source line, indexed by
///   zero-based line number.
/// * `line_numbers` - one-based line numbers to resolve.
/// * `_language_impl`, `_content` - currently unused.
/// * `allow_tests` - when `false`, entries flagged as tests and test context
///   nodes are not turned into blocks.
/// * `debug_mode` - prints a trace of every decision to stdout.
///
/// Returns the de-duplicated blocks sorted by start row. The body contains no
/// failing path, so the result is always `Ok`.
fn process_cached_line_map(
    cached_line_map: &[Option<CachedNodeInfo>],
    line_numbers: &HashSet<usize>,
    _language_impl: &dyn LanguageImpl,
    _content: &str,
    allow_tests: bool,
    debug_mode: bool,
) -> Result<Vec<CodeBlock>> {
    let mut code_blocks: Vec<CodeBlock> = Vec::new();
    // (start_row, end_row) spans that have already produced (or been merged
    // into) a block; prevents emitting the same span twice.
    let mut seen_block_spans: HashSet<(usize, usize)> = HashSet::new();

    for &line in line_numbers {
        // Requested lines are one-based; the map is zero-based.
        let line_idx = line.saturating_sub(1);
        if debug_mode {
            println!("DEBUG: Processing line {line} from cache");
        }

        if line_idx >= cached_line_map.len() {
            if debug_mode {
                println!("DEBUG: Line {line} is out of bounds (Cache)");
            }
            continue;
        }

        if let Some(info) = &cached_line_map[line_idx] {
            if debug_mode {
                println!(
                    "DEBUG: Found cached node info for line {}: original_type='{}', original_lines={}-{}, is_comment={}, is_test={}, context_kind={:?}, context_lines={:?}",
                    line,
                    info.node_kind,
                    info.start_row + 1,
                    info.end_row + 1,
                    info.is_comment,
                    info.is_test,
                    info.context_node_kind,
                    info.context_node_rows.map(|(s, e)| (s + 1, e + 1))
                );
            }

            // Candidate block for this line and the span it would occupy.
            let mut potential_block: Option<CodeBlock> = None;
            let mut block_key: Option<(usize, usize)> = None;
            if info.is_comment {
                if debug_mode {
                    println!("DEBUG: Cache: Handling comment node at line {line}");
                }
                // A comment with full context metadata is merged with its context node.
                if let (Some(ctx_rows), Some(ctx_bytes), Some(ctx_kind), Some(ctx_is_test)) = (
                    info.context_node_rows,
                    info.context_node_bytes,
                    &info.context_node_kind,
                    info.context_node_is_test,
                ) {
                    if !allow_tests && ctx_is_test {
                        // Disallowed test context: fall through to the
                        // stand-alone comment block below.
                        if debug_mode {
                            println!(
                                "DEBUG: Cache: Skipping test context node at lines {}-{}, type: {}",
                                ctx_rows.0 + 1,
                                ctx_rows.1 + 1,
                                ctx_kind
                            );
                        }
                    } else {
                        // Merged span = union of the comment and its context node.
                        let merged_start_row = std::cmp::min(info.start_row, ctx_rows.0);
                        let merged_end_row = std::cmp::max(info.end_row, ctx_rows.1);
                        let merged_start_byte = std::cmp::min(info.start_byte, ctx_bytes.0);
                        let merged_end_byte = std::cmp::max(info.end_byte, ctx_bytes.1);

                        block_key = Some((merged_start_row, merged_end_row));
                        if !seen_block_spans.contains(&block_key.unwrap()) {
                            potential_block = Some(CodeBlock {
                                start_row: merged_start_row,
                                end_row: merged_end_row,
                                start_byte: merged_start_byte,
                                end_byte: merged_end_byte,
                                node_type: ctx_kind.clone(),
                                parent_node_type: None,
                                parent_start_row: None,
                                parent_end_row: None,
                            });
                            if debug_mode {
                                println!(
                                    "DEBUG: Cache: Potential merged block (comment + context) at lines {}-{}, type: {}",
                                    merged_start_row + 1, merged_end_row + 1, ctx_kind
                                );
                            }
                        }
                        // Either the span was already seen or a block was just
                        // built, so this condition always holds here and the
                        // comment's handling ends with the `continue` below.
                        if seen_block_spans.contains(&block_key.unwrap())
                            || potential_block.is_some()
                        {
                            if seen_block_spans.contains(&block_key.unwrap()) && debug_mode {
                                println!(
                                    "DEBUG: Cache: Merged block span {}-{} already seen",
                                    block_key.unwrap().0 + 1,
                                    block_key.unwrap().1 + 1
                                );
                            }
                            // Record both the merged span and the comment's own
                            // span so neither is emitted again.
                            seen_block_spans.insert(block_key.unwrap());
                            seen_block_spans.insert((info.start_row, info.end_row));
                            if let Some(block) = potential_block {
                                code_blocks.push(block);
                            }
                            continue;
                        }
                    }
                }

                // No usable context: emit the comment on its own.
                if potential_block.is_none() {
                    block_key = Some((info.start_row, info.end_row));
                    if !seen_block_spans.contains(&block_key.unwrap()) {
                        potential_block = Some(CodeBlock {
                            start_row: info.start_row,
                            end_row: info.end_row,
                            start_byte: info.start_byte,
                            end_byte: info.end_byte,
                            node_type: info.node_kind.clone(),
                            parent_node_type: None,
                            parent_start_row: None,
                            parent_end_row: None,
                        });
                        if debug_mode {
                            println!(
                                "DEBUG: Cache: Potential individual comment block at lines {}-{}",
                                info.start_row + 1,
                                info.end_row + 1
                            );
                        }
                    } else if debug_mode {
                        println!(
                            "DEBUG: Cache: Individual comment span {}-{} already seen",
                            block_key.unwrap().0 + 1,
                            block_key.unwrap().1 + 1
                        );
                    }
                }
            }
            else {
                // Non-comment node. Entries flagged as tests are dropped outright.
                if !allow_tests && info.is_test {
                    if debug_mode {
                        println!(
                            "DEBUG: Cache: Skipping original test node at lines {}-{}",
                            info.start_row + 1,
                            info.end_row + 1
                        );
                    }
                    continue;
                }

                // Prefer the context node (enclosing ancestor) when one was recorded.
                if let (Some(ctx_rows), Some(ctx_bytes), Some(ctx_kind), Some(ctx_is_test)) = (
                    info.context_node_rows,
                    info.context_node_bytes,
                    &info.context_node_kind,
                    info.context_node_is_test,
                ) {
                    if !allow_tests && ctx_is_test {
                        // Disallowed test ancestor: fall through to the
                        // original node below.
                        if debug_mode {
                            println!(
                                "DEBUG: Cache: Skipping test context node (ancestor) at lines {}-{}",
                                ctx_rows.0 + 1, ctx_rows.1 + 1
                            );
                        }
                    } else {
                        block_key = Some((ctx_rows.0, ctx_rows.1));
                        if !seen_block_spans.contains(&block_key.unwrap()) {
                            potential_block = Some(CodeBlock {
                                start_row: ctx_rows.0,
                                end_row: ctx_rows.1,
                                start_byte: ctx_bytes.0,
                                end_byte: ctx_bytes.1,
                                node_type: ctx_kind.clone(),
                                parent_node_type: info.parent_node_type.clone(),
                                parent_start_row: info.parent_start_row,
                                parent_end_row: info.parent_end_row,
                            });
                            if debug_mode {
                                println!(
                                    "DEBUG: Cache: Potential context node (ancestor) block at lines {}-{}",
                                    ctx_rows.0 + 1, ctx_rows.1 + 1
                                );
                            }
                        } else if debug_mode {
                            println!(
                                "DEBUG: Cache: Context node span {}-{} already seen",
                                block_key.unwrap().0 + 1,
                                block_key.unwrap().1 + 1
                            );
                        }
                        // As in the comment branch, this condition always holds
                        // at this point, so the line is finished here.
                        if seen_block_spans.contains(&block_key.unwrap())
                            || potential_block.is_some()
                        {
                            seen_block_spans.insert(block_key.unwrap());
                            if let Some(block) = potential_block {
                                code_blocks.push(block);
                            }
                            continue;
                        }
                    }
                }

                // No usable context: use the original node, but only if it was
                // recorded as an acceptable parent.
                if potential_block.is_none() && info.original_node_is_acceptable {
                    block_key = Some((info.start_row, info.end_row));
                    if !seen_block_spans.contains(&block_key.unwrap()) {
                        potential_block = Some(CodeBlock {
                            start_row: info.start_row,
                            end_row: info.end_row,
                            start_byte: info.start_byte,
                            end_byte: info.end_byte,
                            node_type: info.node_kind.clone(),
                            parent_node_type: info.parent_node_type.clone(),
                            parent_start_row: info.parent_start_row,
                            parent_end_row: info.parent_end_row,
                        });
                        if debug_mode {
                            println!(
                                "DEBUG: Cache: Potential acceptable original node block at lines {}-{}",
                                info.start_row + 1, info.end_row + 1
                            );
                        }
                    } else if debug_mode {
                        println!(
                            "DEBUG: Cache: Original acceptable node span {}-{} already seen",
                            block_key.unwrap().0 + 1,
                            block_key.unwrap().1 + 1
                        );
                    }
                }
            }

            // Commit the fallback candidate; `insert` returns false for a
            // span that is already recorded.
            if let (Some(block), Some(key)) = (potential_block, block_key) {
                if seen_block_spans.insert(key) {
                    code_blocks.push(block);
                }
            }
        } else if debug_mode {
            println!("DEBUG: Cache: No cached node info found for line {line}");
        }
    }

    code_blocks.sort_by_key(|block| block.start_row);

    // De-duplication pass: comment-like blocks are kept unconditionally, the
    // remaining blocks are checked for overlap against what was already kept.
    let mut final_code_blocks: Vec<CodeBlock> = Vec::new();

    for block in code_blocks
        .iter()
        .filter(|b| b.node_type.contains("comment") || b.node_type == "/*" || b.node_type == "*/")
    {
        final_code_blocks.push(block.clone());
    }

    for block in code_blocks
        .iter()
        .filter(|b| !b.node_type.contains("comment") && b.node_type != "/*" && b.node_type != "*/")
    {
        let mut should_add = true;
        // Indices of already-kept blocks that this block replaces.
        let mut blocks_to_remove: Vec<usize> = Vec::new();

        // Node kinds that win over other kinds when blocks are nested.
        // The "block_comment" entry cannot match in this loop: comment-like
        // blocks are filtered out above and skipped below.
        let important_block_types = [
            "function_declaration",
            "method_declaration",
            "function_item",
            "impl_item",
            "type_declaration",
            "struct_item",
            "block_comment",
        ];
        let is_important = important_block_types.contains(&block.node_type.as_str());

        for (idx, prev_block) in final_code_blocks.iter().enumerate() {
            // Comment-like blocks never take part in overlap resolution.
            if prev_block.node_type.contains("comment")
                || prev_block.node_type == "/*"
                || prev_block.node_type == "*/"
            {
                continue;
            }

            let prev_is_important = important_block_types.contains(&prev_block.node_type.as_str());

            // Row ranges intersect.
            if block.start_row <= prev_block.end_row && block.end_row >= prev_block.start_row {
                // Case 1: the current block lies inside the kept block.
                if block.start_row >= prev_block.start_row && block.end_row <= prev_block.end_row {
                    if debug_mode {
                        println!(
                            "DEBUG: Cache Dedupe: Current block contained: type='{}', lines={}-{} (in type='{}', lines={}-{})",
                            block.node_type, block.start_row + 1, block.end_row + 1,
                            prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
                        );
                    }
                    if is_important && !prev_is_important {
                        // Both blocks stay: nothing is removed and the scan continues.
                        if debug_mode {
                            println!("DEBUG: Cache Dedupe: Keeping important contained block");
                        }
                    } else if !is_important && prev_is_important {
                        if debug_mode {
                            println!("DEBUG: Cache Dedupe: Skipping non-important contained block");
                        }
                        should_add = false;
                        break;
                    } else {
                        // Same importance: the inner (current) block replaces the outer one.
                        if debug_mode {
                            println!(
                                "DEBUG: Cache Dedupe: Replacing outer block with contained block"
                            );
                        }
                        blocks_to_remove.push(idx);
                    }
                }
                // Case 2: the kept block lies inside the current block.
                else if prev_block.start_row >= block.start_row
                    && prev_block.end_row <= block.end_row
                {
                    if debug_mode {
                        println!(
                            "DEBUG: Cache Dedupe: Previous block contained: type='{}', lines={}-{} (contains type='{}', lines={}-{})",
                            block.node_type, block.start_row + 1, block.end_row + 1,
                            prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
                        );
                    }
                    if is_important && !prev_is_important {
                        // Both blocks stay: the scan simply continues.
                        if debug_mode {
                            println!("DEBUG: Cache Dedupe: Keeping important outer block");
                        }
                    } else if !is_important && prev_is_important {
                        if debug_mode {
                            println!("DEBUG: Cache Dedupe: Skipping non-important outer block");
                        }
                        should_add = false;
                        break;
                    } else {
                        if debug_mode {
                            println!("DEBUG: Cache Dedupe: Skipping outer block (already have contained)");
                        }
                        should_add = false;
                        break;
                    }
                }
                // Case 3: partial overlap — the current block is dropped.
                else {
                    if debug_mode {
                        println!(
                            "DEBUG: Cache Dedupe: Partial overlap: type='{}', lines={}-{} (overlaps type='{}', lines={}-{})",
                            block.node_type, block.start_row + 1, block.end_row + 1,
                            prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
                        );
                    }
                    should_add = false;
                    break;
                }
            }
        }

        // Remove from the back so earlier indices stay valid.
        // NOTE(review): removals are applied even when `should_add` ended up
        // false, so a replaced outer block can vanish without the inner block
        // being added — confirm this is intended.
        for idx in blocks_to_remove.iter().rev() {
            final_code_blocks.remove(*idx);
        }

        if should_add {
            final_code_blocks.push(block.clone());
        }
    }

    final_code_blocks.sort_by_key(|block| block.start_row);
    Ok(final_code_blocks)
}
pub fn parse_file_for_code_blocks(
920 content: &str,
921 extension: &str,
922 line_numbers: &HashSet<usize>,
923 allow_tests: bool,
924 _term_matches: Option<&HashMap<usize, HashSet<usize>>>, ) -> Result<Vec<CodeBlock>> {
926 let language_impl = match get_language_impl(extension) {
928 Some(lang) => lang,
929 None => {
930 return Err(anyhow::anyhow!(format!(
931 "Unsupported file type: {}",
932 extension
933 )))
934 }
935 };
936
937 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
939
940 let content_hash = calculate_content_hash(content);
942 let cache_key = format!("{extension}_{content_hash}_{allow_tests}");
943
944 if let Some(cached_entry) = LINE_MAP_CACHE.get(&cache_key) {
946 if debug_mode {
947 println!("DEBUG: Cache hit for line_map key: {cache_key}");
948 }
949
950 return process_cached_line_map(
952 cached_entry.value(),
953 line_numbers,
954 language_impl.as_ref(),
955 content,
956 allow_tests,
957 debug_mode,
958 );
959 }
960
961 if debug_mode {
962 println!("DEBUG: Cache miss for line_map key: {cache_key}. Generating...");
963 }
964
965 let language = language_impl.get_tree_sitter_language();
967
968 let mut parser = TSParser::new();
970 parser.set_language(&language)?;
971
972 let tree_cache_key = format!("file_{extension}");
975 let tree = tree_cache::get_or_parse_tree(&tree_cache_key, content, &mut parser)
976 .context("Failed to parse the file")?;
977
978 let root_node = tree.root_node();
979
980 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
982
983 if debug_mode {
984 println!("DEBUG: Parsing file with extension: {extension}");
985 println!("DEBUG: Root node type: {}", root_node.kind());
986
987 let mut node_types = HashSet::new();
989 super::common::collect_node_types(root_node, &mut node_types);
990 println!("DEBUG: All node types in file: {node_types:?}");
991 }
992
993 let line_count = content.lines().count();
995 let mut line_map: Vec<Option<NodeInfo>> = vec![None; line_count];
996
997 if debug_mode {
999 println!("DEBUG: Building line-to-node map with a single traversal");
1000 }
1001
1002 if debug_mode {
1006 println!("DEBUG: Using sequential processing for AST nodes");
1007 }
1008
1009 process_node(
1011 root_node,
1012 &mut line_map,
1013 extension, language_impl.as_ref(),
1015 content.as_bytes(),
1016 allow_tests,
1017 debug_mode,
1018 None, );
1021
1022 if debug_mode {
1023 println!("DEBUG: Line-to-node map built successfully");
1024 }
1025
1026 let mut code_blocks: Vec<CodeBlock> = Vec::new();
1033 let mut seen_nodes: HashSet<(usize, usize)> = HashSet::new(); for &line in line_numbers {
1037 let line_idx = line.saturating_sub(1);
1039
1040 if debug_mode {
1041 println!("DEBUG: Processing line {line} (Live NodeInfo)");
1042 }
1043
1044 if line_idx >= line_map.len() {
1046 if debug_mode {
1047 println!("DEBUG: Line {line} is out of bounds (Live NodeInfo)");
1048 }
1049 continue;
1050 }
1051
1052 if let Some(info) = &line_map[line_idx] {
1054 if debug_mode {
1055 println!(
1056 "DEBUG: Found node for line {}: type='{}', lines={}-{}",
1057 line,
1058 info.node.kind(),
1059 info.node.start_position().row + 1,
1060 info.node.end_position().row + 1
1061 );
1062 }
1063 let target_node = info.node;
1064 let start_pos = target_node.start_position();
1065 let end_pos = target_node.end_position();
1066 let node_key = (start_pos.row, end_pos.row);
1068
1069 if seen_nodes.contains(&node_key) {
1071 if debug_mode {
1072 println!(
1073 "DEBUG: Already processed node at lines {}-{}, type: {}",
1074 start_pos.row + 1,
1075 end_pos.row + 1,
1076 target_node.kind()
1077 );
1078 }
1079 continue;
1080 }
1081
1082 seen_nodes.insert(node_key);
1084
1085 if info.is_comment {
1087 if debug_mode {
1088 println!(
1089 "DEBUG: Found comment node at line {}: {}",
1090 line,
1091 target_node.kind()
1092 );
1093 }
1094
1095 if let Some(context_node) = info.context_node {
1097 let rel_start_pos = context_node.start_position();
1098 let rel_end_pos = context_node.end_position();
1099 let rel_key = (rel_start_pos.row, rel_end_pos.row);
1100
1101 if !allow_tests && language_impl.is_test_node(&context_node, content.as_bytes())
1104 {
1105 if debug_mode {
1106 println!(
1107 "DEBUG: Skipping test context node at lines {}-{}, type: {}",
1108 rel_start_pos.row + 1,
1109 rel_end_pos.row + 1,
1110 context_node.kind()
1111 );
1112 }
1113 } else {
1114 let merged_start_row = std::cmp::min(start_pos.row, rel_start_pos.row);
1116 let merged_end_row = std::cmp::max(end_pos.row, rel_end_pos.row);
1117 let merged_start_byte =
1118 std::cmp::min(target_node.start_byte(), context_node.start_byte());
1119 let merged_end_byte =
1120 std::cmp::max(target_node.end_byte(), context_node.end_byte());
1121 let merged_node_type = context_node.kind().to_string();
1122
1123 seen_nodes.insert(rel_key); code_blocks.push(CodeBlock {
1126 start_row: merged_start_row,
1127 end_row: merged_end_row,
1128 start_byte: merged_start_byte,
1129 end_byte: merged_end_byte,
1130 node_type: merged_node_type.clone(),
1131 parent_node_type: None, parent_start_row: None,
1133 parent_end_row: None,
1134 });
1135
1136 if debug_mode {
1137 println!(
1138 "DEBUG: Added merged block (comment + context) at lines {}-{}, type: {}",
1139 merged_start_row + 1,
1140 merged_end_row + 1,
1141 merged_node_type
1142 );
1143 }
1144 continue; }
1146 }
1147
1148 code_blocks.push(CodeBlock {
1150 start_row: start_pos.row,
1151 end_row: end_pos.row,
1152 start_byte: target_node.start_byte(),
1153 end_byte: target_node.end_byte(),
1154 node_type: target_node.kind().to_string(),
1155 parent_node_type: None,
1156 parent_start_row: None,
1157 parent_end_row: None,
1158 });
1159 if debug_mode {
1160 println!(
1161 "DEBUG: Added individual comment block at lines {}-{}",
1162 start_pos.row + 1,
1163 end_pos.row + 1
1164 );
1165 }
1166 continue; }
1168
1169 if info.is_test {
1171 if debug_mode {
1173 println!(
1174 "DEBUG: Skipping test node at lines {}-{}",
1175 start_pos.row + 1,
1176 end_pos.row + 1
1177 );
1178 }
1179 continue;
1180 }
1181
1182 let mut existing_block = false;
1185 for block in &code_blocks {
1186 if line > block.start_row + 1 && line <= block.end_row + 1 {
1187 if debug_mode {
1188 println!(
1189 "DEBUG: Line {} is within existing block: type='{}', lines={}-{}",
1190 line,
1191 block.node_type,
1192 block.start_row + 1,
1193 block.end_row + 1
1194 );
1195 }
1196 existing_block = true;
1197 break;
1198 }
1199 }
1200 if existing_block {
1201 continue;
1202 }
1203
1204 if let Some(context_node) = info.context_node {
1206 let rel_start_pos = context_node.start_position();
1208 let rel_end_pos = context_node.end_position();
1209 let rel_key = (rel_start_pos.row, rel_end_pos.row);
1210
1211 if !allow_tests && language_impl.is_test_node(&context_node, content.as_bytes()) {
1213 if debug_mode {
1214 println!(
1215 "DEBUG: Skipping test context node (ancestor) at lines {}-{}",
1216 rel_start_pos.row + 1,
1217 rel_end_pos.row + 1
1218 );
1219 }
1220 } else {
1221 if debug_mode {
1222 println!(
1223 "DEBUG: Using context node (ancestor) at lines {}-{}",
1224 rel_start_pos.row + 1,
1225 rel_end_pos.row + 1
1226 );
1227 }
1228 seen_nodes.insert(rel_key); let parent_info = if context_node.kind() == "struct_type" {
1232 language_impl
1233 .find_parent_function(context_node)
1234 .map(|parent_node| {
1235 let parent_type = parent_node.kind().to_string();
1236 let parent_start = parent_node.start_position().row;
1237 let parent_end = parent_node.end_position().row;
1238 (parent_type, parent_start, parent_end)
1239 })
1240 } else {
1241 None
1242 };
1243
1244 code_blocks.push(CodeBlock {
1245 start_row: rel_start_pos.row,
1246 end_row: rel_end_pos.row,
1247 start_byte: context_node.start_byte(),
1248 end_byte: context_node.end_byte(),
1249 node_type: context_node.kind().to_string(),
1250 parent_node_type: parent_info.as_ref().map(|(t, _, _)| t.clone()),
1251 parent_start_row: parent_info.as_ref().map(|(_, s, _)| *s),
1252 parent_end_row: parent_info.as_ref().map(|(_, _, e)| *e),
1253 });
1254 continue; }
1256 }
1257
1258 if language_impl.is_acceptable_parent(&target_node) {
1260 if debug_mode {
1261 println!(
1262 "DEBUG: Adding acceptable parent node at lines {}-{}",
1263 start_pos.row + 1,
1264 end_pos.row + 1
1265 );
1266 }
1267
1268 let parent_info = if target_node.kind() == "struct_type" {
1270 language_impl
1271 .find_parent_function(target_node)
1272 .map(|parent_node| {
1273 let parent_type = parent_node.kind().to_string();
1274 let parent_start = parent_node.start_position().row;
1275 let parent_end = parent_node.end_position().row;
1276 (parent_type, parent_start, parent_end)
1277 })
1278 } else {
1279 None
1280 };
1281
1282 code_blocks.push(CodeBlock {
1283 start_row: start_pos.row,
1284 end_row: end_pos.row,
1285 start_byte: target_node.start_byte(),
1286 end_byte: target_node.end_byte(),
1287 node_type: target_node.kind().to_string(),
1288 parent_node_type: parent_info.as_ref().map(|(t, _, _)| t.clone()),
1289 parent_start_row: parent_info.as_ref().map(|(_, s, _)| *s),
1290 parent_end_row: parent_info.as_ref().map(|(_, _, e)| *e),
1291 });
1292 continue; }
1294
1295 if debug_mode {
1297 println!(
1298 "DEBUG: Adding node via fallback at lines {}-{}",
1299 start_pos.row + 1,
1300 end_pos.row + 1
1301 );
1302 }
1303
1304 let parent_info = if target_node.kind() == "struct_type" {
1306 language_impl
1307 .find_parent_function(target_node)
1308 .map(|parent_node| {
1309 let parent_type = parent_node.kind().to_string();
1310 let parent_start = parent_node.start_position().row;
1311 let parent_end = parent_node.end_position().row;
1312 (parent_type, parent_start, parent_end)
1313 })
1314 } else {
1315 None
1316 };
1317
1318 code_blocks.push(CodeBlock {
1319 start_row: start_pos.row,
1320 end_row: end_pos.row,
1321 start_byte: target_node.start_byte(),
1322 end_byte: target_node.end_byte(),
1323 node_type: target_node.kind().to_string(),
1324 parent_node_type: parent_info.as_ref().map(|(t, _, _)| t.clone()),
1325 parent_start_row: parent_info.as_ref().map(|(_, s, _)| *s),
1326 parent_end_row: parent_info.as_ref().map(|(_, _, e)| *e),
1327 });
1328 } else if debug_mode {
1329 println!("DEBUG: No node info found for line {line} (Live NodeInfo)");
1330 }
1331 } code_blocks.sort_by_key(|block| block.start_row);
1335
1336 let mut final_code_blocks: Vec<CodeBlock> = Vec::new();
1338
1339 for block in code_blocks
1341 .iter()
1342 .filter(|b| b.node_type.contains("comment") || b.node_type == "/*" || b.node_type == "*/")
1343 {
1344 final_code_blocks.push(block.clone());
1345 }
1346
1347 for block in code_blocks
1349 .iter()
1350 .filter(|b| !b.node_type.contains("comment") && b.node_type != "/*" && b.node_type != "*/")
1351 {
1352 let mut should_add = true;
1353 let mut blocks_to_remove: Vec<usize> = Vec::new();
1354
1355 let important_block_types = [
1357 "function_declaration",
1358 "method_declaration",
1359 "function_item",
1360 "impl_item",
1361 "type_declaration",
1362 "struct_item",
1363 "block_comment",
1364 ];
1365 let is_important = important_block_types.contains(&block.node_type.as_str());
1366
1367 for (idx, prev_block) in final_code_blocks.iter().enumerate() {
1369 if prev_block.node_type.contains("comment")
1370 || prev_block.node_type == "/*"
1371 || prev_block.node_type == "*/"
1372 {
1373 continue; }
1375
1376 let prev_is_important = important_block_types.contains(&prev_block.node_type.as_str());
1377
1378 if block.start_row <= prev_block.end_row && block.end_row >= prev_block.start_row {
1380 if block.start_row >= prev_block.start_row && block.end_row <= prev_block.end_row {
1382 if debug_mode {
1383 println!(
1384 "DEBUG: Current block is contained within previous block: type='{}', lines={}-{} (contained in type='{}', lines={}-{})",
1385 block.node_type, block.start_row + 1, block.end_row + 1,
1386 prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
1387 );
1388 }
1389
1390 if is_important && !prev_is_important {
1392 if debug_mode {
1393 println!(
1394 "DEBUG: Keeping important block type: {node_type}",
1395 node_type = block.node_type
1396 );
1397 }
1398 }
1400 else if !is_important && prev_is_important {
1402 if debug_mode {
1403 println!("DEBUG: Skipping non-important block in favor of important block: {node_type}", node_type = prev_block.node_type);
1404 }
1405 should_add = false;
1406 break;
1407 }
1408 else {
1410 blocks_to_remove.push(idx);
1411 }
1412 }
1413 else if prev_block.start_row >= block.start_row
1415 && prev_block.end_row <= block.end_row
1416 {
1417 if debug_mode {
1418 println!(
1419 "DEBUG: Previous block is contained within current block: type='{}', lines={}-{} (contains type='{}', lines={}-{})",
1420 block.node_type, block.start_row + 1, block.end_row + 1,
1421 prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
1422 );
1423 }
1424
1425 if is_important && !prev_is_important {
1427 if debug_mode {
1428 println!(
1429 "DEBUG: Keeping important block type: {node_type}",
1430 node_type = block.node_type
1431 );
1432 }
1433 }
1435 else if !is_important && prev_is_important {
1437 if debug_mode {
1438 println!("DEBUG: Skipping non-important block in favor of important block: {node_type}", node_type = prev_block.node_type);
1439 }
1440 should_add = false;
1441 break;
1442 }
1443 else {
1445 should_add = false;
1446 break;
1447 }
1448 }
1449 else {
1451 if debug_mode {
1452 println!(
1453 "DEBUG: Blocks partially overlap: type='{}', lines={}-{} (overlaps with type='{}', lines={}-{})",
1454 block.node_type, block.start_row + 1, block.end_row + 1,
1455 prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
1456 );
1457 }
1458 should_add = false;
1460 break;
1461 }
1462 }
1463 }
1464
1465 for idx in blocks_to_remove.iter().rev() {
1467 final_code_blocks.remove(*idx);
1468 }
1469
1470 if should_add {
1471 final_code_blocks.push(block.clone());
1472 }
1473 }
1474
1475 final_code_blocks.sort_by_key(|block| block.start_row);
1477
1478 let cacheable_line_map: Vec<Option<CachedNodeInfo>> = line_map
1484 .iter()
1485 .map(|opt_node_info| {
1486 opt_node_info.map(|node_info| {
1487 CachedNodeInfo::from_node_info(
1488 &node_info,
1489 language_impl.as_ref(),
1490 content.as_bytes(),
1491 allow_tests,
1492 )
1493 })
1494 })
1495 .collect();
1496
1497 LINE_MAP_CACHE.insert(cache_key.clone(), cacheable_line_map);
1499 if debug_mode {
1500 println!("DEBUG: Stored generated line_map in cache key: {cache_key}");
1501 }
1502
1503 Ok(final_code_blocks)
1505}