probe_code/language/
parser.rs

1use anyhow::{Context, Result};
2use dashmap::DashMap;
3use once_cell::sync::Lazy;
4use std::collections::hash_map::DefaultHasher;
5use std::collections::{HashMap, HashSet};
6use std::hash::{Hash, Hasher};
7use tree_sitter::{Node, Parser as TSParser};
8
9use probe_code::language::factory::get_language_impl;
10use probe_code::language::language_trait::LanguageImpl;
11use probe_code::language::tree_cache;
12use probe_code::models::CodeBlock;
13
// Static, lazily-initialised concurrent cache of line maps.
// Each cached value is a vector of optional `CachedNodeInfo` entries; `process_cached_line_map`
// indexes such a vector by zero-based line number, and `None` marks a line with no recorded node.
// NOTE(review): the format of the `String` key is not visible in this part of the file —
// presumably it combines the file identity with `calculate_content_hash`; confirm at the insert site.
static LINE_MAP_CACHE: Lazy<DashMap<String, Vec<Option<CachedNodeInfo>>>> = Lazy::new(DashMap::new);
16
/// Hashes `content` with the standard library's `DefaultHasher` and returns the 64-bit digest.
///
/// The digest is intended for cache validation (detecting that file content changed);
/// it is not a cryptographic hash.
fn calculate_content_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
23
/// Owned, lifetime-free snapshot of a `NodeInfo`, so that per-line results can be kept in a cache
/// after the syntax tree they were computed from is gone.
///
/// Rows are the zero-based rows reported by tree-sitter; byte values are byte offsets into the
/// source.
#[derive(Debug, Clone)]
struct CachedNodeInfo {
    // ---- Node originally assigned to the line ----
    /// Start byte offset of the original node.
    start_byte: usize,
    /// End byte offset of the original node.
    end_byte: usize,
    /// First row covered by the original node.
    start_row: usize,
    /// Last row covered by the original node.
    end_row: usize,
    /// Grammar kind of the original node.
    node_kind: String,
    /// Whether the original node is a comment.
    is_comment: bool,
    /// Test status recorded for the original node.
    is_test: bool,
    /// Whether the original node was itself an acceptable parent type.
    original_node_is_acceptable: bool,

    // ---- Context node (if any) ----
    /// `(start_byte, end_byte)` of the context node.
    context_node_bytes: Option<(usize, usize)>,
    /// `(start_row, end_row)` of the context node.
    context_node_rows: Option<(usize, usize)>,
    /// Grammar kind of the context node.
    context_node_kind: Option<String>,
    /// Test status of the context node.
    context_node_is_test: Option<bool>,

    // ---- Parent function (only recorded when applicable) ----
    /// Grammar kind of the parent function node.
    parent_node_type: Option<String>,
    /// First row of the parent function node.
    parent_start_row: Option<usize>,
    /// Last row of the parent function node.
    parent_end_row: Option<usize>,
}
54
55impl CachedNodeInfo {
56    /// Create a CachedNodeInfo from a NodeInfo and determine the representative node
57    fn from_node_info(
58        info: &NodeInfo<'_>,
59        language_impl: &dyn LanguageImpl,
60        content: &[u8],
61        allow_tests: bool,
62    ) -> Self {
63        // Determine the representative node based on the same logic used in the live processing path
64        let mut rep_node = info.node; // Default to self
65                                      // let mut is_merged = false; // Removed unused variable
66
67        if info.is_comment {
68            if let Some(ctx) = info.context_node {
69                if !allow_tests && !language_impl.is_test_node(&ctx, content) {
70                    rep_node = ctx; // Context represents the merged block
71                                    // is_merged = true; // Removed assignment to unused variable
72                }
73            }
74        } else if !info.is_test {
75            if let Some(ctx) = info.context_node {
76                if !allow_tests && !language_impl.is_test_node(&ctx, content) {
77                    rep_node = ctx; // Use context ancestor
78                }
79            }
80        }
81        // If info.is_test is true, rep_node remains info.node
82
83        // Get parent function info if applicable (e.g., for struct_type nodes)
84        let parent_info = if rep_node.kind() == "struct_type" {
85            language_impl
86                .find_parent_function(rep_node)
87                .map(|parent_node| {
88                    let parent_type = parent_node.kind().to_string();
89                    let parent_start = parent_node.start_position().row;
90                    let parent_end = parent_node.end_position().row;
91                    (parent_type, parent_start, parent_end)
92                })
93        } else {
94            None
95        };
96
97        // Check if original node is acceptable
98        let original_acceptable = language_impl.is_acceptable_parent(&info.node);
99
100        // Check if context node is a test node
101        let context_test = info
102            .context_node
103            .map(|ctx| language_impl.is_test_node(&ctx, content));
104
105        CachedNodeInfo {
106            // Original node details
107            start_byte: info.node.start_byte(),
108            end_byte: info.node.end_byte(),
109            start_row: info.node.start_position().row,
110            end_row: info.node.end_position().row,
111            node_kind: info.node.kind().to_string(),
112            is_comment: info.is_comment,
113            is_test: info.is_test, // Original node test status
114            original_node_is_acceptable: original_acceptable,
115            // Context node details
116            context_node_bytes: info.context_node.map(|n| (n.start_byte(), n.end_byte())),
117            context_node_rows: info
118                .context_node
119                .map(|n| (n.start_position().row, n.end_position().row)),
120            context_node_kind: info.context_node.map(|n| n.kind().to_string()),
121            context_node_is_test: context_test, // Context node test status
122            // specificity: info.specificity, // Original node specificity - REMOVED (unused)
123            // Parent function info
124            parent_node_type: parent_info.as_ref().map(|(t, _, _)| t.clone()),
125            parent_start_row: parent_info.as_ref().map(|(_, s, _)| *s),
126            parent_end_row: parent_info.as_ref().map(|(_, _, e)| *e),
127            // Representative node details - REMOVED (unused)
128            // representative_start_byte: rep_node.start_byte(),
129            // representative_end_byte: rep_node.end_byte(),
130            // representative_start_row: rep_node.start_position().row,
131            // representative_end_row: rep_node.end_position().row,
132            // representative_node_kind: rep_node.kind().to_string(),
133            // is_merged_comment: is_merged,
134        }
135    }
136}
137
138/// Structure to hold node information for a specific line
139#[derive(Clone, Copy)]
140struct NodeInfo<'a> {
141    node: Node<'a>,
142    is_comment: bool,
143    context_node: Option<Node<'a>>, // Represents the nearest acceptable ancestor if node itself isn't one
144    is_test: bool,
145    specificity: usize,
146}
147
148/// Helper function to determine if we should update the line map for a given line
149fn should_update_line_map<'a>(
150    line_map: &[Option<NodeInfo<'a>>],
151    line: usize,
152    node: Node<'a>,
153    is_comment: bool,
154    context_node: Option<Node<'a>>,
155    specificity: usize,
156) -> bool {
157    match &line_map[line] {
158        None => true, // No existing node, always update
159        Some(current) => {
160            // Special case: If current node is a comment with context, and new node is the context,
161            // don't replace it (preserve the comment+context relationship)
162            if current.is_comment && current.context_node.is_some() {
163                if let Some(ctx) = current.context_node {
164                    if ctx.id() == node.id() {
165                        return false;
166                    }
167                }
168            }
169
170            // Special case: If new node is a comment with context, and current node is the context,
171            // replace it (comment with context is more specific)
172            if is_comment && context_node.is_some() {
173                if let Some(ctx) = context_node {
174                    if ctx.id() == current.node.id() {
175                        return true;
176                    }
177                }
178            }
179
180            // Otherwise use specificity to decide
181            specificity < current.specificity
182        }
183    }
184}
185
186/// Gets the previous sibling of a node in the AST
187fn find_prev_sibling(node: Node<'_>) -> Option<Node<'_>> {
188    let parent = node.parent()?;
189
190    let mut cursor = parent.walk();
191    let mut prev_child = None;
192
193    for child in parent.children(&mut cursor) {
194        if child.id() == node.id() {
195            return prev_child;
196        }
197        prev_child = Some(child);
198    }
199
200    None // No previous sibling found
201}
202
203/// Find first acceptable node in a subtree
204fn find_acceptable_child<'a>(node: Node<'a>, language_impl: &dyn LanguageImpl) -> Option<Node<'a>> {
205    let mut cursor = node.walk();
206    for child in node.children(&mut cursor) {
207        if language_impl.is_acceptable_parent(&child) {
208            return Some(child);
209        }
210
211        // Recursive search
212        if let Some(acceptable) = find_acceptable_child(child, language_impl) {
213            return Some(acceptable);
214        }
215    }
216
217    None // No acceptable child found
218}
219
220/// Finds the immediate next node that follows a given node in the AST
221fn find_immediate_next_node(node: Node<'_>) -> Option<Node<'_>> {
222    let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
223
224    // First try direct next sibling
225    if let Some(next) = node.next_sibling() {
226        if debug_mode {
227            println!(
228                "DEBUG: Found immediate next sibling: type='{}', lines={}-{}",
229                next.kind(),
230                next.start_position().row + 1,
231                next.end_position().row + 1
232            );
233        }
234        return Some(next);
235    }
236
237    // If no direct sibling, check parent's next sibling
238    if let Some(parent) = node.parent() {
239        if let Some(next_parent) = parent.next_sibling() {
240            if debug_mode {
241                println!(
242                    "DEBUG: Found parent's next sibling: type='{}', lines={}-{}",
243                    next_parent.kind(),
244                    next_parent.start_position().row + 1,
245                    next_parent.end_position().row + 1
246                );
247            }
248            return Some(next_parent);
249        }
250    }
251
252    if debug_mode {
253        println!("DEBUG: No immediate next node found");
254    }
255    None
256}
257
258/// Helper function to find the context node for a comment.
259/// This is a comprehensive implementation that handles all comment context finding strategies.
260fn find_comment_context_node<'a>(
261    comment_node: Node<'a>,
262    language_impl: &dyn LanguageImpl,
263    debug_mode: bool,
264) -> Option<Node<'a>> {
265    let start_row = comment_node.start_position().row;
266
267    if debug_mode {
268        println!(
269            "DEBUG: Finding context for comment at lines {}-{}: {}",
270            comment_node.start_position().row + 1,
271            comment_node.end_position().row + 1,
272            comment_node.kind()
273        );
274    }
275
276    // Strategy 1: Try to find next non-comment sibling first (most common case for doc comments)
277    let mut current_sibling = comment_node.next_sibling();
278
279    // Skip over any comment siblings to find the next non-comment sibling
280    while let Some(sibling) = current_sibling {
281        if sibling.kind() == "comment"
282            || sibling.kind() == "line_comment"
283            || sibling.kind() == "block_comment"
284            || sibling.kind() == "doc_comment"
285            || sibling.kind() == "//"
286        {
287            // This is another comment, move to the next sibling
288            current_sibling = sibling.next_sibling();
289            continue;
290        }
291
292        // Found a non-comment sibling
293        if language_impl.is_acceptable_parent(&sibling) {
294            if debug_mode {
295                println!(
296                    "DEBUG: Found next non-comment sibling for comment at line {}: type='{}', lines={}-{}",
297                    start_row + 1,
298                    sibling.kind(),
299                    sibling.start_position().row + 1,
300                    sibling.end_position().row + 1
301                );
302            }
303            return Some(sibling);
304        } else {
305            // If next sibling isn't acceptable, check its children
306            if let Some(child) = find_acceptable_child(sibling, language_impl) {
307                if debug_mode {
308                    println!(
309                        "DEBUG: Found acceptable child in next non-comment sibling for comment at line {}: type='{}', lines={}-{}",
310                        start_row + 1,
311                        child.kind(),
312                        child.start_position().row + 1,
313                        child.end_position().row + 1
314                    );
315                }
316                return Some(child);
317            }
318        }
319
320        // If we get here, this non-comment sibling wasn't acceptable, try the next one
321        current_sibling = sibling.next_sibling();
322    }
323
324    // Strategy 2: If no acceptable next sibling, try previous sibling (for trailing comments)
325    // But only if the comment is at the end of a block or if there's no next sibling
326    // This helps ensure comments are associated with the code that follows them when possible
327    let has_next_sibling = comment_node.next_sibling().is_some();
328
329    if !has_next_sibling {
330        if let Some(prev_sibling) = find_prev_sibling(comment_node) {
331            if language_impl.is_acceptable_parent(&prev_sibling) {
332                if debug_mode {
333                    println!(
334                        "DEBUG: Found previous sibling for comment at line {}: type='{}', lines={}-{}",
335                        start_row + 1,
336                        prev_sibling.kind(),
337                        prev_sibling.start_position().row + 1,
338                        prev_sibling.end_position().row + 1
339                    );
340                }
341                return Some(prev_sibling);
342            } else {
343                // If previous sibling isn't acceptable, check its children
344                if let Some(child) = find_acceptable_child(prev_sibling, language_impl) {
345                    if debug_mode {
346                        println!(
347                            "DEBUG: Found acceptable child in previous sibling for comment at line {}: type='{}', lines={}-{}",
348                            start_row + 1,
349                            child.kind(),
350                            child.start_position().row + 1,
351                            child.end_position().row + 1
352                        );
353                    }
354                    return Some(child);
355                }
356            }
357        }
358    }
359
360    // Strategy 3: Check parent chain
361    let mut current = comment_node;
362    while let Some(parent) = current.parent() {
363        if language_impl.is_acceptable_parent(&parent) {
364            if debug_mode {
365                println!(
366                    "DEBUG: Found parent for comment at line {}: type='{}', lines={}-{}",
367                    start_row + 1,
368                    parent.kind(),
369                    parent.start_position().row + 1,
370                    parent.end_position().row + 1
371                );
372            }
373            return Some(parent);
374        }
375        current = parent;
376    }
377
378    // Strategy 4: Look for any immediate next node
379    if let Some(next_node) = find_immediate_next_node(comment_node) {
380        if language_impl.is_acceptable_parent(&next_node) {
381            if debug_mode {
382                println!(
383                    "DEBUG: Using immediate next acceptable node: type='{}', lines={}-{}",
384                    next_node.kind(),
385                    next_node.start_position().row + 1,
386                    next_node.end_position().row + 1
387                );
388            }
389            return Some(next_node);
390        }
391
392        // Look for acceptable child in the next node
393        if let Some(child) = find_acceptable_child(next_node, language_impl) {
394            if debug_mode {
395                println!(
396                    "DEBUG: Found acceptable child in next node: type='{}', lines={}-{}",
397                    child.kind(),
398                    child.start_position().row + 1,
399                    child.end_position().row + 1
400                );
401            }
402            return Some(child);
403        }
404    }
405
406    if debug_mode {
407        println!("DEBUG: No related node found for the comment");
408    }
409    None
410}
411
412/// Process a node and its children in a single pass, building a comprehensive line-to-node map.
413/// This version passes the nearest acceptable ancestor context down the tree.
414#[allow(clippy::too_many_arguments)]
415fn process_node<'a>(
416    node: Node<'a>,
417    line_map: &mut Vec<Option<NodeInfo<'a>>>,
418    _extension: &str, // Keep extension if needed by language_impl methods, otherwise remove
419    language_impl: &dyn LanguageImpl,
420    content: &[u8],
421    allow_tests: bool,
422    debug_mode: bool,
423    current_ancestor: Option<Node<'a>>, // The nearest acceptable ancestor found so far
424) {
425    let start_row = node.start_position().row;
426    let end_row = node.end_position().row;
427
428    // Skip nodes that are outside the file bounds (e.g., if file content changed during processing)
429    if start_row >= line_map.len() {
430        return;
431    }
432
433    // Determine node type and test status
434    let is_comment = node.kind() == "comment"
435        || node.kind() == "line_comment"
436        || node.kind() == "block_comment"
437        || node.kind() == "doc_comment"
438        || node.kind() == "//"; // Example for some languages
439
440    // Check if the node itself represents test code
441    let is_test = !allow_tests && language_impl.is_test_node(&node, content);
442
443    // Calculate node specificity (smaller is more specific)
444    let line_coverage = end_row.saturating_sub(start_row) + 1;
445    let byte_coverage = node.end_byte().saturating_sub(node.start_byte());
446    let specificity = line_coverage * 1000 + (byte_coverage / 100); // Example specificity calculation
447
448    // Determine the context_node for this node
449    let context_node = if is_comment {
450        // For comments, find the related code node (e.g., the function it documents)
451        // This function might still need to look up/around, but doesn't use the ancestor cache.
452        find_comment_context_node(node, language_impl, debug_mode)
453    } else {
454        // For non-comments, if the node itself isn't an acceptable block boundary,
455        // use the ancestor context passed down. Otherwise, it defines its own context (None).
456        if !language_impl.is_acceptable_parent(&node) {
457            current_ancestor
458        } else {
459            None // This node is an acceptable parent, it starts a new context.
460        }
461    };
462
463    // Update the line map for each line covered by this node
464    // Ensure end_row does not exceed line_map bounds
465    let effective_end_row = std::cmp::min(end_row, line_map.len().saturating_sub(1));
466    for line in start_row..=effective_end_row {
467        // Determine if this node is a better fit for the line than the current entry
468        let should_update =
469            should_update_line_map(line_map, line, node, is_comment, context_node, specificity);
470
471        if should_update {
472            // Store info about the node covering this line
473            line_map[line] = Some(NodeInfo {
474                node,
475                is_comment,
476                context_node, // Store the determined context (parent ancestor or None)
477                is_test,      // Store the test status of this specific node
478                specificity,
479            });
480        }
481    }
482
483    // Determine the ancestor context to pass down to children
484    let next_ancestor = if language_impl.is_acceptable_parent(&node) {
485        // If this node is an acceptable parent, it becomes the context for its children
486        Some(node)
487    } else {
488        // Otherwise, children inherit the same context as this node
489        current_ancestor
490    };
491
492    // Process children recursively (depth-first traversal)
493    let mut cursor = node.walk();
494    for child in node.children(&mut cursor) {
495        process_node(
496            child,
497            line_map,
498            _extension,
499            language_impl,
500            content,
501            allow_tests,
502            debug_mode,
503            next_ancestor, // Pass the determined ancestor context down
504        );
505    }
506}
507
508/// Process a cached line map to extract code blocks
509fn process_cached_line_map(
510    cached_line_map: &[Option<CachedNodeInfo>],
511    line_numbers: &HashSet<usize>,
512    _language_impl: &dyn LanguageImpl, // Not used directly, logic relies on cached info
513    _content: &str,                    // Not used directly, logic relies on cached info
514    allow_tests: bool,
515    debug_mode: bool,
516) -> Result<Vec<CodeBlock>> {
517    let mut code_blocks: Vec<CodeBlock> = Vec::new();
518    // Use a HashSet to track the start/end rows of blocks already added
519    let mut seen_block_spans: HashSet<(usize, usize)> = HashSet::new();
520
521    // Process each line number using the cached map
522    for &line in line_numbers {
523        let line_idx = line.saturating_sub(1); // Adjust for 0-based indexing
524
525        if debug_mode {
526            println!("DEBUG: Processing line {line} from cache");
527        }
528
529        if line_idx >= cached_line_map.len() {
530            if debug_mode {
531                println!("DEBUG: Line {line} is out of bounds (Cache)");
532            }
533            continue;
534        }
535
536        if let Some(info) = &cached_line_map[line_idx] {
537            if debug_mode {
538                println!(
539                    "DEBUG: Found cached node info for line {}: original_type='{}', original_lines={}-{}, is_comment={}, is_test={}, context_kind={:?}, context_lines={:?}",
540                    line,
541                    info.node_kind,
542                    info.start_row + 1,
543                    info.end_row + 1,
544                    info.is_comment,
545                    info.is_test,
546                    info.context_node_kind,
547                    info.context_node_rows.map(|(s, e)| (s + 1, e + 1))
548                );
549            }
550
551            // Determine which block to potentially create based on cached info
552            let mut potential_block: Option<CodeBlock> = None;
553            let mut block_key: Option<(usize, usize)> = None; // Key for seen_block_spans
554
555            // --- Replicate Cache Miss Logic using CachedNodeInfo ---
556
557            // 1. Handle Comments
558            if info.is_comment {
559                if debug_mode {
560                    println!("DEBUG: Cache: Handling comment node at line {line}");
561                }
562                // Check for context node
563                if let (Some(ctx_rows), Some(ctx_bytes), Some(ctx_kind), Some(ctx_is_test)) = (
564                    info.context_node_rows,
565                    info.context_node_bytes,
566                    &info.context_node_kind,
567                    info.context_node_is_test,
568                ) {
569                    // Check test status of the context node
570                    if !allow_tests && ctx_is_test {
571                        if debug_mode {
572                            println!(
573                                "DEBUG: Cache: Skipping test context node at lines {}-{}, type: {}",
574                                ctx_rows.0 + 1,
575                                ctx_rows.1 + 1,
576                                ctx_kind
577                            );
578                        }
579                        // Fall through to potentially add individual comment if context is skipped
580                    } else {
581                        // Create a merged block
582                        let merged_start_row = std::cmp::min(info.start_row, ctx_rows.0);
583                        let merged_end_row = std::cmp::max(info.end_row, ctx_rows.1);
584                        let merged_start_byte = std::cmp::min(info.start_byte, ctx_bytes.0);
585                        let merged_end_byte = std::cmp::max(info.end_byte, ctx_bytes.1);
586
587                        block_key = Some((merged_start_row, merged_end_row));
588                        if !seen_block_spans.contains(&block_key.unwrap()) {
589                            potential_block = Some(CodeBlock {
590                                start_row: merged_start_row,
591                                end_row: merged_end_row,
592                                start_byte: merged_start_byte,
593                                end_byte: merged_end_byte,
594                                node_type: ctx_kind.clone(), // Use context kind for merged block
595                                parent_node_type: None, // Consistent with original miss path logic
596                                parent_start_row: None,
597                                parent_end_row: None,
598                            });
599                            if debug_mode {
600                                println!(
601                                    "DEBUG: Cache: Potential merged block (comment + context) at lines {}-{}, type: {}",
602                                    merged_start_row + 1, merged_end_row + 1, ctx_kind
603                                );
604                            }
605                        }
606                        // If we created a merged block, we don't add the individual comment later
607                        // So we continue the outer loop here if the block_key was already seen or if we created a potential block
608                        if seen_block_spans.contains(&block_key.unwrap())
609                            || potential_block.is_some()
610                        {
611                            if seen_block_spans.contains(&block_key.unwrap()) && debug_mode {
612                                println!(
613                                    "DEBUG: Cache: Merged block span {}-{} already seen",
614                                    block_key.unwrap().0 + 1,
615                                    block_key.unwrap().1 + 1
616                                );
617                            }
618                            // Add the key even if block wasn't added, to prevent reprocessing context
619                            seen_block_spans.insert(block_key.unwrap());
620                            // Also mark original comment span as seen if merged
621                            seen_block_spans.insert((info.start_row, info.end_row));
622                            if let Some(block) = potential_block {
623                                code_blocks.push(block);
624                            }
625                            continue; // Move to next line number
626                        }
627                    }
628                }
629
630                // Add individual comment if not merged or if context was skipped
631                if potential_block.is_none() {
632                    block_key = Some((info.start_row, info.end_row));
633                    if !seen_block_spans.contains(&block_key.unwrap()) {
634                        potential_block = Some(CodeBlock {
635                            start_row: info.start_row,
636                            end_row: info.end_row,
637                            start_byte: info.start_byte,
638                            end_byte: info.end_byte,
639                            node_type: info.node_kind.clone(),
640                            parent_node_type: None,
641                            parent_start_row: None,
642                            parent_end_row: None,
643                        });
644                        if debug_mode {
645                            println!(
646                                "DEBUG: Cache: Potential individual comment block at lines {}-{}",
647                                info.start_row + 1,
648                                info.end_row + 1
649                            );
650                        }
651                    } else if debug_mode {
652                        println!(
653                            "DEBUG: Cache: Individual comment span {}-{} already seen",
654                            block_key.unwrap().0 + 1,
655                            block_key.unwrap().1 + 1
656                        );
657                    }
658                }
659            }
660            // 2. Handle Non-Comments
661            else {
662                // Skip original test nodes if not allowed
663                if !allow_tests && info.is_test {
664                    if debug_mode {
665                        println!(
666                            "DEBUG: Cache: Skipping original test node at lines {}-{}",
667                            info.start_row + 1,
668                            info.end_row + 1
669                        );
670                    }
671                    continue; // Move to next line number
672                }
673
674                // Check for context node (ancestor)
675                if let (Some(ctx_rows), Some(ctx_bytes), Some(ctx_kind), Some(ctx_is_test)) = (
676                    info.context_node_rows,
677                    info.context_node_bytes,
678                    &info.context_node_kind,
679                    info.context_node_is_test,
680                ) {
681                    // Check test status of the context node
682                    if !allow_tests && ctx_is_test {
683                        if debug_mode {
684                            println!(
685                                "DEBUG: Cache: Skipping test context node (ancestor) at lines {}-{}",
686                                ctx_rows.0 + 1, ctx_rows.1 + 1
687                            );
688                        }
689                        // Fall through to check original node if context is skipped
690                    } else {
691                        // Use context node
692                        block_key = Some((ctx_rows.0, ctx_rows.1));
693                        if !seen_block_spans.contains(&block_key.unwrap()) {
694                            potential_block = Some(CodeBlock {
695                                start_row: ctx_rows.0,
696                                end_row: ctx_rows.1,
697                                start_byte: ctx_bytes.0,
698                                end_byte: ctx_bytes.1,
699                                node_type: ctx_kind.clone(),
700                                // Parent info comes from CachedNodeInfo, which derived it based on the representative node (potentially the context)
701                                parent_node_type: info.parent_node_type.clone(),
702                                parent_start_row: info.parent_start_row,
703                                parent_end_row: info.parent_end_row,
704                            });
705                            if debug_mode {
706                                println!(
707                                    "DEBUG: Cache: Potential context node (ancestor) block at lines {}-{}",
708                                    ctx_rows.0 + 1, ctx_rows.1 + 1
709                                );
710                            }
711                        } else if debug_mode {
712                            println!(
713                                "DEBUG: Cache: Context node span {}-{} already seen",
714                                block_key.unwrap().0 + 1,
715                                block_key.unwrap().1 + 1
716                            );
717                        }
718                        // If we used the context node (or it was already seen), skip checking the original node
719                        if seen_block_spans.contains(&block_key.unwrap())
720                            || potential_block.is_some()
721                        {
722                            seen_block_spans.insert(block_key.unwrap()); // Mark context as seen
723                            if let Some(block) = potential_block {
724                                code_blocks.push(block);
725                            }
726                            continue; // Move to next line number
727                        }
728                    }
729                }
730
731                // Check if original node itself is acceptable (and wasn't skipped as test)
732                // This check happens if there was no context node, or if the context node was skipped (e.g., test)
733                if potential_block.is_none() && info.original_node_is_acceptable {
734                    block_key = Some((info.start_row, info.end_row));
735                    if !seen_block_spans.contains(&block_key.unwrap()) {
736                        potential_block = Some(CodeBlock {
737                            start_row: info.start_row,
738                            end_row: info.end_row,
739                            start_byte: info.start_byte,
740                            end_byte: info.end_byte,
741                            node_type: info.node_kind.clone(),
742                            // Parent info comes from CachedNodeInfo, derived based on representative node (original node in this case)
743                            parent_node_type: info.parent_node_type.clone(),
744                            parent_start_row: info.parent_start_row,
745                            parent_end_row: info.parent_end_row,
746                        });
747                        if debug_mode {
748                            println!(
749                                "DEBUG: Cache: Potential acceptable original node block at lines {}-{}",
750                                info.start_row + 1, info.end_row + 1
751                            );
752                        }
753                    } else if debug_mode {
754                        println!(
755                            "DEBUG: Cache: Original acceptable node span {}-{} already seen",
756                            block_key.unwrap().0 + 1,
757                            block_key.unwrap().1 + 1
758                        );
759                    }
760                }
761            }
762
763            // Add the potential block if one was determined and not already seen
764            if let (Some(block), Some(key)) = (potential_block, block_key) {
765                if seen_block_spans.insert(key) {
766                    // Returns true if the value was not present
767                    code_blocks.push(block);
768                }
769            }
770        } else if debug_mode {
771            println!("DEBUG: Cache: No cached node info found for line {line}");
772        }
773    }
774
775    // Removed extra closing brace that was here
776
777    // Sort the blocks generated from the cache
778    code_blocks.sort_by_key(|block| block.start_row);
779
780    // --- Apply the exact same deduplication logic as the cache miss path ---
781    let mut final_code_blocks: Vec<CodeBlock> = Vec::new();
782
783    // Add comments first
784    for block in code_blocks
785        .iter()
786        .filter(|b| b.node_type.contains("comment") || b.node_type == "/*" || b.node_type == "*/")
787    {
788        final_code_blocks.push(block.clone());
789    }
790
791    // Add non-comments, using the improved deduplication logic
792    for block in code_blocks
793        .iter() // Use iter() here as we pushed clones earlier
794        .filter(|b| !b.node_type.contains("comment") && b.node_type != "/*" && b.node_type != "*/")
795    {
796        let mut should_add = true;
797        let mut blocks_to_remove: Vec<usize> = Vec::new();
798
799        // Define important block types that should be preserved
800        let important_block_types = [
801            "function_declaration",
802            "method_declaration",
803            "function_item",
804            "impl_item",
805            "type_declaration",
806            "struct_item",
807            "block_comment", // Keep this? Seems odd for non-comment filter but matches original
808        ];
809        let is_important = important_block_types.contains(&block.node_type.as_str());
810
811        // Check if this block overlaps with any of the previous blocks in final_code_blocks
812        for (idx, prev_block) in final_code_blocks.iter().enumerate() {
813            if prev_block.node_type.contains("comment")
814                || prev_block.node_type == "/*"
815                || prev_block.node_type == "*/"
816            {
817                continue; // Skip comments already added
818            }
819
820            let prev_is_important = important_block_types.contains(&prev_block.node_type.as_str());
821
822            // Check if blocks overlap
823            if block.start_row <= prev_block.end_row && block.end_row >= prev_block.start_row {
824                // Case 1: Current block is contained within previous block
825                if block.start_row >= prev_block.start_row && block.end_row <= prev_block.end_row {
826                    if debug_mode {
827                        println!(
828                            "DEBUG: Cache Dedupe: Current block contained: type='{}', lines={}-{} (in type='{}', lines={}-{})",
829                            block.node_type, block.start_row + 1, block.end_row + 1,
830                            prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
831                        );
832                    }
833                    if is_important && !prev_is_important {
834                        if debug_mode {
835                            println!("DEBUG: Cache Dedupe: Keeping important contained block");
836                        }
837                        // Keep both - don't remove, don't skip add
838                    } else if !is_important && prev_is_important {
839                        if debug_mode {
840                            println!("DEBUG: Cache Dedupe: Skipping non-important contained block");
841                        }
842                        should_add = false;
843                        break;
844                    } else {
845                        // Both important or both not - prefer contained (current)
846                        if debug_mode {
847                            println!(
848                                "DEBUG: Cache Dedupe: Replacing outer block with contained block"
849                            );
850                        }
851                        blocks_to_remove.push(idx);
852                    }
853                }
854                // Case 2: Previous block is contained within current block
855                else if prev_block.start_row >= block.start_row
856                    && prev_block.end_row <= block.end_row
857                {
858                    if debug_mode {
859                        println!(
860                            "DEBUG: Cache Dedupe: Previous block contained: type='{}', lines={}-{} (contains type='{}', lines={}-{})",
861                            block.node_type, block.start_row + 1, block.end_row + 1,
862                            prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
863                        );
864                    }
865                    if is_important && !prev_is_important {
866                        if debug_mode {
867                            println!("DEBUG: Cache Dedupe: Keeping important outer block");
868                        }
869                        // Keep both - don't skip add, continue checking
870                    } else if !is_important && prev_is_important {
871                        if debug_mode {
872                            println!("DEBUG: Cache Dedupe: Skipping non-important outer block");
873                        }
874                        should_add = false;
875                        break;
876                    } else {
877                        // Both important or both not - prefer contained (previous)
878                        if debug_mode {
879                            println!("DEBUG: Cache Dedupe: Skipping outer block (already have contained)");
880                        }
881                        should_add = false;
882                        break;
883                    }
884                }
885                // Case 3: Blocks partially overlap
886                else {
887                    if debug_mode {
888                        println!(
889                            "DEBUG: Cache Dedupe: Partial overlap: type='{}', lines={}-{} (overlaps type='{}', lines={}-{})",
890                            block.node_type, block.start_row + 1, block.end_row + 1,
891                            prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
892                        );
893                    }
894                    // Skip current block in case of partial overlap (consistent with miss path)
895                    should_add = false;
896                    break;
897                }
898            }
899        }
900
901        // Remove blocks marked for removal (in reverse order)
902        for idx in blocks_to_remove.iter().rev() {
903            final_code_blocks.remove(*idx);
904        }
905
906        // Add the current block if it wasn't skipped
907        if should_add {
908            final_code_blocks.push(block.clone());
909        }
910    }
911
912    // Final sort to maintain correct order after deduplication
913    final_code_blocks.sort_by_key(|block| block.start_row);
914    Ok(final_code_blocks)
915} // Added missing closing brace for process_cached_line_map
916  // Removed unexpected closing brace that was here
917
918/// Parses `content` (language chosen by `extension`) and returns the code blocks covering the 1-based `line_numbers`.
919pub fn parse_file_for_code_blocks(
920    content: &str,
921    extension: &str,
922    line_numbers: &HashSet<usize>,
923    allow_tests: bool,
924    _term_matches: Option<&HashMap<usize, HashSet<usize>>>, // Query index to line numbers
925) -> Result<Vec<CodeBlock>> {
926    // Get the appropriate language implementation
927    let language_impl = match get_language_impl(extension) {
928        Some(lang) => lang,
929        None => {
930            return Err(anyhow::anyhow!(format!(
931                "Unsupported file type: {}",
932                extension
933            )))
934        }
935    };
936
937    // Check for debug mode
938    let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
939
940    // Calculate content hash for cache key
941    let content_hash = calculate_content_hash(content);
942    let cache_key = format!("{extension}_{content_hash}_{allow_tests}");
943
944    // Check if we have a cached line map
945    if let Some(cached_entry) = LINE_MAP_CACHE.get(&cache_key) {
946        if debug_mode {
947            println!("DEBUG: Cache hit for line_map key: {cache_key}");
948        }
949
950        // Process the cached line map
951        return process_cached_line_map(
952            cached_entry.value(),
953            line_numbers,
954            language_impl.as_ref(),
955            content,
956            allow_tests,
957            debug_mode,
958        );
959    }
960
961    if debug_mode {
962        println!("DEBUG: Cache miss for line_map key: {cache_key}. Generating...");
963    }
964
965    // Get the tree-sitter language
966    let language = language_impl.get_tree_sitter_language();
967
968    // Parse the file
969    let mut parser = TSParser::new();
970    parser.set_language(&language)?;
971
972    // Use the tree cache to get or parse the tree
973    // We use a stable identifier for the file
974    let tree_cache_key = format!("file_{extension}");
975    let tree = tree_cache::get_or_parse_tree(&tree_cache_key, content, &mut parser)
976        .context("Failed to parse the file")?;
977
978    let root_node = tree.root_node();
979
980    // Check for debug mode
981    let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
982
983    if debug_mode {
984        println!("DEBUG: Parsing file with extension: {extension}");
985        println!("DEBUG: Root node type: {}", root_node.kind());
986
987        // Log all node types in the file
988        let mut node_types = HashSet::new();
989        super::common::collect_node_types(root_node, &mut node_types);
990        println!("DEBUG: All node types in file: {node_types:?}");
991    }
992
993    // Create a line-to-node map for the entire file
994    let line_count = content.lines().count();
995    let mut line_map: Vec<Option<NodeInfo>> = vec![None; line_count];
996
997    // Build the line-to-node map with a single traversal
998    if debug_mode {
999        println!("DEBUG: Building line-to-node map with a single traversal");
1000    }
1001
1002    // For large files, we could parallelize the processing, but due to thread-safety
1003    // constraints with the language implementation, we'll use a sequential approach
1004    // that's still efficient for most cases
1005    if debug_mode {
1006        println!("DEBUG: Using sequential processing for AST nodes");
1007    }
1008
1009    // Start the traversal from the root node, passing None as the initial ancestor context
1010    process_node(
1011        root_node,
1012        &mut line_map,
1013        extension, // Pass if needed by process_node/language_impl
1014        language_impl.as_ref(),
1015        content.as_bytes(),
1016        allow_tests,
1017        debug_mode,
1018        None, // Initial ancestor context is None
1019              // REMOVED: &mut ancestor_cache,
1020    );
1021
1022    if debug_mode {
1023        println!("DEBUG: Line-to-node map built successfully");
1024    }
1025
1026    // ====================================================================
1027    // START: Inserted Original Block Processing Logic (Cache Miss Path)
1028    // ====================================================================
1029    // This code runs ONLY on a cache miss, after process_node generates the live line_map.
1030    // It generates the CodeBlocks for *this specific request* from the live NodeInfo data.
1031
1032    let mut code_blocks: Vec<CodeBlock> = Vec::new();
1033    let mut seen_nodes: HashSet<(usize, usize)> = HashSet::new(); // Use row-based key for this original logic
1034
1035    // Process each line number using the *live* precomputed map (line_map)
1036    for &line in line_numbers {
1037        // Adjust for 0-based indexing
1038        let line_idx = line.saturating_sub(1);
1039
1040        if debug_mode {
1041            println!("DEBUG: Processing line {line} (Live NodeInfo)");
1042        }
1043
1044        // Skip if line is out of bounds
1045        if line_idx >= line_map.len() {
1046            if debug_mode {
1047                println!("DEBUG: Line {line} is out of bounds (Live NodeInfo)");
1048            }
1049            continue;
1050        }
1051
1052        // Get the node info for this line from the live map
1053        if let Some(info) = &line_map[line_idx] {
1054            if debug_mode {
1055                println!(
1056                    "DEBUG: Found node for line {}: type='{}', lines={}-{}",
1057                    line,
1058                    info.node.kind(),
1059                    info.node.start_position().row + 1,
1060                    info.node.end_position().row + 1
1061                );
1062            }
1063            let target_node = info.node;
1064            let start_pos = target_node.start_position();
1065            let end_pos = target_node.end_position();
1066            // Use row key consistent with original logic for seen_nodes in this block
1067            let node_key = (start_pos.row, end_pos.row);
1068
1069            // Skip if we've already processed this node
1070            if seen_nodes.contains(&node_key) {
1071                if debug_mode {
1072                    println!(
1073                        "DEBUG: Already processed node at lines {}-{}, type: {}",
1074                        start_pos.row + 1,
1075                        end_pos.row + 1,
1076                        target_node.kind()
1077                    );
1078                }
1079                continue;
1080            }
1081
1082            // Mark this node as seen
1083            seen_nodes.insert(node_key);
1084
1085            // Special handling for comments (using live NodeInfo and context_node)
1086            if info.is_comment {
1087                if debug_mode {
1088                    println!(
1089                        "DEBUG: Found comment node at line {}: {}",
1090                        line,
1091                        target_node.kind()
1092                    );
1093                }
1094
1095                // If we have a context node for this comment
1096                if let Some(context_node) = info.context_node {
1097                    let rel_start_pos = context_node.start_position();
1098                    let rel_end_pos = context_node.end_position();
1099                    let rel_key = (rel_start_pos.row, rel_end_pos.row);
1100
1101                    // Check test status using live node and language_impl
1102                    // Ensure content is available here if needed by is_test_node
1103                    if !allow_tests && language_impl.is_test_node(&context_node, content.as_bytes())
1104                    {
1105                        if debug_mode {
1106                            println!(
1107                                "DEBUG: Skipping test context node at lines {}-{}, type: {}",
1108                                rel_start_pos.row + 1,
1109                                rel_end_pos.row + 1,
1110                                context_node.kind()
1111                            );
1112                        }
1113                    } else {
1114                        // Create a merged block
1115                        let merged_start_row = std::cmp::min(start_pos.row, rel_start_pos.row);
1116                        let merged_end_row = std::cmp::max(end_pos.row, rel_end_pos.row);
1117                        let merged_start_byte =
1118                            std::cmp::min(target_node.start_byte(), context_node.start_byte());
1119                        let merged_end_byte =
1120                            std::cmp::max(target_node.end_byte(), context_node.end_byte());
1121                        let merged_node_type = context_node.kind().to_string();
1122
1123                        seen_nodes.insert(rel_key); // Mark context as seen too
1124
1125                        code_blocks.push(CodeBlock {
1126                            start_row: merged_start_row,
1127                            end_row: merged_end_row,
1128                            start_byte: merged_start_byte,
1129                            end_byte: merged_end_byte,
1130                            node_type: merged_node_type.clone(),
1131                            parent_node_type: None, // Keep consistent with original logic here
1132                            parent_start_row: None,
1133                            parent_end_row: None,
1134                        });
1135
1136                        if debug_mode {
1137                            println!(
1138                                "DEBUG: Added merged block (comment + context) at lines {}-{}, type: {}",
1139                                merged_start_row + 1,
1140                                merged_end_row + 1,
1141                                merged_node_type
1142                            );
1143                        }
1144                        continue; // Skip adding individual comment
1145                    }
1146                }
1147
1148                // Add individual comment if not merged
1149                code_blocks.push(CodeBlock {
1150                    start_row: start_pos.row,
1151                    end_row: end_pos.row,
1152                    start_byte: target_node.start_byte(),
1153                    end_byte: target_node.end_byte(),
1154                    node_type: target_node.kind().to_string(),
1155                    parent_node_type: None,
1156                    parent_start_row: None,
1157                    parent_end_row: None,
1158                });
1159                if debug_mode {
1160                    println!(
1161                        "DEBUG: Added individual comment block at lines {}-{}",
1162                        start_pos.row + 1,
1163                        end_pos.row + 1
1164                    );
1165                }
1166                continue; // Skip rest for comments
1167            }
1168
1169            // Skip test nodes (using live check)
1170            if info.is_test {
1171                // is_test flag was set during process_node
1172                if debug_mode {
1173                    println!(
1174                        "DEBUG: Skipping test node at lines {}-{}",
1175                        start_pos.row + 1,
1176                        end_pos.row + 1
1177                    );
1178                }
1179                continue;
1180            }
1181
1182            // Check if line is within an existing block (this check might be redundant with seen_nodes)
1183            // Keep consistent with original logic if it was there
1184            let mut existing_block = false;
1185            for block in &code_blocks {
1186                if line > block.start_row + 1 && line <= block.end_row + 1 {
1187                    if debug_mode {
1188                        println!(
1189                            "DEBUG: Line {} is within existing block: type='{}', lines={}-{}",
1190                            line,
1191                            block.node_type,
1192                            block.start_row + 1,
1193                            block.end_row + 1
1194                        );
1195                    }
1196                    existing_block = true;
1197                    break;
1198                }
1199            }
1200            if existing_block {
1201                continue;
1202            }
1203
1204            // Check context node (acceptable ancestor)
1205            if let Some(context_node) = info.context_node {
1206                // context_node was set during process_node
1207                let rel_start_pos = context_node.start_position();
1208                let rel_end_pos = context_node.end_position();
1209                let rel_key = (rel_start_pos.row, rel_end_pos.row);
1210
1211                // Ensure content is available if needed by is_test_node
1212                if !allow_tests && language_impl.is_test_node(&context_node, content.as_bytes()) {
1213                    if debug_mode {
1214                        println!(
1215                            "DEBUG: Skipping test context node (ancestor) at lines {}-{}",
1216                            rel_start_pos.row + 1,
1217                            rel_end_pos.row + 1
1218                        );
1219                    }
1220                } else {
1221                    if debug_mode {
1222                        println!(
1223                            "DEBUG: Using context node (ancestor) at lines {}-{}",
1224                            rel_start_pos.row + 1,
1225                            rel_end_pos.row + 1
1226                        );
1227                    }
1228                    seen_nodes.insert(rel_key); // Mark context as seen
1229
1230                    // Get parent function info if applicable (e.g., for struct_type nodes)
1231                    let parent_info = if context_node.kind() == "struct_type" {
1232                        language_impl
1233                            .find_parent_function(context_node)
1234                            .map(|parent_node| {
1235                                let parent_type = parent_node.kind().to_string();
1236                                let parent_start = parent_node.start_position().row;
1237                                let parent_end = parent_node.end_position().row;
1238                                (parent_type, parent_start, parent_end)
1239                            })
1240                    } else {
1241                        None
1242                    };
1243
1244                    code_blocks.push(CodeBlock {
1245                        start_row: rel_start_pos.row,
1246                        end_row: rel_end_pos.row,
1247                        start_byte: context_node.start_byte(),
1248                        end_byte: context_node.end_byte(),
1249                        node_type: context_node.kind().to_string(),
1250                        parent_node_type: parent_info.as_ref().map(|(t, _, _)| t.clone()),
1251                        parent_start_row: parent_info.as_ref().map(|(_, s, _)| *s),
1252                        parent_end_row: parent_info.as_ref().map(|(_, _, e)| *e),
1253                    });
1254                    continue; // Skip adding target_node if context was added
1255                }
1256            }
1257
1258            // Check if target_node itself is acceptable (using live check)
1259            if language_impl.is_acceptable_parent(&target_node) {
1260                if debug_mode {
1261                    println!(
1262                        "DEBUG: Adding acceptable parent node at lines {}-{}",
1263                        start_pos.row + 1,
1264                        end_pos.row + 1
1265                    );
1266                }
1267
1268                // Get parent function info if applicable (e.g., for struct_type nodes)
1269                let parent_info = if target_node.kind() == "struct_type" {
1270                    language_impl
1271                        .find_parent_function(target_node)
1272                        .map(|parent_node| {
1273                            let parent_type = parent_node.kind().to_string();
1274                            let parent_start = parent_node.start_position().row;
1275                            let parent_end = parent_node.end_position().row;
1276                            (parent_type, parent_start, parent_end)
1277                        })
1278                } else {
1279                    None
1280                };
1281
1282                code_blocks.push(CodeBlock {
1283                    start_row: start_pos.row,
1284                    end_row: end_pos.row,
1285                    start_byte: target_node.start_byte(),
1286                    end_byte: target_node.end_byte(),
1287                    node_type: target_node.kind().to_string(),
1288                    parent_node_type: parent_info.as_ref().map(|(t, _, _)| t.clone()),
1289                    parent_start_row: parent_info.as_ref().map(|(_, s, _)| *s),
1290                    parent_end_row: parent_info.as_ref().map(|(_, _, e)| *e),
1291                });
1292                continue; // Skip fallback if acceptable parent added
1293            }
1294
1295            // Fallback: Add the node found for the line if no context/acceptable parent logic applied
1296            if debug_mode {
1297                println!(
1298                    "DEBUG: Adding node via fallback at lines {}-{}",
1299                    start_pos.row + 1,
1300                    end_pos.row + 1
1301                );
1302            }
1303
1304            // Get parent function info if applicable (e.g., for struct_type nodes)
1305            let parent_info = if target_node.kind() == "struct_type" {
1306                language_impl
1307                    .find_parent_function(target_node)
1308                    .map(|parent_node| {
1309                        let parent_type = parent_node.kind().to_string();
1310                        let parent_start = parent_node.start_position().row;
1311                        let parent_end = parent_node.end_position().row;
1312                        (parent_type, parent_start, parent_end)
1313                    })
1314            } else {
1315                None
1316            };
1317
1318            code_blocks.push(CodeBlock {
1319                start_row: start_pos.row,
1320                end_row: end_pos.row,
1321                start_byte: target_node.start_byte(),
1322                end_byte: target_node.end_byte(),
1323                node_type: target_node.kind().to_string(),
1324                parent_node_type: parent_info.as_ref().map(|(t, _, _)| t.clone()),
1325                parent_start_row: parent_info.as_ref().map(|(_, s, _)| *s),
1326                parent_end_row: parent_info.as_ref().map(|(_, _, e)| *e),
1327            });
1328        } else if debug_mode {
1329            println!("DEBUG: No node info found for line {line} (Live NodeInfo)");
1330        }
1331    } // End loop over line_numbers
1332
1333    // Sort and deduplicate the blocks generated from live data
1334    code_blocks.sort_by_key(|block| block.start_row);
1335
1336    // Apply the improved deduplication logic
1337    let mut final_code_blocks: Vec<CodeBlock> = Vec::new();
1338
1339    // Add comments first
1340    for block in code_blocks
1341        .iter()
1342        .filter(|b| b.node_type.contains("comment") || b.node_type == "/*" || b.node_type == "*/")
1343    {
1344        final_code_blocks.push(block.clone());
1345    }
1346
1347    // Add non-comments, using the improved deduplication logic
1348    for block in code_blocks
1349        .iter()
1350        .filter(|b| !b.node_type.contains("comment") && b.node_type != "/*" && b.node_type != "*/")
1351    {
1352        let mut should_add = true;
1353        let mut blocks_to_remove: Vec<usize> = Vec::new();
1354
1355        // Define important block types that should be preserved
1356        let important_block_types = [
1357            "function_declaration",
1358            "method_declaration",
1359            "function_item",
1360            "impl_item",
1361            "type_declaration",
1362            "struct_item",
1363            "block_comment",
1364        ];
1365        let is_important = important_block_types.contains(&block.node_type.as_str());
1366
1367        // Check if this block overlaps with any of the previous blocks
1368        for (idx, prev_block) in final_code_blocks.iter().enumerate() {
1369            if prev_block.node_type.contains("comment")
1370                || prev_block.node_type == "/*"
1371                || prev_block.node_type == "*/"
1372            {
1373                continue; // Skip comments
1374            }
1375
1376            let prev_is_important = important_block_types.contains(&prev_block.node_type.as_str());
1377
1378            // Check if blocks overlap
1379            if block.start_row <= prev_block.end_row && block.end_row >= prev_block.start_row {
1380                // Case 1: Current block is contained within previous block
1381                if block.start_row >= prev_block.start_row && block.end_row <= prev_block.end_row {
1382                    if debug_mode {
1383                        println!(
1384                            "DEBUG: Current block is contained within previous block: type='{}', lines={}-{} (contained in type='{}', lines={}-{})",
1385                            block.node_type, block.start_row + 1, block.end_row + 1,
1386                            prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
1387                        );
1388                    }
1389
1390                    // If current block is important and previous block is not, keep both
1391                    if is_important && !prev_is_important {
1392                        if debug_mode {
1393                            println!(
1394                                "DEBUG: Keeping important block type: {node_type}",
1395                                node_type = block.node_type
1396                            );
1397                        }
1398                        // Don't remove any blocks, don't set should_add to false
1399                    }
1400                    // If previous block is important and current block is not, skip current block
1401                    else if !is_important && prev_is_important {
1402                        if debug_mode {
1403                            println!("DEBUG: Skipping non-important block in favor of important block: {node_type}", node_type = prev_block.node_type);
1404                        }
1405                        should_add = false;
1406                        break;
1407                    }
1408                    // Otherwise, prefer the more specific (contained) block
1409                    else {
1410                        blocks_to_remove.push(idx);
1411                    }
1412                }
1413                // Case 2: Previous block is contained within current block
1414                else if prev_block.start_row >= block.start_row
1415                    && prev_block.end_row <= block.end_row
1416                {
1417                    if debug_mode {
1418                        println!(
1419                            "DEBUG: Previous block is contained within current block: type='{}', lines={}-{} (contains type='{}', lines={}-{})",
1420                            block.node_type, block.start_row + 1, block.end_row + 1,
1421                            prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
1422                        );
1423                    }
1424
1425                    // If current block is important and previous block is not, keep both
1426                    if is_important && !prev_is_important {
1427                        if debug_mode {
1428                            println!(
1429                                "DEBUG: Keeping important block type: {node_type}",
1430                                node_type = block.node_type
1431                            );
1432                        }
1433                        // Don't set should_add to false, continue checking other blocks
1434                    }
1435                    // If previous block is important and current block is not, skip current block
1436                    else if !is_important && prev_is_important {
1437                        if debug_mode {
1438                            println!("DEBUG: Skipping non-important block in favor of important block: {node_type}", node_type = prev_block.node_type);
1439                        }
1440                        should_add = false;
1441                        break;
1442                    }
1443                    // Otherwise, skip current block as it's less specific
1444                    else {
1445                        should_add = false;
1446                        break;
1447                    }
1448                }
1449                // Case 3: Blocks partially overlap
1450                else {
1451                    if debug_mode {
1452                        println!(
1453                            "DEBUG: Blocks partially overlap: type='{}', lines={}-{} (overlaps with type='{}', lines={}-{})",
1454                            block.node_type, block.start_row + 1, block.end_row + 1,
1455                            prev_block.node_type, prev_block.start_row + 1, prev_block.end_row + 1
1456                        );
1457                    }
1458                    // Skip current block in case of partial overlap
1459                    should_add = false;
1460                    break;
1461                }
1462            }
1463        }
1464
1465        // Remove any blocks that should be replaced
1466        for idx in blocks_to_remove.iter().rev() {
1467            final_code_blocks.remove(*idx);
1468        }
1469
1470        if should_add {
1471            final_code_blocks.push(block.clone());
1472        }
1473    }
1474
1475    // Final sort to maintain correct order
1476    final_code_blocks.sort_by_key(|block| block.start_row);
1477
1478    // ====================================================================
1479    // END: Inserted Original Block Processing Logic (Cache Miss Path)
1480    // ====================================================================
1481
1482    // Convert the original line_map to a cacheable format with representative node info
1483    let cacheable_line_map: Vec<Option<CachedNodeInfo>> = line_map
1484        .iter()
1485        .map(|opt_node_info| {
1486            opt_node_info.map(|node_info| {
1487                CachedNodeInfo::from_node_info(
1488                    &node_info,
1489                    language_impl.as_ref(),
1490                    content.as_bytes(),
1491                    allow_tests,
1492                )
1493            })
1494        })
1495        .collect();
1496
1497    // Store the cacheable version in the cache (as you already have)
1498    LINE_MAP_CACHE.insert(cache_key.clone(), cacheable_line_map);
1499    if debug_mode {
1500        println!("DEBUG: Stored generated line_map in cache key: {cache_key}");
1501    }
1502
1503    // Return the blocks generated from the LIVE data in this cache miss path
1504    Ok(final_code_blocks)
1505}
probe_code/language/parser.rs

probe_code/language/
parser.rs