Skip to main content

perl_semantic_analyzer/analysis/semantic/
node_analysis.rs

1//! AST node analysis — the `analyze_node` traversal and source-text helpers.
2//!
3//! This module provides the core recursive descent over the Perl AST that
4//! produces semantic tokens and hover information.  It extends
5//! `SemanticAnalyzer` with an additional `impl` block; Rust permits inherent
6//! `impl` blocks for a type anywhere in its defining crate, and the items
7//! defined in `mod.rs` are brought into scope here through `super::` imports.
8
9use crate::SourceLocation;
10use crate::ast::{Node, NodeKind};
11use crate::symbol::{ScopeId, ScopeKind, SymbolKind};
12use regex::Regex;
13use std::sync::OnceLock;
14
15use super::SemanticAnalyzer;
16use super::builtins::{
17    get_builtin_documentation, is_builtin_function, is_control_keyword, is_file_test_operator,
18};
19use super::hover::HoverInfo;
20use super::tokens::{SemanticToken, SemanticTokenModifier, SemanticTokenType};
21
22impl SemanticAnalyzer {
23    /// Analyze a node and generate semantic information
24    pub(super) fn analyze_node(&mut self, node: &Node, scope_id: ScopeId) {
25        match &node.kind {
26            NodeKind::Program { statements } => {
27                for stmt in statements {
28                    self.analyze_node(stmt, scope_id);
29                }
30            }
31
32            NodeKind::VariableDeclaration { declarator, variable, attributes, initializer } => {
33                // Add semantic token for declaration
34                if let NodeKind::Variable { sigil, name } = &variable.kind {
35                    let token_type = match declarator.as_str() {
36                        "my" | "state" => SemanticTokenType::VariableDeclaration,
37                        "our" => SemanticTokenType::Variable,
38                        "local" => SemanticTokenType::Variable,
39                        _ => SemanticTokenType::Variable,
40                    };
41
42                    let mut modifiers = vec![SemanticTokenModifier::Declaration];
43                    if declarator == "state" || attributes.iter().any(|a| a == ":shared") {
44                        modifiers.push(SemanticTokenModifier::Static);
45                    }
46
47                    self.semantic_tokens.push(SemanticToken {
48                        location: variable.location,
49                        token_type,
50                        modifiers,
51                    });
52
53                    // Add hover info
54                    let hover = HoverInfo {
55                        signature: format!("{} {}{}", declarator, sigil, name),
56                        documentation: self.extract_documentation(node.location.start),
57                        details: if attributes.is_empty() {
58                            vec![]
59                        } else {
60                            vec![format!("Attributes: {}", attributes.join(", "))]
61                        },
62                    };
63
64                    self.hover_info.insert(variable.location, hover);
65                }
66
67                if let Some(init) = initializer {
68                    self.analyze_node(init, scope_id);
69                }
70            }
71
72            NodeKind::Variable { sigil, name } => {
73                let kind = match sigil.as_str() {
74                    "$" => SymbolKind::scalar(),
75                    "@" => SymbolKind::array(),
76                    "%" => SymbolKind::hash(),
77                    _ => return,
78                };
79
80                // Find the symbol definition
81                let symbols = self.symbol_table.find_symbol(name, scope_id, kind);
82
83                let token_type = if let Some(symbol) = symbols.first() {
84                    match symbol.declaration.as_deref() {
85                        Some("my") | Some("state") => SemanticTokenType::Variable,
86                        Some("our") => SemanticTokenType::Variable,
87                        _ => SemanticTokenType::Variable,
88                    }
89                } else {
90                    // Undefined variable
91                    SemanticTokenType::Variable
92                };
93
94                self.semantic_tokens.push(SemanticToken {
95                    location: node.location,
96                    token_type,
97                    modifiers: vec![],
98                });
99
100                // Add hover info if we found the symbol
101                if let Some(symbol) = symbols.first() {
102                    let hover = HoverInfo {
103                        signature: format!(
104                            "{} {}{}",
105                            symbol.declaration.as_deref().unwrap_or(""),
106                            sigil,
107                            name
108                        )
109                        .trim()
110                        .to_string(),
111                        documentation: symbol.documentation.clone(),
112                        details: vec![format!(
113                            "Defined at line {}",
114                            self.line_number(symbol.location.start)
115                        )],
116                    };
117
118                    self.hover_info.insert(node.location, hover);
119                }
120            }
121
122            NodeKind::Subroutine { name, prototype, signature, attributes, body, name_span: _ } => {
123                if let Some(sub_name) = name {
124                    // Named subroutine
125                    let token = SemanticToken {
126                        location: node.location,
127                        token_type: SemanticTokenType::FunctionDeclaration,
128                        modifiers: vec![SemanticTokenModifier::Declaration],
129                    };
130
131                    self.semantic_tokens.push(token);
132
133                    // Add hover info
134                    let mut signature_str = format!("sub {}", sub_name);
135                    if let Some(sig_node) = signature {
136                        signature_str.push_str(&format_signature_params(sig_node));
137                    }
138
139                    let hover = HoverInfo {
140                        signature: signature_str,
141                        documentation: self.extract_sub_documentation(node.location.start, body),
142                        details: if attributes.is_empty() {
143                            vec![]
144                        } else {
145                            vec![format!("Attributes: {}", attributes.join(", "))]
146                        },
147                    };
148
149                    self.hover_info.insert(node.location, hover);
150                } else {
151                    // Anonymous subroutine (closure)
152                    // Add semantic token for the 'sub' keyword
153                    self.semantic_tokens.push(SemanticToken {
154                        location: SourceLocation {
155                            start: node.location.start,
156                            end: node.location.start + 3, // "sub"
157                        },
158                        token_type: SemanticTokenType::Keyword,
159                        modifiers: vec![],
160                    });
161
162                    // Add hover info for anonymous subs
163                    let mut signature_str = "sub".to_string();
164                    if let Some(sig_node) = signature {
165                        signature_str.push_str(&format_signature_params(sig_node));
166                    }
167                    signature_str.push_str(" { ... }");
168
169                    let mut details = vec!["Anonymous subroutine (closure)".to_string()];
170                    if !attributes.is_empty() {
171                        details.push(format!("Attributes: {}", attributes.join(", ")));
172                    }
173
174                    let hover = HoverInfo {
175                        signature: signature_str,
176                        documentation: self.extract_sub_documentation(node.location.start, body),
177                        details,
178                    };
179
180                    self.hover_info.insert(node.location, hover);
181                }
182
183                {
184                    // Get the subroutine scope from the symbol table
185                    let sub_scope = self.get_scope_for(node, ScopeKind::Subroutine);
186
187                    if let Some(proto) = prototype {
188                        self.analyze_node(proto, sub_scope);
189                    }
190                    if let Some(sig) = signature {
191                        self.analyze_node(sig, sub_scope);
192                    }
193
194                    self.analyze_node(body, sub_scope);
195                }
196            }
197
198            NodeKind::Method { name, signature, attributes, body } => {
199                self.semantic_tokens.push(SemanticToken {
200                    location: node.location, // Approximate, ideally name span
201                    token_type: SemanticTokenType::FunctionDeclaration,
202                    modifiers: vec![SemanticTokenModifier::Declaration],
203                });
204
205                // Add hover info
206                let hover = HoverInfo {
207                    signature: format!("method {}", name),
208                    documentation: self.extract_sub_documentation(node.location.start, body),
209                    details: if attributes.is_empty() {
210                        vec![]
211                    } else {
212                        vec![format!("Attributes: {}", attributes.join(", "))]
213                    },
214                };
215                self.hover_info.insert(node.location, hover);
216
217                // Analyze body in new scope (assumed same as Subroutine scope kind for now)
218                let sub_scope = self.get_scope_for(node, ScopeKind::Subroutine);
219                if let Some(sig) = signature {
220                    self.analyze_node(sig, sub_scope);
221                }
222                self.analyze_node(body, sub_scope);
223            }
224
225            NodeKind::FunctionCall { name, args } => {
226                // Check if this is a built-in function
227                {
228                    let token_type = if is_control_keyword(name) {
229                        SemanticTokenType::KeywordControl
230                    } else if is_builtin_function(name) {
231                        SemanticTokenType::Function
232                    } else {
233                        // Check if it's a user-defined function
234                        let symbols =
235                            self.symbol_table.find_symbol(name, scope_id, SymbolKind::Subroutine);
236                        if symbols.is_empty() {
237                            SemanticTokenType::Function
238                        } else {
239                            SemanticTokenType::Function
240                        }
241                    };
242
243                    self.semantic_tokens.push(SemanticToken {
244                        location: node.location,
245                        token_type,
246                        modifiers: if is_builtin_function(name) && !is_control_keyword(name) {
247                            vec![SemanticTokenModifier::DefaultLibrary]
248                        } else {
249                            vec![]
250                        },
251                    });
252
253                    // Add hover for built-ins
254                    if let Some(doc) = get_builtin_documentation(name) {
255                        let hover = HoverInfo {
256                            signature: doc.signature.to_string(),
257                            documentation: Some(doc.description.to_string()),
258                            details: vec![],
259                        };
260
261                        self.hover_info.insert(node.location, hover);
262                    }
263                }
264
265                // Name is already a string, not a node
266                for arg in args {
267                    self.analyze_node(arg, scope_id);
268                }
269            }
270
271            NodeKind::Package { name, block, name_span: _ } => {
272                self.semantic_tokens.push(SemanticToken {
273                    location: node.location,
274                    token_type: SemanticTokenType::Namespace,
275                    modifiers: vec![SemanticTokenModifier::Declaration],
276                });
277
278                // Try POD docs first, then fall back to leading comments
279                let documentation = self
280                    .extract_pod_name_section(name)
281                    .or_else(|| self.extract_documentation(node.location.start));
282
283                let hover = HoverInfo {
284                    signature: format!("package {}", name),
285                    documentation,
286                    details: vec![],
287                };
288
289                self.hover_info.insert(node.location, hover);
290
291                if let Some(block_node) = block {
292                    let package_scope = self.get_scope_for(node, ScopeKind::Package);
293                    self.analyze_node(block_node, package_scope);
294                }
295            }
296
297            NodeKind::String { value: _, interpolated: _ } => {
298                self.semantic_tokens.push(SemanticToken {
299                    location: node.location,
300                    token_type: SemanticTokenType::String,
301                    modifiers: vec![],
302                });
303            }
304
305            NodeKind::Number { value: _ } => {
306                self.semantic_tokens.push(SemanticToken {
307                    location: node.location,
308                    token_type: SemanticTokenType::Number,
309                    modifiers: vec![],
310                });
311            }
312
313            NodeKind::Regex { .. } => {
314                self.semantic_tokens.push(SemanticToken {
315                    location: node.location,
316                    token_type: SemanticTokenType::Regex,
317                    modifiers: vec![],
318                });
319            }
320
321            NodeKind::Match { expr, .. } => {
322                self.semantic_tokens.push(SemanticToken {
323                    location: node.location,
324                    token_type: SemanticTokenType::Regex,
325                    modifiers: vec![],
326                });
327                self.analyze_node(expr, scope_id);
328            }
329            NodeKind::Substitution { expr, .. } => {
330                // Substitution operator: s/// - add semantic token for the operator
331                self.semantic_tokens.push(SemanticToken {
332                    location: node.location,
333                    token_type: SemanticTokenType::Operator,
334                    modifiers: vec![],
335                });
336                self.analyze_node(expr, scope_id);
337            }
338            NodeKind::Transliteration { expr, .. } => {
339                // Transliteration operator: tr/// or y/// - add semantic token for the operator
340                self.semantic_tokens.push(SemanticToken {
341                    location: node.location,
342                    token_type: SemanticTokenType::Operator,
343                    modifiers: vec![],
344                });
345                self.analyze_node(expr, scope_id);
346            }
347
348            NodeKind::LabeledStatement { label: _, statement } => {
349                self.semantic_tokens.push(SemanticToken {
350                    location: node.location,
351                    token_type: SemanticTokenType::Label,
352                    modifiers: vec![],
353                });
354
355                {
356                    self.analyze_node(statement, scope_id);
357                }
358            }
359
360            // Control flow keywords
361            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
362                self.analyze_node(condition, scope_id);
363                self.analyze_node(then_branch, scope_id);
364                for (elsif_cond, elsif_branch) in elsif_branches {
365                    self.analyze_node(elsif_cond, scope_id);
366                    self.analyze_node(elsif_branch, scope_id);
367                }
368                if let Some(else_node) = else_branch {
369                    self.analyze_node(else_node, scope_id);
370                }
371            }
372
373            NodeKind::While { condition, body, continue_block: _ } => {
374                self.analyze_node(condition, scope_id);
375                self.analyze_node(body, scope_id);
376            }
377
378            NodeKind::For { init, condition, update, body, .. } => {
379                if let Some(init_node) = init {
380                    self.analyze_node(init_node, scope_id);
381                }
382                if let Some(cond_node) = condition {
383                    self.analyze_node(cond_node, scope_id);
384                }
385                if let Some(update_node) = update {
386                    self.analyze_node(update_node, scope_id);
387                }
388                self.analyze_node(body, scope_id);
389            }
390
391            NodeKind::Foreach { variable, list, body, continue_block } => {
392                self.analyze_node(variable, scope_id);
393                self.analyze_node(list, scope_id);
394                self.analyze_node(body, scope_id);
395                if let Some(cb) = continue_block {
396                    self.analyze_node(cb, scope_id);
397                }
398            }
399
400            // Recursively analyze other nodes
401            NodeKind::Block { statements } => {
402                for stmt in statements {
403                    self.analyze_node(stmt, scope_id);
404                }
405            }
406
407            NodeKind::Binary { left, right, .. } => {
408                self.analyze_node(left, scope_id);
409                self.analyze_node(right, scope_id);
410            }
411
412            NodeKind::Assignment { lhs, rhs, .. } => {
413                self.analyze_node(lhs, scope_id);
414                self.analyze_node(rhs, scope_id);
415            }
416
417            // Phase 1: Critical LSP Features (Issue #188)
418            NodeKind::VariableListDeclaration {
419                declarator,
420                variables,
421                attributes,
422                initializer,
423            } => {
424                // Handle multi-variable declarations like: my ($x, $y, $z) = (1, 2, 3);
425                for var in variables {
426                    if let NodeKind::Variable { sigil, name } = &var.kind {
427                        let token_type = match declarator.as_str() {
428                            "my" | "state" => SemanticTokenType::VariableDeclaration,
429                            "our" => SemanticTokenType::Variable,
430                            "local" => SemanticTokenType::Variable,
431                            _ => SemanticTokenType::Variable,
432                        };
433
434                        let mut modifiers = vec![SemanticTokenModifier::Declaration];
435                        if declarator == "state" || attributes.iter().any(|a| a == ":shared") {
436                            modifiers.push(SemanticTokenModifier::Static);
437                        }
438
439                        self.semantic_tokens.push(SemanticToken {
440                            location: var.location,
441                            token_type,
442                            modifiers,
443                        });
444
445                        // Add hover info
446                        let hover = HoverInfo {
447                            signature: format!("{} {}{}", declarator, sigil, name),
448                            documentation: self.extract_documentation(var.location.start),
449                            details: if attributes.is_empty() {
450                                vec![]
451                            } else {
452                                vec![format!("Attributes: {}", attributes.join(", "))]
453                            },
454                        };
455
456                        self.hover_info.insert(var.location, hover);
457                    }
458                }
459
460                if let Some(init) = initializer {
461                    self.analyze_node(init, scope_id);
462                }
463            }
464
465            NodeKind::Ternary { condition, then_expr, else_expr } => {
466                // Handle conditional expressions: $x ? $y : $z
467                self.analyze_node(condition, scope_id);
468                self.analyze_node(then_expr, scope_id);
469                self.analyze_node(else_expr, scope_id);
470            }
471
472            NodeKind::ArrayLiteral { elements } => {
473                // Handle array constructors: [1, 2, 3, 4]
474                for elem in elements {
475                    self.analyze_node(elem, scope_id);
476                }
477            }
478
479            NodeKind::HashLiteral { pairs } => {
480                // Handle hash constructors: { key1 => "value1", key2 => "value2" }
481                for (key, value) in pairs {
482                    self.analyze_node(key, scope_id);
483                    self.analyze_node(value, scope_id);
484                }
485            }
486
487            NodeKind::Try { body, catch_blocks, finally_block } => {
488                // Handle try/catch error handling
489                self.analyze_node(body, scope_id);
490
491                for (_var, catch_body) in catch_blocks {
492                    // Note: var is just a String (variable name), not a Node
493                    self.analyze_node(catch_body, scope_id);
494                }
495
496                if let Some(finally) = finally_block {
497                    self.analyze_node(finally, scope_id);
498                }
499            }
500
501            NodeKind::PhaseBlock { phase: _, phase_span: _, block } => {
502                // Handle BEGIN/END/INIT/CHECK/UNITCHECK blocks
503                self.semantic_tokens.push(SemanticToken {
504                    location: node.location,
505                    token_type: SemanticTokenType::Keyword,
506                    modifiers: vec![],
507                });
508
509                self.analyze_node(block, scope_id);
510            }
511
512            NodeKind::ExpressionStatement { expression } => {
513                // Handle expression statements: $x + 10;
514                // Just delegate to the wrapped expression
515                self.analyze_node(expression, scope_id);
516            }
517
518            NodeKind::Do { block } => {
519                // Handle do blocks: do { ... }
520                // Do blocks create expression context but maintain scope
521                self.analyze_node(block, scope_id);
522            }
523
524            NodeKind::Eval { block } => {
525                // Handle eval blocks: eval { dangerous_operation(); }
526                self.semantic_tokens.push(SemanticToken {
527                    location: node.location,
528                    token_type: SemanticTokenType::Keyword,
529                    modifiers: vec![],
530                });
531
532                // Eval blocks should create a new scope for error isolation
533                self.analyze_node(block, scope_id);
534            }
535
536            NodeKind::Defer { block } => {
537                // defer { } blocks run on scope exit; analyze the block for symbol resolution
538                self.semantic_tokens.push(SemanticToken {
539                    location: node.location,
540                    token_type: SemanticTokenType::Keyword,
541                    modifiers: vec![],
542                });
543                self.analyze_node(block, scope_id);
544            }
545
546            NodeKind::VariableWithAttributes { variable, attributes } => {
547                // Handle attributed variables: my $x :shared = 42;
548                // Analyze the base variable node
549                self.analyze_node(variable, scope_id);
550
551                // Add modifier tokens for special attributes
552                if attributes.iter().any(|a| a == ":shared" || a == ":lvalue") {
553                    // The variable node was already processed, so we just note the attributes
554                    // in the hover info (if we need to enhance it later)
555                }
556            }
557
558            NodeKind::Unary { op, operand } => {
559                // Handle unary operators: -$x, !$x, ++$x, $x++
560                // Add token for the operator itself (if needed for highlighting)
561                if matches!(op.as_str(), "++" | "--" | "!" | "-" | "~" | "\\") {
562                    self.semantic_tokens.push(SemanticToken {
563                        location: node.location,
564                        token_type: SemanticTokenType::Operator,
565                        modifiers: vec![],
566                    });
567                }
568
569                // Handle file test operators: -e, -d, -f, -r, -w, -x, -s, -z, -T, -B, etc.
570                if is_file_test_operator(op) {
571                    self.semantic_tokens.push(SemanticToken {
572                        location: node.location,
573                        token_type: SemanticTokenType::Operator,
574                        modifiers: vec![],
575                    });
576                }
577
578                self.analyze_node(operand, scope_id);
579            }
580
581            NodeKind::Readline { filehandle } => {
582                // Handle readline/diamond operator: <STDIN>, <$fh>, <>
583                self.semantic_tokens.push(SemanticToken {
584                    location: node.location,
585                    token_type: SemanticTokenType::Operator, // diamond operator is an I/O operator
586                    modifiers: vec![],
587                });
588
589                // Add hover info for common filehandles
590                if let Some(fh) = filehandle {
591                    let hover = HoverInfo {
592                        signature: format!("<{}>", fh),
593                        documentation: match fh.as_str() {
594                            "STDIN" => Some("Standard input filehandle".to_string()),
595                            "STDOUT" => Some("Standard output filehandle".to_string()),
596                            "STDERR" => Some("Standard error filehandle".to_string()),
597                            _ => Some(format!("Read from filehandle {}", fh)),
598                        },
599                        details: vec![],
600                    };
601                    self.hover_info.insert(node.location, hover);
602                } else {
603                    // Bare <> reads from ARGV or STDIN
604                    let hover = HoverInfo {
605                        signature: "<>".to_string(),
606                        documentation: Some("Read from command-line files or STDIN".to_string()),
607                        details: vec![],
608                    };
609                    self.hover_info.insert(node.location, hover);
610                }
611            }
612
613            // Phase 2/3 Handlers
614            NodeKind::MethodCall { object, method, args } => {
615                self.analyze_node(object, scope_id);
616
617                if let Some(offset) =
618                    self.find_substring_in_source_after(node, method, object.location.end)
619                {
620                    self.semantic_tokens.push(SemanticToken {
621                        location: SourceLocation { start: offset, end: offset + method.len() },
622                        token_type: SemanticTokenType::Method,
623                        modifiers: vec![],
624                    });
625                }
626
627                for arg in args {
628                    self.analyze_node(arg, scope_id);
629                }
630            }
631
632            NodeKind::IndirectCall { method, object, args } => {
633                if let Some(offset) = self.find_method_name_in_source(node, method) {
634                    self.semantic_tokens.push(SemanticToken {
635                        location: SourceLocation { start: offset, end: offset + method.len() },
636                        token_type: SemanticTokenType::Method,
637                        modifiers: vec![],
638                    });
639                }
640                self.analyze_node(object, scope_id);
641                for arg in args {
642                    self.analyze_node(arg, scope_id);
643                }
644            }
645
646            NodeKind::Use { module, args, .. } => {
647                self.semantic_tokens.push(SemanticToken {
648                    location: SourceLocation {
649                        start: node.location.start,
650                        end: node.location.start + 3,
651                    },
652                    token_type: SemanticTokenType::Keyword,
653                    modifiers: vec![],
654                });
655
656                let mut args_start = node.location.start + 3;
657                if let Some(offset) = self.find_substring_in_source(node, module) {
658                    self.semantic_tokens.push(SemanticToken {
659                        location: SourceLocation { start: offset, end: offset + module.len() },
660                        token_type: SemanticTokenType::Namespace,
661                        modifiers: vec![],
662                    });
663                    args_start = offset + module.len();
664                }
665
666                self.analyze_string_args(node, args, args_start);
667            }
668
669            NodeKind::No { module, args, .. } => {
670                self.semantic_tokens.push(SemanticToken {
671                    location: SourceLocation {
672                        start: node.location.start,
673                        end: node.location.start + 2,
674                    },
675                    token_type: SemanticTokenType::Keyword,
676                    modifiers: vec![],
677                });
678
679                let mut args_start = node.location.start + 2;
680                if let Some(offset) = self.find_substring_in_source(node, module) {
681                    self.semantic_tokens.push(SemanticToken {
682                        location: SourceLocation { start: offset, end: offset + module.len() },
683                        token_type: SemanticTokenType::Namespace,
684                        modifiers: vec![],
685                    });
686                    args_start = offset + module.len();
687                }
688
689                self.analyze_string_args(node, args, args_start);
690            }
691
692            NodeKind::Given { expr, body } => {
693                self.semantic_tokens.push(SemanticToken {
694                    location: SourceLocation {
695                        start: node.location.start,
696                        end: node.location.start + 5,
697                    }, // given
698                    token_type: SemanticTokenType::KeywordControl,
699                    modifiers: vec![],
700                });
701                self.analyze_node(expr, scope_id);
702                self.analyze_node(body, scope_id);
703            }
704
705            NodeKind::When { condition, body } => {
706                self.semantic_tokens.push(SemanticToken {
707                    location: SourceLocation {
708                        start: node.location.start,
709                        end: node.location.start + 4,
710                    }, // when
711                    token_type: SemanticTokenType::KeywordControl,
712                    modifiers: vec![],
713                });
714                self.analyze_node(condition, scope_id);
715                self.analyze_node(body, scope_id);
716            }
717
718            NodeKind::Default { body } => {
719                self.semantic_tokens.push(SemanticToken {
720                    location: SourceLocation {
721                        start: node.location.start,
722                        end: node.location.start + 7,
723                    }, // default
724                    token_type: SemanticTokenType::KeywordControl,
725                    modifiers: vec![],
726                });
727                self.analyze_node(body, scope_id);
728            }
729
730            NodeKind::Return { value } => {
731                self.semantic_tokens.push(SemanticToken {
732                    location: SourceLocation {
733                        start: node.location.start,
734                        end: node.location.start + 6,
735                    }, // return
736                    token_type: SemanticTokenType::KeywordControl,
737                    modifiers: vec![],
738                });
739                if let Some(v) = value {
740                    self.analyze_node(v, scope_id);
741                }
742            }
743
744            NodeKind::Class { name, body, .. } => {
745                self.semantic_tokens.push(SemanticToken {
746                    location: SourceLocation {
747                        start: node.location.start,
748                        end: node.location.start + 5,
749                    }, // class
750                    token_type: SemanticTokenType::Keyword,
751                    modifiers: vec![],
752                });
753
754                if let Some(offset) = self.find_substring_in_source(node, name) {
755                    self.semantic_tokens.push(SemanticToken {
756                        location: SourceLocation { start: offset, end: offset + name.len() },
757                        token_type: SemanticTokenType::Class,
758                        modifiers: vec![SemanticTokenModifier::Declaration],
759                    });
760                }
761
762                let class_scope = self.get_scope_for(node, ScopeKind::Package);
763                self.analyze_node(body, class_scope);
764            }
765
766            NodeKind::Signature { parameters } => {
767                for param in parameters {
768                    self.analyze_node(param, scope_id);
769                }
770            }
771
772            NodeKind::MandatoryParameter { variable }
773            | NodeKind::OptionalParameter { variable, .. }
774            | NodeKind::SlurpyParameter { variable }
775            | NodeKind::NamedParameter { variable } => {
776                self.analyze_node(variable, scope_id);
777            }
778
779            NodeKind::Diamond | NodeKind::Ellipsis => {
780                self.semantic_tokens.push(SemanticToken {
781                    location: node.location,
782                    token_type: SemanticTokenType::Operator,
783                    modifiers: vec![],
784                });
785            }
786
787            NodeKind::Undef => {
788                self.semantic_tokens.push(SemanticToken {
789                    location: node.location,
790                    token_type: SemanticTokenType::Keyword,
791                    modifiers: vec![],
792                });
793            }
794
795            NodeKind::Identifier { .. } => {
796                // Bareword identifiers, usually left to lexical highlighting
797                // but we handle them to avoid the default case.
798            }
799
800            NodeKind::Heredoc { .. } => {
801                self.semantic_tokens.push(SemanticToken {
802                    location: node.location,
803                    token_type: SemanticTokenType::String,
804                    modifiers: vec![],
805                });
806            }
807
808            NodeKind::Glob { .. } => {
809                self.semantic_tokens.push(SemanticToken {
810                    location: node.location,
811                    token_type: SemanticTokenType::Operator,
812                    modifiers: vec![],
813                });
814            }
815
816            NodeKind::DataSection { .. } => {
817                self.semantic_tokens.push(SemanticToken {
818                    location: node.location,
819                    token_type: SemanticTokenType::Comment,
820                    modifiers: vec![],
821                });
822            }
823
824            NodeKind::Prototype { .. } => {
825                self.semantic_tokens.push(SemanticToken {
826                    location: node.location,
827                    token_type: SemanticTokenType::Punctuation,
828                    modifiers: vec![],
829                });
830            }
831
832            NodeKind::Typeglob { .. } => {
833                self.semantic_tokens.push(SemanticToken {
834                    location: node.location,
835                    token_type: SemanticTokenType::Variable,
836                    modifiers: vec![],
837                });
838            }
839
840            NodeKind::Untie { variable } => {
841                self.analyze_node(variable, scope_id);
842            }
843
844            NodeKind::LoopControl { .. } => {
845                self.semantic_tokens.push(SemanticToken {
846                    location: node.location,
847                    token_type: SemanticTokenType::KeywordControl,
848                    modifiers: vec![],
849                });
850            }
851
852            NodeKind::Goto { target } => {
853                self.semantic_tokens.push(SemanticToken {
854                    location: node.location,
855                    token_type: SemanticTokenType::KeywordControl,
856                    modifiers: vec![],
857                });
858                self.analyze_node(target, scope_id);
859            }
860
861            NodeKind::MissingExpression
862            | NodeKind::MissingStatement
863            | NodeKind::MissingIdentifier
864            | NodeKind::MissingBlock => {
865                // No tokens for missing constructs
866            }
867
868            NodeKind::Tie { variable, package, args } => {
869                self.analyze_node(variable, scope_id);
870                self.analyze_node(package, scope_id);
871                for arg in args {
872                    self.analyze_node(arg, scope_id);
873                }
874            }
875
876            NodeKind::StatementModifier { statement, condition, modifier } => {
877                // Handle postfix loop modifiers: for, while, until, foreach
878                // e.g., print $_ for @list; or $x++ while $x < 10;
879                if matches!(modifier.as_str(), "for" | "foreach" | "while" | "until") {
880                    self.semantic_tokens.push(SemanticToken {
881                        location: node.location,
882                        token_type: SemanticTokenType::KeywordControl,
883                        modifiers: vec![],
884                    });
885                }
886                self.analyze_node(statement, scope_id);
887                self.analyze_node(condition, scope_id);
888            }
889
890            NodeKind::Format { name, .. } => {
891                self.semantic_tokens.push(SemanticToken {
892                    location: node.location,
893                    token_type: SemanticTokenType::FunctionDeclaration,
894                    modifiers: vec![SemanticTokenModifier::Declaration],
895                });
896
897                let hover = HoverInfo {
898                    signature: format!("format {} =", name),
899                    documentation: None,
900                    details: vec![],
901                };
902                self.hover_info.insert(node.location, hover);
903            }
904
905            NodeKind::Error { .. } | NodeKind::UnknownRest => {
906                // No semantic tokens for error nodes
907            }
908        }
909    }
910
911    /// Extract documentation (POD or comments) immediately preceding a
912    /// position.
913    ///
914    /// The returned string is trimmed and corresponds to whichever of POD
915    /// or comments is found first at the very end of `source[..start]`. If
916    /// both kinds appear earlier in the source but neither is *immediately
917    /// before* `start`, this returns `None` — anchoring is intentional so
918    /// that documentation blocks belonging to one declaration do not bleed
919    /// into a later, unrelated declaration.
920    ///
921    /// Anchoring uses `\z` (absolute end of string) rather than `$`. With
922    /// the `m` flag, `$` matches at the end of every line, which made the
923    /// previous regex match POD blocks anywhere in `before` and leak them
924    /// into hover docs for unrelated subs that followed.
925    pub(super) fn extract_documentation(&self, start: usize) -> Option<String> {
926        static POD_RE: OnceLock<Result<Regex, regex::Error>> = OnceLock::new();
927        static COMMENT_RE: OnceLock<Result<Regex, regex::Error>> = OnceLock::new();
928
929        if self.source.is_empty() || start > self.source.len() {
930            return None;
931        }
932        let before = &self.source[..start];
933
934        // Check for POD blocks ending with =cut, anchored at end of string.
935        let pod_re = POD_RE
936            .get_or_init(|| Regex::new(r"(?s)(=[a-zA-Z0-9].*?\r?\n=cut(?:\r?\n)?)\s*\z"))
937            .as_ref()
938            .ok()?;
939        if let Some(caps) = pod_re.captures(before) {
940            if let Some(pod_text) = caps.get(1) {
941                return Some(pod_text.as_str().trim().to_string());
942            }
943        }
944
945        // Check for consecutive comment lines, anchored at end of string.
946        let comment_re =
947            COMMENT_RE.get_or_init(|| Regex::new(r"(?m)(#.*\r?\n)+[\t ]*\z")).as_ref().ok()?;
948        if let Some(caps) = comment_re.captures(before) {
949            if let Some(comment_match) = caps.get(0) {
950                // Strip the # prefix from each comment line
951                let doc = comment_match
952                    .as_str()
953                    .lines()
954                    .map(|line| line.trim_start_matches('#').trim())
955                    .filter(|line| !line.is_empty())
956                    .collect::<Vec<_>>()
957                    .join(" ");
958                return Some(doc);
959            }
960        }
961
962        None
963    }
964
965    /// Extract documentation for a subroutine or method, falling back to
966    /// inline POD blocks inside the body when no leading docs are found.
967    ///
968    /// Resolution order:
969    /// 1. Leading docs (POD or comments) immediately preceding `start` —
970    ///    matches `extract_documentation` and preserves the existing
971    ///    "explicit author intent wins" precedence.
972    /// 2. The first POD block inside `body` (between `body.location.start`
973    ///    and `body.location.end`). This handles the inline-POD style of
974    ///    documenting a sub from within its body, e.g.:
975    ///
976    /// ```perl
977    /// sub process_data {
978    ///     =pod
979    ///     Internal documentation for this sub
980    ///     =cut
981    ///     ...
982    /// }
983    /// ```
984    pub(super) fn extract_sub_documentation(&self, start: usize, body: &Node) -> Option<String> {
985        self.extract_documentation(start).or_else(|| self.find_pod_in_node_body(body))
986    }
987
988    /// Find the first POD block inside the source range covered by `body`.
989    ///
990    /// Matches any POD block (`=pod`, `=head1`, `=item`, etc.) that ends
991    /// with a `=cut` directive. The returned string is trimmed of
992    /// surrounding whitespace and includes the opening directive through
993    /// the closing `=cut`, mirroring the format produced by
994    /// `extract_documentation` for leading POD blocks.
995    ///
996    /// **Deliberate divergence from perlpod:** the regex allows optional
997    /// leading whitespace (`^\s*`) before the opening POD directive.  Per
998    /// [perlpod](https://perldoc.perl.org/perlpod), POD directives must
999    /// begin at column 0 — `perl` itself silently ignores lines like
1000    /// `    =pod`.  The LSP intentionally relaxes this rule so that authors
1001    /// who indent `=pod` inside a sub body (a common editor style) still
1002    /// get hover documentation.  This is a UX choice: we surface what the
1003    /// author *wrote* as documentation rather than enforcing the strict
1004    /// perlpod column-0 rule.  See issue #4599 for the decision record.
1005    pub(super) fn find_pod_in_node_body(&self, body: &Node) -> Option<String> {
1006        static BODY_POD_RE: OnceLock<Result<Regex, regex::Error>> = OnceLock::new();
1007
1008        let start = body.location.start;
1009        let end = body.location.end;
1010        if self.source.is_empty() || end <= start || end > self.source.len() {
1011            return None;
1012        }
1013        let body_src = &self.source[start..end];
1014
1015        let pod_re = BODY_POD_RE
1016            .get_or_init(|| Regex::new(r"(?ms)^\s*(=[a-zA-Z0-9].*?\r?\n=cut)\b"))
1017            .as_ref()
1018            .ok()?;
1019        let caps = pod_re.captures(body_src)?;
1020        let pod_text = caps.get(1)?.as_str().trim().to_string();
1021        Some(pod_text)
1022    }
1023
1024    /// Extract the POD `=head1 NAME` section for a package.
1025    ///
1026    /// Scans the entire source for a `=head1 NAME` POD section and returns
1027    /// its content if it mentions the given package name.
1028    pub(super) fn extract_pod_name_section(&self, package_name: &str) -> Option<String> {
1029        if self.source.is_empty() {
1030            return None;
1031        }
1032
1033        let mut in_name_section = false;
1034        let mut name_lines: Vec<&str> = Vec::new();
1035
1036        for line in self.source.lines() {
1037            let trimmed: &str = line.trim();
1038            if trimmed.starts_with("=head1") {
1039                if in_name_section {
1040                    break;
1041                }
1042                let heading = trimmed.strip_prefix("=head1").map(|s: &str| s.trim());
1043                if heading == Some("NAME") {
1044                    in_name_section = true;
1045                    continue;
1046                }
1047            } else if trimmed.starts_with("=cut") && in_name_section {
1048                break;
1049            } else if trimmed.starts_with('=') && in_name_section {
1050                break;
1051            } else if in_name_section && !trimmed.is_empty() {
1052                name_lines.push(trimmed);
1053            }
1054        }
1055
1056        if !name_lines.is_empty() {
1057            let name_doc = name_lines.join(" ");
1058            if name_doc.contains(package_name)
1059                || name_doc.contains(&package_name.replace("::", "-"))
1060            {
1061                return Some(name_doc);
1062            }
1063        }
1064
1065        None
1066    }
1067
1068    /// Get scope id for a node by consulting the symbol table
1069    pub(super) fn get_scope_for(&self, node: &Node, kind: ScopeKind) -> ScopeId {
1070        for scope in self.symbol_table.scopes.values() {
1071            if scope.kind == kind
1072                && scope.location.start == node.location.start
1073                && scope.location.end == node.location.end
1074            {
1075                return scope.id;
1076            }
1077        }
1078        0
1079    }
1080
1081    /// Get line number from byte offset (simplified version)
1082    pub(super) fn line_number(&self, offset: usize) -> usize {
1083        if self.source.is_empty() { 1 } else { self.source[..offset].lines().count() + 1 }
1084    }
1085
1086    /// Find substring in source within node's range
1087    pub(super) fn find_substring_in_source(&self, node: &Node, substring: &str) -> Option<usize> {
1088        if self.source.len() < node.location.end {
1089            return None;
1090        }
1091        let node_text = &self.source[node.location.start..node.location.end];
1092        if let Some(pos) = node_text.find(substring) {
1093            return Some(node.location.start + pos);
1094        }
1095        None
1096    }
1097
1098    /// Find method name in source within node's range
1099    pub(super) fn find_method_name_in_source(
1100        &self,
1101        node: &Node,
1102        method_name: &str,
1103    ) -> Option<usize> {
1104        self.find_substring_in_source(node, method_name)
1105    }
1106
1107    /// Find substring in source within node's range, starting search after a specific absolute offset
1108    pub(super) fn find_substring_in_source_after(
1109        &self,
1110        node: &Node,
1111        substring: &str,
1112        after: usize,
1113    ) -> Option<usize> {
1114        if self.source.len() < node.location.end || after >= node.location.end {
1115            return None;
1116        }
1117
1118        let start_rel = after.saturating_sub(node.location.start);
1119
1120        let node_text = &self.source[node.location.start..node.location.end];
1121        if start_rel >= node_text.len() {
1122            return None;
1123        }
1124
1125        let text_to_search = &node_text[start_rel..];
1126        if let Some(pos) = text_to_search.find(substring) {
1127            return Some(node.location.start + start_rel + pos);
1128        }
1129        None
1130    }
1131
1132    /// Analyze string arguments for highlighting (e.g. in use/no statements)
1133    pub(super) fn analyze_string_args(
1134        &mut self,
1135        node: &Node,
1136        args: &[String],
1137        start_offset: usize,
1138    ) {
1139        let mut current_offset = start_offset;
1140        for arg in args {
1141            if let Some(offset) = self.find_substring_in_source_after(node, arg, current_offset) {
1142                self.semantic_tokens.push(SemanticToken {
1143                    location: SourceLocation { start: offset, end: offset + arg.len() },
1144                    token_type: SemanticTokenType::String,
1145                    modifiers: vec![],
1146                });
1147                current_offset = offset + arg.len();
1148            }
1149        }
1150    }
1151
1152    /// Infer the type of a node based on its context and initialization.
1153    ///
1154    /// Provides basic type inference for Perl expressions to enhance hover
1155    /// information with derived type information. Supports common patterns:
1156    /// - Literal values (numbers, strings, arrays, hashes)
1157    /// - Variable references (looks up declaration)
1158    /// - Function calls (basic return type hints)
1159    ///
1160    /// In the semantic workflow (Parse -> Index -> Analyze), this method runs
1161    /// during the Analyze stage and consumes symbols produced during Index.
1162    ///
1163    /// # Arguments
1164    ///
1165    /// * `node` - The AST node to infer type for
1166    ///
1167    /// # Returns
1168    ///
1169    /// A string describing the inferred type, or None if type cannot be determined
1170    pub fn infer_type(&self, node: &Node) -> Option<String> {
1171        match &node.kind {
1172            NodeKind::Number { .. } => Some("number".to_string()),
1173            NodeKind::String { .. } => Some("string".to_string()),
1174            NodeKind::ArrayLiteral { .. } => Some("array".to_string()),
1175            NodeKind::HashLiteral { .. } => Some("hash".to_string()),
1176
1177            NodeKind::Variable { sigil, name } => {
1178                // Look up the variable in the symbol table
1179                let kind = match sigil.as_str() {
1180                    "$" => SymbolKind::scalar(),
1181                    "@" => SymbolKind::array(),
1182                    "%" => SymbolKind::hash(),
1183                    _ => return None,
1184                };
1185
1186                let symbols = self.symbol_table.find_symbol(name, 0, kind);
1187                symbols.first()?;
1188
1189                // Return the basic type based on sigil
1190                match sigil.as_str() {
1191                    "$" => Some("scalar".to_string()),
1192                    "@" => Some("array".to_string()),
1193                    "%" => Some("hash".to_string()),
1194                    _ => None,
1195                }
1196            }
1197
1198            NodeKind::FunctionCall { name, .. } => {
1199                // Basic return type inference for built-in functions
1200                match name.as_str() {
1201                    "scalar" => Some("scalar".to_string()),
1202                    "ref" => Some("string".to_string()),
1203                    "length" | "index" | "rindex" => Some("number".to_string()),
1204                    "split" => Some("array".to_string()),
1205                    "keys" | "values" => Some("array".to_string()),
1206                    _ => None,
1207                }
1208            }
1209
1210            NodeKind::Binary { op, .. } => {
1211                // Infer based on operator
1212                match op.as_str() {
1213                    "+" | "-" | "*" | "/" | "%" | "**" => Some("number".to_string()),
1214                    "." | "x" => Some("string".to_string()),
1215                    "==" | "!=" | "<" | ">" | "<=" | ">=" | "eq" | "ne" | "lt" | "gt" | "le"
1216                    | "ge" => Some("boolean".to_string()),
1217                    _ => None,
1218                }
1219            }
1220
1221            _ => None,
1222        }
1223    }
1224}
1225
1226/// Build a parenthesised parameter list string from a `Signature` AST node.
1227///
1228/// Extracts each parameter variable's sigil and name and joins them with
1229/// ", ".  Returns `"(...)"` as a safe fallback for any unrecognised structure.
1230fn format_signature_params(sig_node: &Node) -> String {
1231    let NodeKind::Signature { parameters } = &sig_node.kind else {
1232        return "(...)".to_string();
1233    };
1234
1235    let labels: Vec<String> = parameters
1236        .iter()
1237        .filter_map(|param| {
1238            let var = match &param.kind {
1239                NodeKind::MandatoryParameter { variable }
1240                | NodeKind::OptionalParameter { variable, .. }
1241                | NodeKind::SlurpyParameter { variable }
1242                | NodeKind::NamedParameter { variable } => variable.as_ref(),
1243                NodeKind::Variable { .. } => param,
1244                _ => return None,
1245            };
1246            if let NodeKind::Variable { sigil, name } = &var.kind {
1247                Some(format!("{}{}", sigil, name))
1248            } else {
1249                None
1250            }
1251        })
1252        .collect();
1253
1254    format!("({})", labels.join(", "))
1255}