Skip to main content

perl_ast/
ast.rs

//! Abstract Syntax Tree definitions for Perl within the parsing and LSP workflow.
//!
//! This module defines the comprehensive AST node types that represent parsed Perl code
//! during the Parse → Index → Navigate → Complete → Analyze stages. The design is optimized
//! for both direct use in Rust analysis and for generating tree-sitter compatible
//! S-expressions during large workspace processing operations.
//!
//! # LSP Workflow Integration
//!
//! The AST structures support Perl tooling workflows by:
//! - **Parse**: Produced by the parser as the canonical syntax tree
//! - **Index**: Traversed to build symbol and reference tables
//! - **Navigate**: Provides locations for definition and reference lookups
//! - **Complete**: Supplies context for completion, hover, and signature help
//! - **Analyze**: Feeds semantic analysis, diagnostics, and refactoring
//!
//! # Performance Characteristics
//!
//! AST structures are optimized for large codebases with:
//! - Memory-efficient node representation using `Box<Node>` for recursive structures
//! - Fast pattern matching via enum variants for common Perl constructs
//! - Location tracking for precise error reporting in large files
//! - Cheap cloning for parallel analysis tasks
//!
//! # Usage Examples
//!
//! ## Basic AST Construction
//!
//! ```rust
//! use perl_ast::{Node, NodeKind, SourceLocation};
//!
//! // Create a simple variable declaration node
//! let location = SourceLocation { start: 0, end: 10 };
//! let node = Node::new(
//!     NodeKind::VariableDeclaration {
//!         declarator: "my".to_string(),
//!         variable: Box::new(Node::new(
//!             NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
//!             location,
//!         )),
//!         attributes: vec![],
//!         initializer: None,
//!     },
//!     location,
//! );
//! assert_eq!(node.kind.kind_name(), "VariableDeclaration");
//! ```
//!
//! ## Tree-sitter S-expression Generation
//!
//! ```rust
//! use perl_ast::{Node, NodeKind, SourceLocation};
//!
//! let loc = SourceLocation { start: 0, end: 2 };
//! let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
//! let program = Node::new(NodeKind::Program { statements: vec![num] }, loc);
//!
//! let sexp = program.to_sexp();
//! assert!(sexp.starts_with("(source_file"));
//! ```
//!
//! ## AST Traversal and Analysis
//!
//! ```rust
//! use perl_ast::{Node, NodeKind, SourceLocation};
//!
//! fn count_variables(node: &Node) -> usize {
//!     let mut count = 0;
//!     match &node.kind {
//!         NodeKind::Variable { .. } => count += 1,
//!         NodeKind::Program { statements } => {
//!             for stmt in statements {
//!                 count += count_variables(stmt);
//!             }
//!         }
//!         _ => {} // Handle other node types as needed
//!     }
//!     count
//! }
//!
//! let loc = SourceLocation { start: 0, end: 5 };
//! let var = Node::new(
//!     NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
//!     loc,
//! );
//! let program = Node::new(NodeKind::Program { statements: vec![var] }, loc);
//! assert_eq!(count_variables(&program), 1);
//! ```
//!
//! ## Parsing Integration
//!
//! In practice the AST is produced by the parser rather than built by hand
//! (requires `perl-parser-core`):
//!
//! ```rust,ignore
//! use perl_parser_core::Parser;
//! use perl_ast::NodeKind;
//!
//! let mut parser = Parser::new("my $x = 42;");
//! let ast = parser.parse().expect("should parse");
//! assert!(matches!(ast.kind, NodeKind::Program { .. }));
//! ```
103
104// Re-export SourceLocation from perl-position-tracking for unified span handling
105pub use perl_position_tracking::SourceLocation;
106// Re-export Token and TokenKind from perl-token for AST error nodes
107pub use perl_token::{Token, TokenKind};
108use std::fmt;
109
110/// Core AST node representing any Perl language construct within parsing workflows.
111///
112/// This is the fundamental building block for representing parsed Perl code. Each node
113/// contains both the semantic information (kind) and positional information (location)
114/// necessary for comprehensive script analysis.
115///
116/// # LSP Workflow Role
117///
118/// Nodes flow through tooling stages:
119/// - **Parse**: Created by the parser as it builds the syntax tree
120/// - **Index**: Visited to build symbol and reference tables
121/// - **Navigate**: Used to resolve definitions, references, and call hierarchy
122/// - **Complete**: Provides contextual information for completion and hover
123/// - **Analyze**: Drives semantic analysis and diagnostics
124///
125/// # Memory Optimization
126///
127/// The structure is designed for efficient memory usage during large-scale parsing:
128/// - `SourceLocation` uses compact position encoding for large files
129/// - `NodeKind` enum variants minimize memory overhead for common constructs
130/// - Clone operations are optimized for shared analysis workflows
131///
132/// # Examples
133///
134/// Construct a variable declaration node manually:
135///
136/// ```
137/// use perl_ast::{Node, NodeKind, SourceLocation};
138///
139/// let loc = SourceLocation { start: 0, end: 11 };
140/// let var = Node::new(
141///     NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
142///     loc,
143/// );
144/// let decl = Node::new(
145///     NodeKind::VariableDeclaration {
146///         declarator: "my".to_string(),
147///         variable: Box::new(var),
148///         attributes: vec![],
149///         initializer: None,
150///     },
151///     loc,
152/// );
153/// assert_eq!(decl.kind.kind_name(), "VariableDeclaration");
154/// ```
155///
156/// Typically you obtain nodes from the parser rather than constructing them by hand:
157///
158/// ```ignore
159/// use perl_parser::Parser;
160///
161/// let mut parser = Parser::new("my $x = 42;");
162/// let ast = parser.parse()?;
163/// println!("AST: {}", ast.to_sexp());
164/// ```
165#[derive(Debug, Clone, PartialEq)]
166pub struct Node {
167    /// The specific type and semantic content of this AST node
168    pub kind: NodeKind,
169    /// Source position information for error reporting and code navigation
170    pub location: SourceLocation,
171}
172
173impl Node {
174    /// Create a new AST node with the given kind and source location.
175    ///
176    /// # Examples
177    ///
178    /// ```
179    /// use perl_ast::{Node, NodeKind, SourceLocation};
180    ///
181    /// let node = Node::new(
182    ///     NodeKind::Number { value: "42".to_string() },
183    ///     SourceLocation { start: 0, end: 2 },
184    /// );
185    /// assert_eq!(node.kind.kind_name(), "Number");
186    /// assert_eq!(node.location.start, 0);
187    /// ```
188    pub fn new(kind: NodeKind, location: SourceLocation) -> Self {
189        Node { kind, location }
190    }
191
192    /// Convert the AST to a tree-sitter compatible S-expression.
193    ///
194    /// Produces a parenthesized representation compatible with tree-sitter's
195    /// S-expression format, useful for debugging and snapshot testing.
196    ///
197    /// # Examples
198    ///
199    /// ```
200    /// use perl_ast::{Node, NodeKind, SourceLocation};
201    ///
202    /// let loc = SourceLocation { start: 0, end: 2 };
203    /// let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
204    /// let program = Node::new(
205    ///     NodeKind::Program { statements: vec![num] },
206    ///     loc,
207    /// );
208    /// let sexp = program.to_sexp();
209    /// assert!(sexp.starts_with("(source_file"));
210    /// ```
211    pub fn to_sexp(&self) -> String {
212        match &self.kind {
213            NodeKind::Program { statements } => {
214                let stmts =
215                    statements.iter().map(|s| s.to_sexp_inner()).collect::<Vec<_>>().join(" ");
216                format!("(source_file {})", stmts)
217            }
218
219            NodeKind::ExpressionStatement { expression } => {
220                format!("(expression_statement {})", expression.to_sexp())
221            }
222
223            NodeKind::VariableDeclaration { declarator, variable, attributes, initializer } => {
224                let attrs_str = if attributes.is_empty() {
225                    String::new()
226                } else {
227                    format!(" (attributes {})", attributes.join(" "))
228                };
229                if let Some(init) = initializer {
230                    format!(
231                        "({}_declaration {}{}{})",
232                        declarator,
233                        variable.to_sexp(),
234                        attrs_str,
235                        init.to_sexp()
236                    )
237                } else {
238                    format!("({}_declaration {}{})", declarator, variable.to_sexp(), attrs_str)
239                }
240            }
241
242            NodeKind::VariableListDeclaration {
243                declarator,
244                variables,
245                attributes,
246                initializer,
247            } => {
248                let vars = variables.iter().map(|v| v.to_sexp()).collect::<Vec<_>>().join(" ");
249                let attrs_str = if attributes.is_empty() {
250                    String::new()
251                } else {
252                    format!(" (attributes {})", attributes.join(" "))
253                };
254                if let Some(init) = initializer {
255                    format!(
256                        "({}_declaration ({}){}{})",
257                        declarator,
258                        vars,
259                        attrs_str,
260                        init.to_sexp()
261                    )
262                } else {
263                    format!("({}_declaration ({}){})", declarator, vars, attrs_str)
264                }
265            }
266
267            NodeKind::Variable { sigil, name } => {
268                // Format expected by bless parsing tests: (variable $ name)
269                format!("(variable {} {})", sigil, name)
270            }
271
272            NodeKind::VariableWithAttributes { variable, attributes } => {
273                let attrs = attributes.join(" ");
274                format!("({} (attributes {}))", variable.to_sexp(), attrs)
275            }
276
277            NodeKind::Assignment { lhs, rhs, op } => {
278                format!(
279                    "(assignment_{} {} {})",
280                    op.replace("=", "assign"),
281                    lhs.to_sexp(),
282                    rhs.to_sexp()
283                )
284            }
285
286            NodeKind::Binary { op, left, right } => {
287                // Tree-sitter format: (binary_op left right)
288                let op_name = format_binary_operator(op);
289                format!("({} {} {})", op_name, left.to_sexp(), right.to_sexp())
290            }
291
292            NodeKind::Ternary { condition, then_expr, else_expr } => {
293                format!(
294                    "(ternary {} {} {})",
295                    condition.to_sexp(),
296                    then_expr.to_sexp(),
297                    else_expr.to_sexp()
298                )
299            }
300
301            NodeKind::Unary { op, operand } => {
302                // Tree-sitter format: (unary_op operand)
303                let op_name = format_unary_operator(op);
304                format!("({} {})", op_name, operand.to_sexp())
305            }
306
307            NodeKind::Diamond => "(diamond)".to_string(),
308
309            NodeKind::Ellipsis => "(ellipsis)".to_string(),
310
311            NodeKind::Undef => "(undef)".to_string(),
312
313            NodeKind::Readline { filehandle } => {
314                if let Some(fh) = filehandle {
315                    format!("(readline {})", fh)
316                } else {
317                    "(readline)".to_string()
318                }
319            }
320
321            NodeKind::Glob { pattern } => {
322                format!("(glob {})", pattern)
323            }
324            NodeKind::Typeglob { name } => {
325                format!("(typeglob {})", name)
326            }
327
328            NodeKind::Number { value } => {
329                // Format expected by bless parsing tests: (number value)
330                format!("(number {})", value)
331            }
332
333            NodeKind::String { value, interpolated } => {
334                // Escape quotes in string value to prevent S-expression parsing issues
335                let escaped_value = value.replace('\\', "\\\\").replace('"', "\\\"");
336
337                // Format based on interpolation status
338                if *interpolated {
339                    format!("(string_interpolated \"{}\")", escaped_value)
340                } else {
341                    format!("(string \"{}\")", escaped_value)
342                }
343            }
344
345            NodeKind::Heredoc { delimiter, content, interpolated, indented, command, .. } => {
346                let type_str = if *command {
347                    "heredoc_command"
348                } else if *indented {
349                    if *interpolated { "heredoc_indented_interpolated" } else { "heredoc_indented" }
350                } else if *interpolated {
351                    "heredoc_interpolated"
352                } else {
353                    "heredoc"
354                };
355                format!("({} {:?} {:?})", type_str, delimiter, content)
356            }
357
358            NodeKind::ArrayLiteral { elements } => {
359                let elems = elements.iter().map(|e| e.to_sexp()).collect::<Vec<_>>().join(" ");
360                format!("(array {})", elems)
361            }
362
363            NodeKind::HashLiteral { pairs } => {
364                let kvs = pairs
365                    .iter()
366                    .map(|(k, v)| format!("({} {})", k.to_sexp(), v.to_sexp()))
367                    .collect::<Vec<_>>()
368                    .join(" ");
369                format!("(hash {})", kvs)
370            }
371
372            NodeKind::Block { statements } => {
373                let stmts = statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ");
374                format!("(block {})", stmts)
375            }
376
377            NodeKind::Eval { block } => {
378                format!("(eval {})", block.to_sexp())
379            }
380
381            NodeKind::Do { block } => {
382                format!("(do {})", block.to_sexp())
383            }
384
385            NodeKind::Defer { block } => {
386                format!("(defer {})", block.to_sexp())
387            }
388
389            NodeKind::Try { body, catch_blocks, finally_block } => {
390                let mut parts = vec![format!("(try {})", body.to_sexp())];
391
392                for (var, block) in catch_blocks {
393                    if let Some(v) = var {
394                        parts.push(format!("(catch {} {})", v, block.to_sexp()));
395                    } else {
396                        parts.push(format!("(catch {})", block.to_sexp()));
397                    }
398                }
399
400                if let Some(finally) = finally_block {
401                    parts.push(format!("(finally {})", finally.to_sexp()));
402                }
403
404                parts.join(" ")
405            }
406
407            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
408                let mut parts =
409                    vec![format!("(if {} {})", condition.to_sexp(), then_branch.to_sexp())];
410
411                for (cond, block) in elsif_branches {
412                    parts.push(format!("(elsif {} {})", cond.to_sexp(), block.to_sexp()));
413                }
414
415                if let Some(else_block) = else_branch {
416                    parts.push(format!("(else {})", else_block.to_sexp()));
417                }
418
419                parts.join(" ")
420            }
421
422            NodeKind::LabeledStatement { label, statement } => {
423                format!("(labeled_statement {} {})", label, statement.to_sexp())
424            }
425
426            NodeKind::While { condition, body, continue_block } => {
427                let mut s = format!("(while {} {})", condition.to_sexp(), body.to_sexp());
428                if let Some(cont) = continue_block {
429                    s.push_str(&format!(" (continue {})", cont.to_sexp()));
430                }
431                s
432            }
433            NodeKind::Tie { variable, package, args } => {
434                let mut s = format!("(tie {} {}", variable.to_sexp(), package.to_sexp());
435                for arg in args {
436                    s.push_str(&format!(" {}", arg.to_sexp()));
437                }
438                s.push(')');
439                s
440            }
441            NodeKind::Untie { variable } => {
442                format!("(untie {})", variable.to_sexp())
443            }
444            NodeKind::For { init, condition, update, body, continue_block } => {
445                let init_str =
446                    init.as_ref().map(|i| i.to_sexp()).unwrap_or_else(|| "()".to_string());
447                let cond_str =
448                    condition.as_ref().map(|c| c.to_sexp()).unwrap_or_else(|| "()".to_string());
449                let update_str =
450                    update.as_ref().map(|u| u.to_sexp()).unwrap_or_else(|| "()".to_string());
451                let mut result =
452                    format!("(for {} {} {} {})", init_str, cond_str, update_str, body.to_sexp());
453                if let Some(cont) = continue_block {
454                    result.push_str(&format!(" (continue {})", cont.to_sexp()));
455                }
456                result
457            }
458
459            NodeKind::Foreach { variable, list, body, continue_block } => {
460                let cont = if let Some(cb) = continue_block {
461                    format!(" {}", cb.to_sexp())
462                } else {
463                    String::new()
464                };
465                format!(
466                    "(foreach {} {} {}{})",
467                    variable.to_sexp(),
468                    list.to_sexp(),
469                    body.to_sexp(),
470                    cont
471                )
472            }
473
474            NodeKind::Given { expr, body } => {
475                format!("(given {} {})", expr.to_sexp(), body.to_sexp())
476            }
477
478            NodeKind::When { condition, body } => {
479                format!("(when {} {})", condition.to_sexp(), body.to_sexp())
480            }
481
482            NodeKind::Default { body } => {
483                format!("(default {})", body.to_sexp())
484            }
485
486            NodeKind::StatementModifier { statement, modifier, condition } => {
487                format!(
488                    "(statement_modifier_{} {} {})",
489                    modifier,
490                    statement.to_sexp(),
491                    condition.to_sexp()
492                )
493            }
494
495            NodeKind::Subroutine { name, prototype, signature, attributes, body, name_span: _ } => {
496                if let Some(sub_name) = name {
497                    // Named subroutine - bless test expected format: (sub name () block)
498                    let mut parts = vec![sub_name.clone()];
499
500                    // Add attributes if present (before prototype/signature)
501                    if !attributes.is_empty() {
502                        for attr in attributes {
503                            parts.push(format!(":{}", attr));
504                        }
505                    }
506
507                    // Add prototype/signature - use () for empty prototype
508                    if let Some(proto) = prototype {
509                        parts.push(format!("({})", proto.to_sexp()));
510                    } else if signature.is_some() {
511                        // If there's a signature but no prototype, still show ()
512                        parts.push("()".to_string());
513                    } else {
514                        parts.push("()".to_string());
515                    }
516
517                    // Add body
518                    parts.push(body.to_sexp());
519
520                    // Format: (sub name [attrs...] ()(block ...)) - space between name and (), no space between () and block
521                    if parts.len() >= 3 && parts[parts.len() - 2] == "()" {
522                        let name_and_attrs = parts[0..parts.len() - 2].join(" ");
523                        let proto = &parts[parts.len() - 2];
524                        let body = &parts[parts.len() - 1];
525                        format!("(sub {} {}{})", name_and_attrs, proto, body)
526                    } else {
527                        format!("(sub {})", parts.join(" "))
528                    }
529                } else {
530                    // Anonymous subroutine - tree-sitter format
531                    let mut parts = Vec::new();
532
533                    // Add attributes if present
534                    if !attributes.is_empty() {
535                        let attrs: Vec<String> = attributes
536                            .iter()
537                            .map(|_attr| "(attribute (attribute_name))".to_string())
538                            .collect();
539                        parts.push(format!("(attrlist {})", attrs.join("")));
540                    }
541
542                    // Add prototype if present
543                    if let Some(proto) = prototype {
544                        parts.push(proto.to_sexp());
545                    }
546
547                    // Add signature if present
548                    if let Some(sig) = signature {
549                        parts.push(sig.to_sexp());
550                    }
551
552                    // Add body
553                    parts.push(body.to_sexp());
554
555                    format!("(anonymous_subroutine_expression {})", parts.join(""))
556                }
557            }
558
559            NodeKind::Prototype { content: _ } => "(prototype)".to_string(),
560
561            NodeKind::Signature { parameters } => {
562                let params = parameters.iter().map(|p| p.to_sexp()).collect::<Vec<_>>().join(" ");
563                format!("(signature {})", params)
564            }
565
566            NodeKind::MandatoryParameter { variable } => {
567                format!("(mandatory_parameter {})", variable.to_sexp())
568            }
569
570            NodeKind::OptionalParameter { variable, default_value } => {
571                format!("(optional_parameter {} {})", variable.to_sexp(), default_value.to_sexp())
572            }
573
574            NodeKind::SlurpyParameter { variable } => {
575                format!("(slurpy_parameter {})", variable.to_sexp())
576            }
577
578            NodeKind::NamedParameter { variable } => {
579                format!("(named_parameter {})", variable.to_sexp())
580            }
581
582            NodeKind::Method { name: _, signature, attributes, body } => {
583                let block_contents = match &body.kind {
584                    NodeKind::Block { statements } => {
585                        statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ")
586                    }
587                    _ => body.to_sexp(),
588                };
589
590                let mut parts = vec!["(bareword)".to_string()];
591
592                // Add signature if present
593                if let Some(sig) = signature {
594                    parts.push(sig.to_sexp());
595                }
596
597                // Add attributes if present
598                if !attributes.is_empty() {
599                    let attrs: Vec<String> = attributes
600                        .iter()
601                        .map(|_attr| "(attribute (attribute_name))".to_string())
602                        .collect();
603                    parts.push(format!("(attrlist {})", attrs.join("")));
604                }
605
606                parts.push(format!("(block {})", block_contents));
607                format!("(method_declaration_statement {})", parts.join(" "))
608            }
609
610            NodeKind::Return { value } => {
611                if let Some(val) = value {
612                    format!("(return {})", val.to_sexp())
613                } else {
614                    "(return)".to_string()
615                }
616            }
617
618            NodeKind::LoopControl { op, label } => {
619                if let Some(l) = label {
620                    format!("({} {})", op, l)
621                } else {
622                    format!("({})", op)
623                }
624            }
625
626            NodeKind::Goto { target } => {
627                format!("(goto {})", target.to_sexp())
628            }
629
630            NodeKind::MethodCall { object, method, args } => {
631                let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
632                format!("(method_call {} {} ({}))", object.to_sexp(), method, args_str)
633            }
634
635            NodeKind::FunctionCall { name, args } => {
636                // Special handling for functions that should use call format in tree-sitter tests
637                if matches!(
638                    name.as_str(),
639                    "bless"
640                        | "shift"
641                        | "unshift"
642                        | "open"
643                        | "die"
644                        | "warn"
645                        | "print"
646                        | "printf"
647                        | "say"
648                        | "push"
649                        | "pop"
650                        | "map"
651                        | "sort"
652                        | "grep"
653                        | "keys"
654                        | "values"
655                        | "each"
656                        | "defined"
657                        | "scalar"
658                        | "ref"
659                ) {
660                    let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
661                    if args.is_empty() {
662                        format!("(call {} ())", name)
663                    } else {
664                        format!("(call {} ({}))", name, args_str)
665                    }
666                } else {
667                    // Tree-sitter format varies by context
668                    let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
669                    if args.is_empty() {
670                        "(function_call_expression (function))".to_string()
671                    } else {
672                        format!("(ambiguous_function_call_expression (function) {})", args_str)
673                    }
674                }
675            }
676
677            NodeKind::IndirectCall { method, object, args } => {
678                let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
679                format!("(indirect_call {} {} ({}))", method, object.to_sexp(), args_str)
680            }
681
682            NodeKind::Regex { pattern, replacement, modifiers, has_embedded_code } => {
683                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
684                format!("(regex {:?} {:?} {:?}{})", pattern, replacement, modifiers, risk_marker)
685            }
686
687            NodeKind::Match { expr, pattern, modifiers, has_embedded_code, negated } => {
688                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
689                let op = if *negated { "not_match" } else { "match" };
690                format!(
691                    "({} {} (regex {:?} {:?}{}))",
692                    op,
693                    expr.to_sexp(),
694                    pattern,
695                    modifiers,
696                    risk_marker
697                )
698            }
699
700            NodeKind::Substitution {
701                expr,
702                pattern,
703                replacement,
704                modifiers,
705                has_embedded_code,
706                negated,
707            } => {
708                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
709                let neg_marker = if *negated { " (negated)" } else { "" };
710                format!(
711                    "(substitution {} {:?} {:?} {:?}{}{})",
712                    expr.to_sexp(),
713                    pattern,
714                    replacement,
715                    modifiers,
716                    risk_marker,
717                    neg_marker
718                )
719            }
720
721            NodeKind::Transliteration { expr, search, replace, modifiers, negated } => {
722                let neg_marker = if *negated { " (negated)" } else { "" };
723                format!(
724                    "(transliteration {} {:?} {:?} {:?}{})",
725                    expr.to_sexp(),
726                    search,
727                    replace,
728                    modifiers,
729                    neg_marker
730                )
731            }
732
733            NodeKind::Package { name, block, name_span: _ } => {
734                if let Some(blk) = block {
735                    format!("(package {} {})", name, blk.to_sexp())
736                } else {
737                    format!("(package {})", name)
738                }
739            }
740
741            NodeKind::Use { module, args, has_filter_risk } => {
742                let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
743                if args.is_empty() {
744                    format!("(use {}{})", module, risk_marker)
745                } else {
746                    let args_str = args.join(" ");
747                    format!("(use {} ({}){})", module, args_str, risk_marker)
748                }
749            }
750
751            NodeKind::No { module, args, has_filter_risk } => {
752                let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
753                if args.is_empty() {
754                    format!("(no {}{})", module, risk_marker)
755                } else {
756                    let args_str = args.join(" ");
757                    format!("(no {} ({}){})", module, args_str, risk_marker)
758                }
759            }
760
761            NodeKind::PhaseBlock { phase, phase_span: _, block } => {
762                format!("({} {})", phase, block.to_sexp())
763            }
764
765            NodeKind::DataSection { marker, body } => {
766                if let Some(body_text) = body {
767                    format!("(data_section {} \"{}\")", marker, body_text.escape_default())
768                } else {
769                    format!("(data_section {})", marker)
770                }
771            }
772
773            NodeKind::Class { name, parents, body } => {
774                if parents.is_empty() {
775                    format!("(class {} {})", name, body.to_sexp())
776                } else {
777                    format!("(class {} :isa({}) {})", name, parents.join(","), body.to_sexp())
778                }
779            }
780
781            NodeKind::Format { name, body } => {
782                format!("(format {} {:?})", name, body)
783            }
784
785            NodeKind::Identifier { name } => {
786                // Format expected by tests: (identifier name)
787                format!("(identifier {})", name)
788            }
789
790            NodeKind::Error { message, partial, .. } => {
791                if let Some(node) = partial {
792                    format!("(ERROR \"{}\" {})", message.escape_default(), node.to_sexp())
793                } else {
794                    format!("(ERROR \"{}\")", message.escape_default())
795                }
796            }
797            NodeKind::MissingExpression => "(missing_expression)".to_string(),
798            NodeKind::MissingStatement => "(missing_statement)".to_string(),
799            NodeKind::MissingIdentifier => "(missing_identifier)".to_string(),
800            NodeKind::MissingBlock => "(missing_block)".to_string(),
801            NodeKind::UnknownRest => "(UNKNOWN_REST)".to_string(),
802        }
803    }
804
805    /// Convert the AST to S-expression format that unwraps expression statements in programs
806    pub fn to_sexp_inner(&self) -> String {
807        match &self.kind {
808            NodeKind::ExpressionStatement { expression } => {
809                // Check if this is an anonymous subroutine - if so, keep it wrapped
810                match &expression.kind {
811                    NodeKind::Subroutine { name, .. } if name.is_none() => {
812                        // Anonymous subroutine should remain wrapped in expression statement
813                        self.to_sexp()
814                    }
815                    _ => {
816                        // In the inner format, other expression statements are unwrapped
817                        expression.to_sexp()
818                    }
819                }
820            }
821            _ => {
822                // For all other node types, use regular to_sexp
823                self.to_sexp()
824            }
825        }
826    }
827
828    /// Call a function on every direct child node of this node.
829    ///
830    /// This enables depth-first traversal for operations like heredoc content attachment.
831    /// The closure receives a mutable reference to each child node.
832    #[inline]
833    pub fn for_each_child_mut<F: FnMut(&mut Node)>(&mut self, mut f: F) {
834        match &mut self.kind {
835            NodeKind::Tie { variable, package, args } => {
836                f(variable);
837                f(package);
838                for arg in args {
839                    f(arg);
840                }
841            }
842            NodeKind::Untie { variable } => f(variable),
843
844            // Root program node
845            NodeKind::Program { statements } => {
846                for stmt in statements {
847                    f(stmt);
848                }
849            }
850
851            // Statement wrappers
852            NodeKind::ExpressionStatement { expression } => f(expression),
853
854            // Variable declarations
855            NodeKind::VariableDeclaration { variable, initializer, .. } => {
856                f(variable);
857                if let Some(init) = initializer {
858                    f(init);
859                }
860            }
861            NodeKind::VariableListDeclaration { variables, initializer, .. } => {
862                for var in variables {
863                    f(var);
864                }
865                if let Some(init) = initializer {
866                    f(init);
867                }
868            }
869            NodeKind::VariableWithAttributes { variable, .. } => f(variable),
870
871            // Binary operations
872            NodeKind::Binary { left, right, .. } => {
873                f(left);
874                f(right);
875            }
876            NodeKind::Ternary { condition, then_expr, else_expr } => {
877                f(condition);
878                f(then_expr);
879                f(else_expr);
880            }
881            NodeKind::Unary { operand, .. } => f(operand),
882            NodeKind::Assignment { lhs, rhs, .. } => {
883                f(lhs);
884                f(rhs);
885            }
886
887            // Control flow
888            NodeKind::Block { statements } => {
889                for stmt in statements {
890                    f(stmt);
891                }
892            }
893            NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
894                f(condition);
895                f(then_branch);
896                for (elsif_cond, elsif_body) in elsif_branches {
897                    f(elsif_cond);
898                    f(elsif_body);
899                }
900                if let Some(else_body) = else_branch {
901                    f(else_body);
902                }
903            }
904            NodeKind::While { condition, body, continue_block, .. } => {
905                f(condition);
906                f(body);
907                if let Some(cont) = continue_block {
908                    f(cont);
909                }
910            }
911            NodeKind::For { init, condition, update, body, continue_block, .. } => {
912                if let Some(i) = init {
913                    f(i);
914                }
915                if let Some(c) = condition {
916                    f(c);
917                }
918                if let Some(u) = update {
919                    f(u);
920                }
921                f(body);
922                if let Some(cont) = continue_block {
923                    f(cont);
924                }
925            }
926            NodeKind::Foreach { variable, list, body, continue_block } => {
927                f(variable);
928                f(list);
929                f(body);
930                if let Some(cb) = continue_block {
931                    f(cb);
932                }
933            }
934            NodeKind::Given { expr, body } => {
935                f(expr);
936                f(body);
937            }
938            NodeKind::When { condition, body } => {
939                f(condition);
940                f(body);
941            }
942            NodeKind::Default { body } => f(body),
943            NodeKind::StatementModifier { statement, condition, .. } => {
944                f(statement);
945                f(condition);
946            }
947            NodeKind::LabeledStatement { statement, .. } => f(statement),
948
949            // Eval and Do blocks
950            NodeKind::Eval { block } => f(block),
951            NodeKind::Do { block } => f(block),
952            NodeKind::Defer { block } => f(block),
953            NodeKind::Try { body, catch_blocks, finally_block } => {
954                f(body);
955                for (_, catch_body) in catch_blocks {
956                    f(catch_body);
957                }
958                if let Some(finally) = finally_block {
959                    f(finally);
960                }
961            }
962
963            // Function calls
964            NodeKind::FunctionCall { args, .. } => {
965                for arg in args {
966                    f(arg);
967                }
968            }
969            NodeKind::MethodCall { object, args, .. } => {
970                f(object);
971                for arg in args {
972                    f(arg);
973                }
974            }
975            NodeKind::IndirectCall { object, args, .. } => {
976                f(object);
977                for arg in args {
978                    f(arg);
979                }
980            }
981
982            // Functions
983            NodeKind::Subroutine { prototype, signature, body, .. } => {
984                if let Some(proto) = prototype {
985                    f(proto);
986                }
987                if let Some(sig) = signature {
988                    f(sig);
989                }
990                f(body);
991            }
992            NodeKind::Method { signature, body, .. } => {
993                if let Some(sig) = signature {
994                    f(sig);
995                }
996                f(body);
997            }
998            NodeKind::Return { value } => {
999                if let Some(v) = value {
1000                    f(v);
1001                }
1002            }
1003            NodeKind::Goto { target } => f(target),
1004            NodeKind::Signature { parameters } => {
1005                for param in parameters {
1006                    f(param);
1007                }
1008            }
1009            NodeKind::MandatoryParameter { variable } => f(variable),
1010            NodeKind::OptionalParameter { variable, default_value } => {
1011                f(variable);
1012                f(default_value);
1013            }
1014            NodeKind::SlurpyParameter { variable } => f(variable),
1015            NodeKind::NamedParameter { variable } => f(variable),
1016
1017            // Pattern matching
1018            NodeKind::Match { expr, .. } => f(expr),
1019            NodeKind::Substitution { expr, .. } => f(expr),
1020            NodeKind::Transliteration { expr, .. } => f(expr),
1021
1022            // Containers
1023            NodeKind::ArrayLiteral { elements } => {
1024                for elem in elements {
1025                    f(elem);
1026                }
1027            }
1028            NodeKind::HashLiteral { pairs } => {
1029                for (key, value) in pairs {
1030                    f(key);
1031                    f(value);
1032                }
1033            }
1034
1035            // Package system
1036            NodeKind::Package { block, .. } => {
1037                if let Some(b) = block {
1038                    f(b);
1039                }
1040            }
1041            NodeKind::PhaseBlock { block, .. } => f(block),
1042            NodeKind::Class { body, .. } => f(body),
1043
1044            // Error node might have a partial valid tree
1045            NodeKind::Error { partial, .. } => {
1046                if let Some(node) = partial {
1047                    f(node);
1048                }
1049            }
1050
1051            // Leaf nodes (no children to traverse)
1052            NodeKind::Variable { .. }
1053            | NodeKind::Identifier { .. }
1054            | NodeKind::Number { .. }
1055            | NodeKind::String { .. }
1056            | NodeKind::Heredoc { .. }
1057            | NodeKind::Regex { .. }
1058            | NodeKind::Readline { .. }
1059            | NodeKind::Glob { .. }
1060            | NodeKind::Typeglob { .. }
1061            | NodeKind::Diamond
1062            | NodeKind::Ellipsis
1063            | NodeKind::Undef
1064            | NodeKind::Use { .. }
1065            | NodeKind::No { .. }
1066            | NodeKind::Prototype { .. }
1067            | NodeKind::DataSection { .. }
1068            | NodeKind::Format { .. }
1069            | NodeKind::LoopControl { .. }
1070            | NodeKind::MissingExpression
1071            | NodeKind::MissingStatement
1072            | NodeKind::MissingIdentifier
1073            | NodeKind::MissingBlock
1074            | NodeKind::UnknownRest => {}
1075        }
1076    }
1077
1078    /// Call a function on every direct child node of this node (immutable version).
1079    ///
1080    /// This enables depth-first traversal for read-only operations like AST analysis.
1081    /// The closure receives an immutable reference to each child node.
1082    #[inline]
1083    pub fn for_each_child<'a, F: FnMut(&'a Node)>(&'a self, mut f: F) {
1084        match &self.kind {
1085            NodeKind::Tie { variable, package, args } => {
1086                f(variable);
1087                f(package);
1088                for arg in args {
1089                    f(arg);
1090                }
1091            }
1092            NodeKind::Untie { variable } => f(variable),
1093
1094            // Root program node
1095            NodeKind::Program { statements } => {
1096                for stmt in statements {
1097                    f(stmt);
1098                }
1099            }
1100
1101            // Statement wrappers
1102            NodeKind::ExpressionStatement { expression } => f(expression),
1103
1104            // Variable declarations
1105            NodeKind::VariableDeclaration { variable, initializer, .. } => {
1106                f(variable);
1107                if let Some(init) = initializer {
1108                    f(init);
1109                }
1110            }
1111            NodeKind::VariableListDeclaration { variables, initializer, .. } => {
1112                for var in variables {
1113                    f(var);
1114                }
1115                if let Some(init) = initializer {
1116                    f(init);
1117                }
1118            }
1119            NodeKind::VariableWithAttributes { variable, .. } => f(variable),
1120
1121            // Binary operations
1122            NodeKind::Binary { left, right, .. } => {
1123                f(left);
1124                f(right);
1125            }
1126            NodeKind::Ternary { condition, then_expr, else_expr } => {
1127                f(condition);
1128                f(then_expr);
1129                f(else_expr);
1130            }
1131            NodeKind::Unary { operand, .. } => f(operand),
1132            NodeKind::Assignment { lhs, rhs, .. } => {
1133                f(lhs);
1134                f(rhs);
1135            }
1136
1137            // Control flow
1138            NodeKind::Block { statements } => {
1139                for stmt in statements {
1140                    f(stmt);
1141                }
1142            }
1143            NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
1144                f(condition);
1145                f(then_branch);
1146                for (elsif_cond, elsif_body) in elsif_branches {
1147                    f(elsif_cond);
1148                    f(elsif_body);
1149                }
1150                if let Some(else_body) = else_branch {
1151                    f(else_body);
1152                }
1153            }
1154            NodeKind::While { condition, body, continue_block, .. } => {
1155                f(condition);
1156                f(body);
1157                if let Some(cont) = continue_block {
1158                    f(cont);
1159                }
1160            }
1161            NodeKind::For { init, condition, update, body, continue_block, .. } => {
1162                if let Some(i) = init {
1163                    f(i);
1164                }
1165                if let Some(c) = condition {
1166                    f(c);
1167                }
1168                if let Some(u) = update {
1169                    f(u);
1170                }
1171                f(body);
1172                if let Some(cont) = continue_block {
1173                    f(cont);
1174                }
1175            }
1176            NodeKind::Foreach { variable, list, body, continue_block } => {
1177                f(variable);
1178                f(list);
1179                f(body);
1180                if let Some(cb) = continue_block {
1181                    f(cb);
1182                }
1183            }
1184            NodeKind::Given { expr, body } => {
1185                f(expr);
1186                f(body);
1187            }
1188            NodeKind::When { condition, body } => {
1189                f(condition);
1190                f(body);
1191            }
1192            NodeKind::Default { body } => f(body),
1193            NodeKind::StatementModifier { statement, condition, .. } => {
1194                f(statement);
1195                f(condition);
1196            }
1197            NodeKind::LabeledStatement { statement, .. } => f(statement),
1198
1199            // Eval and Do blocks
1200            NodeKind::Eval { block } => f(block),
1201            NodeKind::Do { block } => f(block),
1202            NodeKind::Defer { block } => f(block),
1203            NodeKind::Try { body, catch_blocks, finally_block } => {
1204                f(body);
1205                for (_, catch_body) in catch_blocks {
1206                    f(catch_body);
1207                }
1208                if let Some(finally) = finally_block {
1209                    f(finally);
1210                }
1211            }
1212
1213            // Function calls
1214            NodeKind::FunctionCall { args, .. } => {
1215                for arg in args {
1216                    f(arg);
1217                }
1218            }
1219            NodeKind::MethodCall { object, args, .. } => {
1220                f(object);
1221                for arg in args {
1222                    f(arg);
1223                }
1224            }
1225            NodeKind::IndirectCall { object, args, .. } => {
1226                f(object);
1227                for arg in args {
1228                    f(arg);
1229                }
1230            }
1231
1232            // Functions
1233            NodeKind::Subroutine { prototype, signature, body, .. } => {
1234                if let Some(proto) = prototype {
1235                    f(proto);
1236                }
1237                if let Some(sig) = signature {
1238                    f(sig);
1239                }
1240                f(body);
1241            }
1242            NodeKind::Method { signature, body, .. } => {
1243                if let Some(sig) = signature {
1244                    f(sig);
1245                }
1246                f(body);
1247            }
1248            NodeKind::Return { value } => {
1249                if let Some(v) = value {
1250                    f(v);
1251                }
1252            }
1253            NodeKind::Goto { target } => f(target),
1254            NodeKind::Signature { parameters } => {
1255                for param in parameters {
1256                    f(param);
1257                }
1258            }
1259            NodeKind::MandatoryParameter { variable } => f(variable),
1260            NodeKind::OptionalParameter { variable, default_value } => {
1261                f(variable);
1262                f(default_value);
1263            }
1264            NodeKind::SlurpyParameter { variable } => f(variable),
1265            NodeKind::NamedParameter { variable } => f(variable),
1266
1267            // Pattern matching
1268            NodeKind::Match { expr, .. } => f(expr),
1269            NodeKind::Substitution { expr, .. } => f(expr),
1270            NodeKind::Transliteration { expr, .. } => f(expr),
1271
1272            // Containers
1273            NodeKind::ArrayLiteral { elements } => {
1274                for elem in elements {
1275                    f(elem);
1276                }
1277            }
1278            NodeKind::HashLiteral { pairs } => {
1279                for (key, value) in pairs {
1280                    f(key);
1281                    f(value);
1282                }
1283            }
1284
1285            // Package system
1286            NodeKind::Package { block, .. } => {
1287                if let Some(b) = block {
1288                    f(b);
1289                }
1290            }
1291            NodeKind::PhaseBlock { block, .. } => f(block),
1292            NodeKind::Class { body, .. } => f(body),
1293
1294            // Error node might have a partial valid tree
1295            NodeKind::Error { partial, .. } => {
1296                if let Some(node) = partial {
1297                    f(node);
1298                }
1299            }
1300
1301            // Leaf nodes (no children to traverse)
1302            NodeKind::Variable { .. }
1303            | NodeKind::Identifier { .. }
1304            | NodeKind::Number { .. }
1305            | NodeKind::String { .. }
1306            | NodeKind::Heredoc { .. }
1307            | NodeKind::Regex { .. }
1308            | NodeKind::Readline { .. }
1309            | NodeKind::Glob { .. }
1310            | NodeKind::Typeglob { .. }
1311            | NodeKind::Diamond
1312            | NodeKind::Ellipsis
1313            | NodeKind::Undef
1314            | NodeKind::Use { .. }
1315            | NodeKind::No { .. }
1316            | NodeKind::Prototype { .. }
1317            | NodeKind::DataSection { .. }
1318            | NodeKind::Format { .. }
1319            | NodeKind::LoopControl { .. }
1320            | NodeKind::MissingExpression
1321            | NodeKind::MissingStatement
1322            | NodeKind::MissingIdentifier
1323            | NodeKind::MissingBlock
1324            | NodeKind::UnknownRest => {}
1325        }
1326    }
1327
1328    /// Count the total number of nodes in this subtree (inclusive).
1329    ///
1330    /// # Examples
1331    ///
1332    /// ```
1333    /// use perl_ast::{Node, NodeKind, SourceLocation};
1334    ///
1335    /// let loc = SourceLocation { start: 0, end: 1 };
1336    /// let leaf = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1337    /// assert_eq!(leaf.count_nodes(), 1);
1338    ///
1339    /// let program = Node::new(
1340    ///     NodeKind::Program { statements: vec![leaf] },
1341    ///     loc,
1342    /// );
1343    /// assert_eq!(program.count_nodes(), 2);
1344    /// ```
1345    pub fn count_nodes(&self) -> usize {
1346        let mut count = 1;
1347        self.for_each_child(|child| {
1348            count += child.count_nodes();
1349        });
1350        count
1351    }
1352
1353    /// Collect direct child nodes into a vector for convenience APIs.
1354    ///
1355    /// # Examples
1356    ///
1357    /// ```
1358    /// use perl_ast::{Node, NodeKind, SourceLocation};
1359    ///
1360    /// let loc = SourceLocation { start: 0, end: 1 };
1361    /// let stmt = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1362    /// let program = Node::new(
1363    ///     NodeKind::Program { statements: vec![stmt] },
1364    ///     loc,
1365    /// );
1366    /// assert_eq!(program.children().len(), 1);
1367    /// ```
1368    #[inline]
1369    pub fn children(&self) -> Vec<&Node> {
1370        let mut children = Vec::new();
1371        self.for_each_child(|child| children.push(child));
1372        children
1373    }
1374
1375    /// Count direct child nodes without allocating an intermediate vector.
1376    ///
1377    /// This is more efficient than `children().len()` when callers only need
1378    /// cardinality.
1379    #[inline]
1380    pub fn child_count(&self) -> usize {
1381        let mut count = 0;
1382        self.for_each_child(|_| count += 1);
1383        count
1384    }
1385
1386    /// Get the first direct child node, if any.
1387    ///
1388    /// Optimized to avoid allocating the children vector.
1389    #[inline]
1390    pub fn first_child(&self) -> Option<&Node> {
1391        let mut result = None;
1392        self.for_each_child(|child| {
1393            if result.is_none() {
1394                result = Some(child);
1395            }
1396        });
1397        result
1398    }
1399
1400    /// Returns `true` when this node's source span contains `offset`.
1401    ///
1402    /// The start position is inclusive and the end position is exclusive.
1403    #[inline]
1404    pub fn contains_offset(&self, offset: usize) -> bool {
1405        self.location.start <= offset && offset < self.location.end
1406    }
1407
1408    /// Returns the byte length of this node's source span.
1409    ///
1410    /// Uses saturating subtraction so malformed spans never underflow.
1411    #[inline]
1412    pub fn span_len(&self) -> usize {
1413        self.location.end.saturating_sub(self.location.start)
1414    }
1415
1416    /// Get the last direct child node, if any.
1417    ///
1418    /// Optimized to avoid allocating the children vector.
1419    ///
1420    /// # Examples
1421    ///
1422    /// ```
1423    /// use perl_ast::{Node, NodeKind, SourceLocation};
1424    ///
1425    /// let loc = SourceLocation { start: 0, end: 1 };
1426    /// let first = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1427    /// let second = Node::new(NodeKind::Number { value: "2".to_string() }, loc);
1428    /// let program = Node::new(
1429    ///     NodeKind::Program { statements: vec![first, second] },
1430    ///     loc,
1431    /// );
1432    ///
1433    /// assert_eq!(program.last_child().map(|n| n.kind.kind_name()), Some("Number"));
1434    /// assert_eq!(Node::new(NodeKind::Block { statements: vec![] }, loc).last_child(), None);
1435    /// ```
1436    #[inline]
1437    pub fn last_child(&self) -> Option<&Node> {
1438        let mut result = None;
1439        self.for_each_child(|child| {
1440            result = Some(child);
1441        });
1442        result
1443    }
1444}
1445
1446/// Comprehensive enumeration of all Perl language constructs supported by the parser.
1447///
1448/// This enum represents every possible AST node type that can be parsed from Perl code
1449/// during the Parse → Index → Navigate → Complete → Analyze workflow. Each variant captures
1450/// the semantic meaning and structural relationships needed for complete script analysis
1451/// and transformation.
1452///
1453/// # LSP Workflow Integration
1454///
1455/// Node kinds are processed differently across workflow stages:
1456/// - **Parse**: All variants are produced by the parser
1457/// - **Index**: Symbol-bearing variants feed workspace indexing
1458/// - **Navigate**: Call and reference variants support navigation features
1459/// - **Complete**: Expression variants provide completion context
1460/// - **Analyze**: Semantic variants drive diagnostics and refactoring
1461///
1462/// # Examples
1463///
1464/// Pattern-match on node kinds to extract semantic information:
1465///
1466/// ```
1467/// use perl_ast::{Node, NodeKind, SourceLocation};
1468///
1469/// let loc = SourceLocation { start: 0, end: 5 };
1470/// let node = Node::new(
1471///     NodeKind::Variable { sigil: "$".to_string(), name: "foo".to_string() },
1472///     loc,
1473/// );
1474///
1475/// assert!(matches!(
1476///     &node.kind,
1477///     NodeKind::Variable { sigil, name } if sigil == "$" && name == "foo"
1478/// ));
1479/// ```
1480///
1481/// Use [`kind_name()`](NodeKind::kind_name) for debugging and diagnostics:
1482///
1483/// ```
1484/// use perl_ast::NodeKind;
1485///
1486/// let kind = NodeKind::Number { value: "99".to_string() };
1487/// assert_eq!(kind.kind_name(), "Number");
1488///
1489/// let kind = NodeKind::Variable { sigil: "@".to_string(), name: "list".to_string() };
1490/// assert_eq!(kind.kind_name(), "Variable");
1491/// ```
1492///
1493/// # Performance Considerations
1494///
1495/// The enum design optimizes for large codebases:
1496/// - Box pointers minimize stack usage for recursive structures
1497/// - Vector storage enables efficient bulk operations on child nodes
1498/// - Clone operations optimized for concurrent analysis workflows
1499/// - Pattern matching performance tuned for common Perl constructs
1500#[derive(Debug, Clone, PartialEq)]
1501pub enum NodeKind {
1502    /// Top-level program containing all statements in a Perl script
1503    ///
1504    /// This is the root node for any parsed Perl script content, containing all
1505    /// top-level statements found during the Parse stage of LSP workflow.
1506    Program {
1507        /// All top-level statements in the Perl script
1508        statements: Vec<Node>,
1509    },
1510
1511    /// Statement wrapper for expressions that appear at statement level
1512    ///
1513    /// Used during Analyze stage to distinguish between expressions used as
1514    /// statements versus expressions within other contexts during Perl parsing.
1515    ExpressionStatement {
1516        /// The expression being used as a statement
1517        expression: Box<Node>,
1518    },
1519
1520    /// Variable declaration with scope declarator in Perl script processing
1521    ///
1522    /// Represents declarations like `my $var`, `our $global`, `local $dynamic`, etc.
1523    /// Critical for Analyze stage symbol table construction during Perl parsing.
1524    VariableDeclaration {
1525        /// Scope declarator: "my", "our", "local", "state"
1526        declarator: String,
1527        /// The variable being declared
1528        variable: Box<Node>,
1529        /// Variable attributes (e.g., ":shared", ":locked")
1530        attributes: Vec<String>,
1531        /// Optional initializer expression
1532        initializer: Option<Box<Node>>,
1533    },
1534
1535    /// Multiple variable declaration in a single statement
1536    ///
1537    /// Handles constructs like `my ($x, $y) = @values` common in Perl script processing.
1538    /// Supports efficient bulk variable analysis during Navigate stage operations.
1539    VariableListDeclaration {
1540        /// Scope declarator for all variables in the list
1541        declarator: String,
1542        /// All variables being declared in the list
1543        variables: Vec<Node>,
1544        /// Attributes applied to the variable list
1545        attributes: Vec<String>,
1546        /// Optional initializer for the entire variable list
1547        initializer: Option<Box<Node>>,
1548    },
1549
1550    /// Perl variable reference (scalar, array, hash, etc.) in Perl parsing workflow
1551    Variable {
1552        /// Variable sigil indicating type: $, @, %, &, *
1553        sigil: String, // $, @, %, &, *
1554        /// Variable name without sigil
1555        name: String,
1556    },
1557
1558    /// Variable with additional attributes for enhanced LSP workflow
1559    VariableWithAttributes {
1560        /// The base variable node
1561        variable: Box<Node>,
1562        /// List of attribute names applied to the variable
1563        attributes: Vec<String>,
1564    },
1565
1566    /// Assignment operation for LSP data processing workflows
1567    Assignment {
1568        /// Left-hand side of assignment
1569        lhs: Box<Node>,
1570        /// Right-hand side of assignment
1571        rhs: Box<Node>,
1572        /// Assignment operator: =, +=, -=, etc.
1573        op: String, // =, +=, -=, etc.
1574    },
1575
1576    // Expressions
1577    /// Binary operation for Perl parsing workflow calculations
1578    Binary {
1579        /// Binary operator
1580        op: String,
1581        /// Left operand
1582        left: Box<Node>,
1583        /// Right operand
1584        right: Box<Node>,
1585    },
1586
1587    /// Ternary conditional expression for Perl parsing workflow logic
1588    Ternary {
1589        /// Condition to evaluate
1590        condition: Box<Node>,
1591        /// Expression when condition is true
1592        then_expr: Box<Node>,
1593        /// Expression when condition is false
1594        else_expr: Box<Node>,
1595    },
1596
1597    /// Unary operation for Perl parsing workflow
1598    Unary {
1599        /// Unary operator
1600        op: String,
1601        /// Operand to apply operator to
1602        operand: Box<Node>,
1603    },
1604
1605    // I/O operations
1606    /// Diamond operator for file input in Perl parsing workflow
1607    Diamond, // <>
1608
1609    /// Ellipsis operator for Perl parsing workflow
1610    Ellipsis, // ...
1611
1612    /// Undef value for Perl parsing workflow
1613    Undef, // undef
1614
1615    /// Readline operation for LSP file processing
1616    Readline {
1617        /// Optional filehandle: `<STDIN>`, `<$fh>`, etc.
1618        filehandle: Option<String>, // <STDIN>, <$fh>, etc.
1619    },
1620
1621    /// Glob pattern for LSP workspace file matching
1622    Glob {
1623        /// Pattern string for file matching
1624        pattern: String, // <*.txt>
1625    },
1626
1627    /// Typeglob expression: `*foo` or `*main::bar`
1628    ///
1629    /// Provides access to all symbol table entries for a given name.
1630    Typeglob {
1631        /// Name of the symbol (including package qualification)
1632        name: String,
1633    },
1634
1635    /// Numeric literal in Perl code (integer, float, hex, octal, binary)
1636    ///
1637    /// Represents all numeric literal forms: `42`, `3.14`, `0x1A`, `0o755`, `0b1010`.
1638    Number {
1639        /// String representation preserving original format
1640        value: String,
1641    },
1642
1643    /// String literal with optional interpolation
1644    ///
1645    /// Handles both single-quoted (`'literal'`) and double-quoted (`"$interpolated"`) strings.
1646    String {
1647        /// String content (after quote processing)
1648        value: String,
1649        /// Whether the string supports variable interpolation
1650        interpolated: bool,
1651    },
1652
1653    /// Heredoc string literal for multi-line content
1654    ///
1655    /// Supports all heredoc forms: `<<EOF`, `<<'EOF'`, `<<"EOF"`, `<<~EOF` (indented).
1656    Heredoc {
1657        /// Delimiter marking heredoc boundaries
1658        delimiter: String,
1659        /// Content between delimiters
1660        content: String,
1661        /// Whether content supports variable interpolation
1662        interpolated: bool,
1663        /// Whether leading whitespace is stripped (<<~ form)
1664        indented: bool,
1665        /// Whether this is a command execution heredoc (<<`EOF`)
1666        command: bool,
1667        /// Body span for breakpoint detection (populated by drain_pending_heredocs)
1668        body_span: Option<SourceLocation>,
1669    },
1670
1671    /// Array literal expression: `(1, 2, 3)` or `[1, 2, 3]`
1672    ArrayLiteral {
1673        /// Elements in the array
1674        elements: Vec<Node>,
1675    },
1676
1677    /// Hash literal expression: `(key => 'value')` or `{key => 'value'}`
1678    HashLiteral {
1679        /// Key-value pairs in the hash
1680        pairs: Vec<(Node, Node)>,
1681    },
1682
1683    /// Block of statements: `{ ... }`
1684    ///
1685    /// Used for control structures, subroutine bodies, and bare blocks.
1686    Block {
1687        /// Statements within the block
1688        statements: Vec<Node>,
1689    },
1690
1691    /// Eval block for exception handling: `eval { ... }`
1692    Eval {
1693        /// Block to evaluate with exception trapping
1694        block: Box<Node>,
1695    },
1696
1697    /// Do block for file inclusion or expression evaluation: `do { ... }` or `do "file"`
1698    Do {
1699        /// Block to execute or file expression
1700        block: Box<Node>,
1701    },
1702
1703    /// Defer block for deferred cleanup on scope exit (Perl 5.36+ experimental, stable in 5.40)
1704    Defer {
1705        /// Block to execute on scope exit
1706        block: Box<Node>,
1707    },
1708
1709    /// Try-catch-finally for modern exception handling (Syntax::Keyword::Try style)
1710    Try {
1711        /// Try block body
1712        body: Box<Node>,
1713        /// Catch blocks: (optional exception variable, handler block)
1714        catch_blocks: Vec<(Option<String>, Box<Node>)>,
1715        /// Optional finally block
1716        finally_block: Option<Box<Node>>,
1717    },
1718
1719    /// If-elsif-else conditional statement
1720    If {
1721        /// Condition expression
1722        condition: Box<Node>,
1723        /// Then branch block
1724        then_branch: Box<Node>,
1725        /// Elsif branches: (condition, block) pairs
1726        elsif_branches: Vec<(Box<Node>, Box<Node>)>,
1727        /// Optional else branch
1728        else_branch: Option<Box<Node>>,
1729    },
1730
1731    /// Statement with a label for loop control: `LABEL: while (...)`
1732    LabeledStatement {
1733        /// Label name (e.g., "OUTER", "LINE")
1734        label: String,
1735        /// Labeled statement (typically a loop)
1736        statement: Box<Node>,
1737    },
1738
1739    /// While loop: `while (condition) { ... }`
1740    While {
1741        /// Loop condition
1742        condition: Box<Node>,
1743        /// Loop body
1744        body: Box<Node>,
1745        /// Optional continue block
1746        continue_block: Option<Box<Node>>,
1747    },
1748
1749    /// Tie operation for binding variables to objects: `tie %hash, 'Package', @args`
1750    Tie {
1751        /// Variable being tied
1752        variable: Box<Node>,
1753        /// Class/package name to tie to
1754        package: Box<Node>,
1755        /// Arguments passed to TIE* method
1756        args: Vec<Node>,
1757    },
1758
1759    /// Untie operation for unbinding variables: `untie %hash`
1760    Untie {
1761        /// Variable being untied
1762        variable: Box<Node>,
1763    },
1764
1765    /// C-style for loop: `for (init; cond; update) { ... }`
1766    For {
1767        /// Initialization expression
1768        init: Option<Box<Node>>,
1769        /// Loop condition
1770        condition: Option<Box<Node>>,
1771        /// Update expression
1772        update: Option<Box<Node>>,
1773        /// Loop body
1774        body: Box<Node>,
1775        /// Optional continue block
1776        continue_block: Option<Box<Node>>,
1777    },
1778
1779    /// Foreach loop: `foreach my $item (@list) { ... }`
1780    Foreach {
1781        /// Iterator variable
1782        variable: Box<Node>,
1783        /// List to iterate
1784        list: Box<Node>,
1785        /// Loop body
1786        body: Box<Node>,
1787        /// Optional continue block
1788        continue_block: Option<Box<Node>>,
1789    },
1790
1791    /// Given statement for switch-like matching (Perl 5.10+)
1792    Given {
1793        /// Expression to match against
1794        expr: Box<Node>,
1795        /// Body containing when/default blocks
1796        body: Box<Node>,
1797    },
1798
1799    /// When clause in given/switch: `when ($pattern) { ... }`
1800    When {
1801        /// Pattern to match
1802        condition: Box<Node>,
1803        /// Handler block
1804        body: Box<Node>,
1805    },
1806
1807    /// Default clause in given/switch: `default { ... }`
1808    Default {
1809        /// Handler block for unmatched cases
1810        body: Box<Node>,
1811    },
1812
1813    /// Statement modifier syntax: `print "ok" if $condition`
1814    StatementModifier {
1815        /// Statement to conditionally execute
1816        statement: Box<Node>,
1817        /// Modifier keyword: if, unless, while, until, for, foreach
1818        modifier: String,
1819        /// Modifier condition
1820        condition: Box<Node>,
1821    },
1822
1823    // Functions
1824    /// Subroutine declaration (function) including name, prototype, signature and body.
1825    Subroutine {
1826        /// Name of the subroutine
1827        ///
1828        /// # Precise Navigation Support
1829        /// - Added name_span for exact LSP navigation
1830        /// - Enables precise go-to-definition and hover behavior
1831        /// - O(1) span lookup in workspace symbols
1832        ///
1833        /// ## Integration Points
1834        /// - Semantic token providers
1835        /// - Cross-reference generation
1836        /// - Symbol renaming
1837        name: Option<String>,
1838
1839        /// Source location span of the subroutine name
1840        ///
1841        /// ## Usage Notes
1842        /// - Always corresponds to the name field
1843        /// - Provides constant-time position information
1844        /// - Essential for precise editor interactions
1845        name_span: Option<SourceLocation>,
1846
1847        /// Optional prototype node (e.g. `($;@)`).
1848        prototype: Option<Box<Node>>,
1849        /// Optional signature node (Perl 5.20+ feature).
1850        signature: Option<Box<Node>>,
1851        /// Attributes attached to the subroutine (`:lvalue`, etc.).
1852        attributes: Vec<String>,
1853        /// The body block of the subroutine.
1854        body: Box<Node>,
1855    },
1856
1857    /// Subroutine prototype specification: `sub foo ($;@) { ... }`
1858    Prototype {
1859        /// Prototype string defining argument behavior
1860        content: String,
1861    },
1862
1863    /// Subroutine signature (Perl 5.20+): `sub foo ($x, $y = 0) { ... }`
1864    Signature {
1865        /// List of signature parameters
1866        parameters: Vec<Node>,
1867    },
1868
1869    /// Mandatory signature parameter: `$x` in `sub foo ($x) { }`
1870    MandatoryParameter {
1871        /// Variable being bound
1872        variable: Box<Node>,
1873    },
1874
1875    /// Optional signature parameter with default: `$y = 0` in `sub foo ($y = 0) { }`
1876    OptionalParameter {
1877        /// Variable being bound
1878        variable: Box<Node>,
1879        /// Default value expression
1880        default_value: Box<Node>,
1881    },
1882
1883    /// Slurpy parameter collecting remaining args: `@rest` or `%opts` in signature
1884    SlurpyParameter {
1885        /// Array or hash variable to receive remaining arguments
1886        variable: Box<Node>,
1887    },
1888
1889    /// Named parameter placeholder in signature (future Perl feature)
1890    NamedParameter {
1891        /// Variable for named parameter binding
1892        variable: Box<Node>,
1893    },
1894
1895    /// Method declaration (Perl 5.38+ with `use feature 'class'`)
1896    Method {
1897        /// Method name
1898        name: String,
1899        /// Optional signature
1900        signature: Option<Box<Node>>,
1901        /// Method attributes (e.g., `:lvalue`)
1902        attributes: Vec<String>,
1903        /// Method body
1904        body: Box<Node>,
1905    },
1906
1907    /// Return statement: `return;` or `return $value;`
1908    Return {
1909        /// Optional return value
1910        value: Option<Box<Node>>,
1911    },
1912
1913    /// Loop control statement: `next`, `last`, or `redo`
1914    LoopControl {
1915        /// Control keyword: "next", "last", or "redo"
1916        op: String,
1917        /// Optional label: `next LABEL`
1918        label: Option<String>,
1919    },
1920
1921    /// Goto statement: `goto LABEL`, `goto &sub`, or `goto $expr`
1922    Goto {
1923        /// The target of the goto (label identifier, sub reference, or expression)
1924        target: Box<Node>,
1925    },
1926
1927    /// Method call: `$obj->method(@args)` or `$obj->method`
1928    MethodCall {
1929        /// Object or class expression
1930        object: Box<Node>,
1931        /// Method name being called
1932        method: String,
1933        /// Method arguments
1934        args: Vec<Node>,
1935    },
1936
1937    /// Function call: `foo(@args)` or `foo()`
1938    FunctionCall {
1939        /// Function name (may be qualified: `Package::func`)
1940        name: String,
1941        /// Function arguments
1942        args: Vec<Node>,
1943    },
1944
1945    /// Indirect object call (legacy syntax): `new Class @args`
1946    IndirectCall {
1947        /// Method name
1948        method: String,
1949        /// Object or class
1950        object: Box<Node>,
1951        /// Arguments
1952        args: Vec<Node>,
1953    },
1954
1955    /// Regex literal: `/pattern/modifiers` or `qr/pattern/modifiers`
1956    Regex {
1957        /// Regular expression pattern
1958        pattern: String,
1959        /// Replacement string (for s/// when parsed as regex)
1960        replacement: Option<String>,
1961        /// Regex modifiers (i, m, s, x, g, etc.)
1962        modifiers: String,
1963        /// Whether the regex contains embedded code `(?{...})`
1964        has_embedded_code: bool,
1965    },
1966
1967    /// Match operation: `$str =~ /pattern/modifiers` or `$str !~ /pattern/modifiers`
1968    Match {
1969        /// Expression to match against
1970        expr: Box<Node>,
1971        /// Pattern to match
1972        pattern: String,
1973        /// Match modifiers
1974        modifiers: String,
1975        /// Whether the regex contains embedded code `(?{...})`
1976        has_embedded_code: bool,
1977        /// Whether the binding operator was `!~` (negated match)
1978        negated: bool,
1979    },
1980
1981    /// Substitution operation: `$str =~ s/pattern/replacement/modifiers`
1982    Substitution {
1983        /// Expression to substitute in
1984        expr: Box<Node>,
1985        /// Pattern to find
1986        pattern: String,
1987        /// Replacement string
1988        replacement: String,
1989        /// Substitution modifiers (g, e, r, etc.)
1990        modifiers: String,
1991        /// Whether the regex contains embedded code `(?{...})`
1992        has_embedded_code: bool,
1993        /// Whether the binding operator was `!~` (negated match)
1994        negated: bool,
1995    },
1996
1997    /// Transliteration operation: `$str =~ tr/search/replace/` or `y///`
1998    Transliteration {
1999        /// Expression to transliterate
2000        expr: Box<Node>,
2001        /// Characters to search for
2002        search: String,
2003        /// Replacement characters
2004        replace: String,
2005        /// Transliteration modifiers (c, d, s, r)
2006        modifiers: String,
2007        /// Whether the binding operator was `!~` (negated match)
2008        negated: bool,
2009    },
2010
2011    // Package system
2012    /// Package declaration (e.g. `package Foo;`) and optional inline block form.
2013    Package {
2014        /// Name of the package
2015        ///
2016        /// # Precise Navigation Support
2017        /// - Added name_span for exact LSP navigation
2018        /// - Enables precise go-to-definition and hover behavior
2019        /// - O(1) span lookup in workspace symbols
2020        ///
2021        /// ## Integration Points
2022        /// - Workspace indexing
2023        /// - Cross-module symbol resolution
2024        /// - Code action providers
2025        name: String,
2026
2027        /// Source location span of the package name
2028        ///
2029        /// ## Usage Notes
2030        /// - Always corresponds to the name field
2031        /// - Provides constant-time position information
2032        /// - Essential for precise editor interactions
2033        name_span: SourceLocation,
2034
2035        /// Optional inline block for `package Foo { ... }` declarations.
2036        block: Option<Box<Node>>,
2037    },
2038
2039    /// Use statement for module loading: `use Module qw(imports);`
2040    Use {
2041        /// Module name to load
2042        module: String,
2043        /// Import arguments (symbols to import)
2044        args: Vec<String>,
2045        /// Whether this module is a known source filter (security risk)
2046        has_filter_risk: bool,
2047    },
2048
2049    /// No statement for disabling features: `no strict;`
2050    No {
2051        /// Module/pragma name to disable
2052        module: String,
2053        /// Arguments for the no statement
2054        args: Vec<String>,
2055        /// Whether this module is a known source filter (security risk)
2056        has_filter_risk: bool,
2057    },
2058
2059    /// Phase block for compile/runtime hooks: `BEGIN`, `END`, `CHECK`, `INIT`, `UNITCHECK`
2060    PhaseBlock {
2061        /// Phase name: BEGIN, END, CHECK, INIT, UNITCHECK
2062        phase: String,
2063        /// Source location span of the phase block name for precise navigation
2064        phase_span: Option<SourceLocation>,
2065        /// Block to execute during the specified phase
2066        block: Box<Node>,
2067    },
2068
2069    /// Data section marker: `__DATA__` or `__END__`
2070    DataSection {
2071        /// Section marker (__DATA__ or __END__)
2072        marker: String,
2073        /// Content following the marker (if any)
2074        body: Option<String>,
2075    },
2076
2077    /// Class declaration (Perl 5.38+ with `use feature 'class'`)
2078    Class {
2079        /// Class name
2080        name: String,
2081        /// Parent class names from `:isa(Parent)` attributes
2082        parents: Vec<String>,
2083        /// Class body containing methods and attributes
2084        body: Box<Node>,
2085    },
2086
2087    /// Format declaration for legacy report generation
2088    Format {
2089        /// Format name (defaults to filehandle name)
2090        name: String,
2091        /// Format specification body
2092        body: String,
2093    },
2094
2095    /// Bare identifier (bareword or package-qualified name)
2096    Identifier {
2097        /// Identifier string
2098        name: String,
2099    },
2100
2101    /// Parse error placeholder with error message and recovery context
2102    Error {
2103        /// Error description
2104        message: String,
2105        /// Expected token types (if any)
2106        expected: Vec<TokenKind>,
2107        /// The token actually found (if any)
2108        found: Option<Token>,
2109        /// Partial AST node parsed before error (if any)
2110        partial: Option<Box<Node>>,
2111    },
2112
2113    /// Missing expression where one was expected
2114    MissingExpression,
2115    /// Missing statement where one was expected
2116    MissingStatement,
2117    /// Missing identifier where one was expected
2118    MissingIdentifier,
2119    /// Missing block where one was expected
2120    MissingBlock,
2121
2122    /// Lexer budget exceeded marker preserving partial parse results
2123    ///
2124    /// Used when recursion or token limits are hit to preserve already-parsed content.
2125    UnknownRest,
2126}
2127
2128impl NodeKind {
2129    /// Get the name of this `NodeKind` as a static string.
2130    ///
2131    /// Useful for diagnostics, logging, and human-readable AST dumps.
2132    ///
2133    /// # Examples
2134    ///
2135    /// ```
2136    /// use perl_ast::NodeKind;
2137    ///
2138    /// let kind = NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() };
2139    /// assert_eq!(kind.kind_name(), "Variable");
2140    ///
2141    /// let kind = NodeKind::Program { statements: vec![] };
2142    /// assert_eq!(kind.kind_name(), "Program");
2143    /// ```
2144    pub fn kind_name(&self) -> &'static str {
2145        match self {
2146            NodeKind::Program { .. } => "Program",
2147            NodeKind::ExpressionStatement { .. } => "ExpressionStatement",
2148            NodeKind::VariableDeclaration { .. } => "VariableDeclaration",
2149            NodeKind::VariableListDeclaration { .. } => "VariableListDeclaration",
2150            NodeKind::Variable { .. } => "Variable",
2151            NodeKind::VariableWithAttributes { .. } => "VariableWithAttributes",
2152            NodeKind::Assignment { .. } => "Assignment",
2153            NodeKind::Binary { .. } => "Binary",
2154            NodeKind::Ternary { .. } => "Ternary",
2155            NodeKind::Unary { .. } => "Unary",
2156            NodeKind::Diamond => "Diamond",
2157            NodeKind::Ellipsis => "Ellipsis",
2158            NodeKind::Undef => "Undef",
2159            NodeKind::Readline { .. } => "Readline",
2160            NodeKind::Glob { .. } => "Glob",
2161            NodeKind::Typeglob { .. } => "Typeglob",
2162            NodeKind::Number { .. } => "Number",
2163            NodeKind::String { .. } => "String",
2164            NodeKind::Heredoc { .. } => "Heredoc",
2165            NodeKind::ArrayLiteral { .. } => "ArrayLiteral",
2166            NodeKind::HashLiteral { .. } => "HashLiteral",
2167            NodeKind::Block { .. } => "Block",
2168            NodeKind::Eval { .. } => "Eval",
2169            NodeKind::Do { .. } => "Do",
2170            NodeKind::Defer { .. } => "Defer",
2171            NodeKind::Try { .. } => "Try",
2172            NodeKind::If { .. } => "If",
2173            NodeKind::LabeledStatement { .. } => "LabeledStatement",
2174            NodeKind::While { .. } => "While",
2175            NodeKind::Tie { .. } => "Tie",
2176            NodeKind::Untie { .. } => "Untie",
2177            NodeKind::For { .. } => "For",
2178            NodeKind::Foreach { .. } => "Foreach",
2179            NodeKind::Given { .. } => "Given",
2180            NodeKind::When { .. } => "When",
2181            NodeKind::Default { .. } => "Default",
2182            NodeKind::StatementModifier { .. } => "StatementModifier",
2183            NodeKind::Subroutine { .. } => "Subroutine",
2184            NodeKind::Prototype { .. } => "Prototype",
2185            NodeKind::Signature { .. } => "Signature",
2186            NodeKind::MandatoryParameter { .. } => "MandatoryParameter",
2187            NodeKind::OptionalParameter { .. } => "OptionalParameter",
2188            NodeKind::SlurpyParameter { .. } => "SlurpyParameter",
2189            NodeKind::NamedParameter { .. } => "NamedParameter",
2190            NodeKind::Method { .. } => "Method",
2191            NodeKind::Return { .. } => "Return",
2192            NodeKind::LoopControl { .. } => "LoopControl",
2193            NodeKind::Goto { .. } => "Goto",
2194            NodeKind::MethodCall { .. } => "MethodCall",
2195            NodeKind::FunctionCall { .. } => "FunctionCall",
2196            NodeKind::IndirectCall { .. } => "IndirectCall",
2197            NodeKind::Regex { .. } => "Regex",
2198            NodeKind::Match { .. } => "Match",
2199            NodeKind::Substitution { .. } => "Substitution",
2200            NodeKind::Transliteration { .. } => "Transliteration",
2201            NodeKind::Package { .. } => "Package",
2202            NodeKind::Use { .. } => "Use",
2203            NodeKind::No { .. } => "No",
2204            NodeKind::PhaseBlock { .. } => "PhaseBlock",
2205            NodeKind::DataSection { .. } => "DataSection",
2206            NodeKind::Class { .. } => "Class",
2207            NodeKind::Format { .. } => "Format",
2208            NodeKind::Identifier { .. } => "Identifier",
2209            NodeKind::Error { .. } => "Error",
2210            NodeKind::MissingExpression => "MissingExpression",
2211            NodeKind::MissingStatement => "MissingStatement",
2212            NodeKind::MissingIdentifier => "MissingIdentifier",
2213            NodeKind::MissingBlock => "MissingBlock",
2214            NodeKind::UnknownRest => "UnknownRest",
2215        }
2216    }
2217
2218    /// Canonical list of **all** `kind_name()` strings, in alphabetical order.
2219    ///
2220    /// Every consumer that needs the full set of NodeKind names should reference
2221    /// this constant instead of maintaining a hand-written copy.
2222    pub const ALL_KIND_NAMES: &[&'static str] = &[
2223        "ArrayLiteral",
2224        "Assignment",
2225        "Binary",
2226        "Block",
2227        "Class",
2228        "DataSection",
2229        "Default",
2230        "Defer",
2231        "Diamond",
2232        "Do",
2233        "Ellipsis",
2234        "Error",
2235        "Eval",
2236        "ExpressionStatement",
2237        "For",
2238        "Foreach",
2239        "Format",
2240        "FunctionCall",
2241        "Given",
2242        "Glob",
2243        "Goto",
2244        "HashLiteral",
2245        "Heredoc",
2246        "Identifier",
2247        "If",
2248        "IndirectCall",
2249        "LabeledStatement",
2250        "LoopControl",
2251        "MandatoryParameter",
2252        "Match",
2253        "Method",
2254        "MethodCall",
2255        "MissingBlock",
2256        "MissingExpression",
2257        "MissingIdentifier",
2258        "MissingStatement",
2259        "NamedParameter",
2260        "No",
2261        "Number",
2262        "OptionalParameter",
2263        "Package",
2264        "PhaseBlock",
2265        "Program",
2266        "Prototype",
2267        "Readline",
2268        "Regex",
2269        "Return",
2270        "Signature",
2271        "SlurpyParameter",
2272        "StatementModifier",
2273        "String",
2274        "Subroutine",
2275        "Substitution",
2276        "Ternary",
2277        "Tie",
2278        "Transliteration",
2279        "Try",
2280        "Typeglob",
2281        "Unary",
2282        "Undef",
2283        "UnknownRest",
2284        "Untie",
2285        "Use",
2286        "Variable",
2287        "VariableDeclaration",
2288        "VariableListDeclaration",
2289        "VariableWithAttributes",
2290        "When",
2291        "While",
2292    ];
2293
2294    /// Subset of `ALL_KIND_NAMES` that represent synthetic/recovery nodes.
2295    ///
2296    /// These kinds are only produced by `parse_with_recovery()` on malformed
2297    /// input and should not be expected in clean parses.
2298    pub const RECOVERY_KIND_NAMES: &[&'static str] = &[
2299        "Error",
2300        "MissingBlock",
2301        "MissingExpression",
2302        "MissingIdentifier",
2303        "MissingStatement",
2304        "UnknownRest",
2305    ];
2306}
2307
2308impl fmt::Display for NodeKind {
2309    /// Formats as the canonical `kind_name()` string.
2310    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2311        f.write_str(self.kind_name())
2312    }
2313}
2314
2315impl fmt::Display for Node {
2316    /// Formats as the tree-sitter compatible S-expression.
2317    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2318        f.write_str(&self.to_sexp())
2319    }
2320}
2321
/// Build the S-expression label for a unary operator.
///
/// A handful of operators are given symbolic aliases:
///
/// - `!` and `not` both become `unary_not`
/// - `~` becomes `unary_complement`
/// - `\` (reference-of) becomes `unary_ref`
///
/// Every other operator is emitted as `unary_` followed by the operator text
/// with any spaces replaced by underscores. That covers the arithmetic signs
/// (`+`, `-`), increment/decrement (`++`, `--`), the file test operators
/// (`-f`, `-d`, `-e`, ...), postfix dereferences (`->@*`, `->%*`, `->$*`,
/// `->&*`, `->**`), `defined`, and any operator not otherwise recognised.
fn format_unary_operator(op: &str) -> String {
    // Only the aliased operators need special handling; all remaining known
    // operators contain no spaces, so the generic rule reproduces them as-is.
    let suffix = match op {
        "!" | "not" => "not".to_string(),
        "~" => "complement".to_string(),
        "\\" => "ref".to_string(),
        other => other.replace(' ', "_"),
    };
    format!("unary_{}", suffix)
}
2386
/// Build the S-expression label for a binary operator.
///
/// The two arrow-subscript forms get dedicated names without the `binary_`
/// prefix:
///
/// - `->{}` becomes `arrow_hash_deref`
/// - `->[]` becomes `arrow_array_deref`
///
/// Every other operator is emitted as `binary_` followed by the operator text
/// with any spaces replaced by underscores. That covers arithmetic
/// (`+ - * / % **`), numeric and string comparison (`== != < > <= >= <=>`,
/// `eq ne lt le gt ge cmp`), logical (`&& || and or xor`), bitwise
/// (`& | ^ << >>`), binding and smart match (`=~ !~ ~~`), repetition `x`,
/// concatenation `.`, ranges (`..`, `...`), `isa`, the assignment family
/// (`=`, `+=`, `//=`, ...), defined-or `//`, arrow `->`, plain subscripts
/// (`{}`, `[]`), and any operator not otherwise recognised.
fn format_binary_operator(op: &str) -> String {
    // Only the arrow-subscript forms deviate from the generic rule; all other
    // known operators contain no spaces and pass through unchanged.
    match op {
        "->{}" => "arrow_hash_deref".to_string(),
        "->[]" => "arrow_array_deref".to_string(),
        other => {
            let sanitized = other.replace(' ', "_");
            format!("binary_{}", sanitized)
        }
    }
}
2486
2487// SourceLocation is now provided by perl-position-tracking crate
2488// See the re-export at the top of this file
2489
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    /// Collects `kind_name()` from one hand-built sample of each `NodeKind`
    /// variant listed below.
    ///
    /// Removing a variant or changing its fields breaks compilation of this
    /// list. A newly added variant is *not* caught by the compiler here; a
    /// missing entry only shows up through
    /// `all_kind_names_is_consistent_with_kind_name`, and only when the new
    /// name has been added to `ALL_KIND_NAMES`.
    fn all_kind_names_from_variants() -> BTreeSet<&'static str> {
        let span = SourceLocation { start: 0, end: 0 };
        // Smallest possible child: a boxed `Undef` node at the empty span.
        let leaf = || Box::new(Node::new(NodeKind::Undef, span));

        let samples = [
            NodeKind::Program { statements: Vec::new() },
            NodeKind::ExpressionStatement { expression: leaf() },
            NodeKind::VariableDeclaration {
                declarator: String::new(),
                variable: leaf(),
                attributes: Vec::new(),
                initializer: None,
            },
            NodeKind::VariableListDeclaration {
                declarator: String::new(),
                variables: Vec::new(),
                attributes: Vec::new(),
                initializer: None,
            },
            NodeKind::Variable { sigil: String::new(), name: String::new() },
            NodeKind::VariableWithAttributes { variable: leaf(), attributes: Vec::new() },
            NodeKind::Assignment { lhs: leaf(), rhs: leaf(), op: String::new() },
            NodeKind::Binary { op: String::new(), left: leaf(), right: leaf() },
            NodeKind::Ternary { condition: leaf(), then_expr: leaf(), else_expr: leaf() },
            NodeKind::Unary { op: String::new(), operand: leaf() },
            NodeKind::Diamond,
            NodeKind::Ellipsis,
            NodeKind::Undef,
            NodeKind::Readline { filehandle: None },
            NodeKind::Glob { pattern: String::new() },
            NodeKind::Typeglob { name: String::new() },
            NodeKind::Number { value: String::new() },
            NodeKind::String { value: String::new(), interpolated: false },
            NodeKind::Heredoc {
                delimiter: String::new(),
                content: String::new(),
                interpolated: false,
                indented: false,
                command: false,
                body_span: None,
            },
            NodeKind::ArrayLiteral { elements: Vec::new() },
            NodeKind::HashLiteral { pairs: Vec::new() },
            NodeKind::Block { statements: Vec::new() },
            NodeKind::Eval { block: leaf() },
            NodeKind::Do { block: leaf() },
            NodeKind::Defer { block: leaf() },
            NodeKind::Try { body: leaf(), catch_blocks: Vec::new(), finally_block: None },
            NodeKind::If {
                condition: leaf(),
                then_branch: leaf(),
                elsif_branches: Vec::new(),
                else_branch: None,
            },
            NodeKind::LabeledStatement { label: String::new(), statement: leaf() },
            NodeKind::While { condition: leaf(), body: leaf(), continue_block: None },
            NodeKind::Tie { variable: leaf(), package: leaf(), args: Vec::new() },
            NodeKind::Untie { variable: leaf() },
            NodeKind::For {
                init: None,
                condition: None,
                update: None,
                body: leaf(),
                continue_block: None,
            },
            NodeKind::Foreach {
                variable: leaf(),
                list: leaf(),
                body: leaf(),
                continue_block: None,
            },
            NodeKind::Given { expr: leaf(), body: leaf() },
            NodeKind::When { condition: leaf(), body: leaf() },
            NodeKind::Default { body: leaf() },
            NodeKind::StatementModifier {
                statement: leaf(),
                modifier: String::new(),
                condition: leaf(),
            },
            NodeKind::Subroutine {
                name: None,
                name_span: None,
                prototype: None,
                signature: None,
                attributes: Vec::new(),
                body: leaf(),
            },
            NodeKind::Prototype { content: String::new() },
            NodeKind::Signature { parameters: Vec::new() },
            NodeKind::MandatoryParameter { variable: leaf() },
            NodeKind::OptionalParameter { variable: leaf(), default_value: leaf() },
            NodeKind::SlurpyParameter { variable: leaf() },
            NodeKind::NamedParameter { variable: leaf() },
            NodeKind::Method {
                name: String::new(),
                signature: None,
                attributes: Vec::new(),
                body: leaf(),
            },
            NodeKind::Return { value: None },
            NodeKind::LoopControl { op: String::new(), label: None },
            NodeKind::Goto { target: leaf() },
            NodeKind::MethodCall { object: leaf(), method: String::new(), args: Vec::new() },
            NodeKind::FunctionCall { name: String::new(), args: Vec::new() },
            NodeKind::IndirectCall { method: String::new(), object: leaf(), args: Vec::new() },
            NodeKind::Regex {
                pattern: String::new(),
                replacement: None,
                modifiers: String::new(),
                has_embedded_code: false,
            },
            NodeKind::Match {
                expr: leaf(),
                pattern: String::new(),
                modifiers: String::new(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Substitution {
                expr: leaf(),
                pattern: String::new(),
                replacement: String::new(),
                modifiers: String::new(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Transliteration {
                expr: leaf(),
                search: String::new(),
                replace: String::new(),
                modifiers: String::new(),
                negated: false,
            },
            NodeKind::Package { name: String::new(), name_span: span, block: None },
            NodeKind::Use { module: String::new(), args: Vec::new(), has_filter_risk: false },
            NodeKind::No { module: String::new(), args: Vec::new(), has_filter_risk: false },
            NodeKind::PhaseBlock { phase: String::new(), phase_span: None, block: leaf() },
            NodeKind::DataSection { marker: String::new(), body: None },
            NodeKind::Class { name: String::new(), parents: Vec::new(), body: leaf() },
            NodeKind::Format { name: String::new(), body: String::new() },
            NodeKind::Identifier { name: String::new() },
            NodeKind::Error {
                message: String::new(),
                expected: Vec::new(),
                found: None,
                partial: None,
            },
            NodeKind::MissingExpression,
            NodeKind::MissingStatement,
            NodeKind::MissingIdentifier,
            NodeKind::MissingBlock,
            NodeKind::UnknownRest,
        ];

        samples.iter().map(NodeKind::kind_name).collect()
    }

    /// `ALL_KIND_NAMES` must be duplicate-free and must contain exactly the
    /// names produced by `all_kind_names_from_variants`.
    #[test]
    fn all_kind_names_is_consistent_with_kind_name() {
        let declared = NodeKind::ALL_KIND_NAMES;
        let from_const = declared.iter().copied().collect::<BTreeSet<&str>>();
        let from_enum = all_kind_names_from_variants();

        // A set collapses repeated names, so a length mismatch means a duplicate entry.
        assert_eq!(declared.len(), from_const.len(), "ALL_KIND_NAMES contains duplicates");

        // Report both directions of drift in a single failure message.
        let only_in_enum = from_enum.difference(&from_const).collect::<Vec<_>>();
        let only_in_const = from_const.difference(&from_enum).collect::<Vec<_>>();
        let in_sync = only_in_enum.is_empty() && only_in_const.is_empty();

        assert!(
            in_sync,
            "ALL_KIND_NAMES is out of sync with NodeKind variants:\n  \
             in enum but not in ALL_KIND_NAMES: {only_in_enum:?}\n  \
             in ALL_KIND_NAMES but not in enum: {only_in_const:?}"
        );
    }

    /// `RECOVERY_KIND_NAMES` must be duplicate-free and every entry must also
    /// appear in `ALL_KIND_NAMES`.
    #[test]
    fn recovery_kind_names_is_subset_of_all() {
        let recovery_names = NodeKind::RECOVERY_KIND_NAMES;
        let recovery = recovery_names.iter().copied().collect::<BTreeSet<&str>>();
        let all = NodeKind::ALL_KIND_NAMES.iter().copied().collect::<BTreeSet<&str>>();

        // Same duplicate check as for the full list: the set must not shrink.
        assert_eq!(
            recovery_names.len(),
            recovery.len(),
            "RECOVERY_KIND_NAMES contains duplicates"
        );

        // Sorted list of recovery names that the full list does not know about.
        let not_in_all = recovery.iter().filter(|name| !all.contains(*name)).collect::<Vec<_>>();
        assert!(
            not_in_all.is_empty(),
            "RECOVERY_KIND_NAMES contains entries not in ALL_KIND_NAMES: {not_in_all:?}"
        );
    }
}