Skip to main content

perl_ast/
ast.rs

1//! Abstract Syntax Tree definitions for Perl within the parsing and LSP workflow.
2//!
3//! This module defines the comprehensive AST node types that represent parsed Perl code
4//! during the Parse → Index → Navigate → Complete → Analyze stages. The design is optimized
5//! for both direct use in Rust analysis and for generating tree-sitter compatible
6//! S-expressions during large workspace processing operations.
7//!
8//! # LSP Workflow Integration
9//!
10//! The AST structures support Perl tooling workflows by:
11//! - **Parse**: Produced by the parser as the canonical syntax tree
12//! - **Index**: Traversed to build symbol and reference tables
13//! - **Navigate**: Provides locations for definition and reference lookups
14//! - **Complete**: Supplies context for completion, hover, and signature help
15//! - **Analyze**: Feeds semantic analysis, diagnostics, and refactoring
16//!
17//! # Performance Characteristics
18//!
19//! AST structures are optimized for large codebases with:
20//! - Memory-efficient node representation using `Box<Node>` for recursive structures
21//! - Fast pattern matching via enum variants for common Perl constructs
22//! - Location tracking for precise error reporting in large files
23//! - Cheap cloning for parallel analysis tasks
24//!
25//! # Usage Examples
26//!
27//! ## Basic AST Construction
28//!
29//! ```rust
30//! use perl_ast::{Node, NodeKind, SourceLocation};
31//!
32//! // Create a simple variable declaration node
33//! let location = SourceLocation { start: 0, end: 10 };
34//! let node = Node::new(
35//!     NodeKind::VariableDeclaration {
36//!         declarator: "my".to_string(),
37//!         variable: Box::new(Node::new(
38//!             NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
39//!             location,
40//!         )),
41//!         attributes: vec![],
42//!         initializer: None,
43//!     },
44//!     location,
45//! );
46//! assert_eq!(node.kind.kind_name(), "VariableDeclaration");
47//! ```
48//!
49//! ## Tree-sitter S-expression Generation
50//!
51//! ```rust
52//! use perl_ast::{Node, NodeKind, SourceLocation};
53//!
54//! let loc = SourceLocation { start: 0, end: 2 };
55//! let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
56//! let program = Node::new(NodeKind::Program { statements: vec![num] }, loc);
57//!
58//! let sexp = program.to_sexp();
59//! assert!(sexp.starts_with("(source_file"));
60//! ```
61//!
62//! ## AST Traversal and Analysis
63//!
64//! ```rust
65//! use perl_ast::{Node, NodeKind, SourceLocation};
66//!
67//! fn count_variables(node: &Node) -> usize {
68//!     let mut count = 0;
69//!     match &node.kind {
70//!         NodeKind::Variable { .. } => count += 1,
71//!         NodeKind::Program { statements } => {
72//!             for stmt in statements {
73//!                 count += count_variables(stmt);
74//!             }
75//!         }
76//!         _ => {} // Handle other node types as needed
77//!     }
78//!     count
79//! }
80//!
81//! let loc = SourceLocation { start: 0, end: 5 };
82//! let var = Node::new(
83//!     NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
84//!     loc,
85//! );
86//! let program = Node::new(NodeKind::Program { statements: vec![var] }, loc);
87//! assert_eq!(count_variables(&program), 1);
88//! ```
89//!
90//! ## Parsing Integration
91//!
92//! In practice the AST is produced by the parser rather than built by hand
93//! (requires `perl-parser-core`):
94//!
95//! ```rust,ignore
96//! use perl_parser_core::Parser;
97//! use perl_ast::NodeKind;
98//!
99//! let mut parser = Parser::new("my $x = 42;");
100//! let ast = parser.parse().expect("should parse");
101//! assert!(matches!(ast.kind, NodeKind::Program { .. }));
102//! ```
103
104// Re-export SourceLocation from perl-position-tracking for unified span handling
105pub use perl_position_tracking::SourceLocation;
106// Re-export Token and TokenKind from perl-token for AST error nodes
107pub use perl_token::{Token, TokenKind};
108
109/// Core AST node representing any Perl language construct within parsing workflows.
110///
111/// This is the fundamental building block for representing parsed Perl code. Each node
112/// contains both the semantic information (kind) and positional information (location)
113/// necessary for comprehensive script analysis.
114///
115/// # LSP Workflow Role
116///
117/// Nodes flow through tooling stages:
118/// - **Parse**: Created by the parser as it builds the syntax tree
119/// - **Index**: Visited to build symbol and reference tables
120/// - **Navigate**: Used to resolve definitions, references, and call hierarchy
121/// - **Complete**: Provides contextual information for completion and hover
122/// - **Analyze**: Drives semantic analysis and diagnostics
123///
124/// # Memory Optimization
125///
126/// The structure is designed for efficient memory usage during large-scale parsing:
127/// - `SourceLocation` uses compact position encoding for large files
128/// - `NodeKind` enum variants minimize memory overhead for common constructs
129/// - Clone operations are optimized for shared analysis workflows
130///
131/// # Examples
132///
133/// Construct a variable declaration node manually:
134///
135/// ```
136/// use perl_ast::{Node, NodeKind, SourceLocation};
137///
138/// let loc = SourceLocation { start: 0, end: 11 };
139/// let var = Node::new(
140///     NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
141///     loc,
142/// );
143/// let decl = Node::new(
144///     NodeKind::VariableDeclaration {
145///         declarator: "my".to_string(),
146///         variable: Box::new(var),
147///         attributes: vec![],
148///         initializer: None,
149///     },
150///     loc,
151/// );
152/// assert_eq!(decl.kind.kind_name(), "VariableDeclaration");
153/// ```
154///
155/// Typically you obtain nodes from the parser rather than constructing them by hand:
156///
157/// ```ignore
158/// use perl_parser::Parser;
159///
160/// let mut parser = Parser::new("my $x = 42;");
161/// let ast = parser.parse()?;
162/// println!("AST: {}", ast.to_sexp());
163/// ```
164#[derive(Debug, Clone, PartialEq)]
165pub struct Node {
166    /// The specific type and semantic content of this AST node
167    pub kind: NodeKind,
168    /// Source position information for error reporting and code navigation
169    pub location: SourceLocation,
170}
171
172impl Node {
173    /// Create a new AST node with the given kind and source location.
174    ///
175    /// # Examples
176    ///
177    /// ```
178    /// use perl_ast::{Node, NodeKind, SourceLocation};
179    ///
180    /// let node = Node::new(
181    ///     NodeKind::Number { value: "42".to_string() },
182    ///     SourceLocation { start: 0, end: 2 },
183    /// );
184    /// assert_eq!(node.kind.kind_name(), "Number");
185    /// assert_eq!(node.location.start, 0);
186    /// ```
187    pub fn new(kind: NodeKind, location: SourceLocation) -> Self {
188        Node { kind, location }
189    }
190
191    /// Convert the AST to a tree-sitter compatible S-expression.
192    ///
193    /// Produces a parenthesized representation compatible with tree-sitter's
194    /// S-expression format, useful for debugging and snapshot testing.
195    ///
196    /// # Examples
197    ///
198    /// ```
199    /// use perl_ast::{Node, NodeKind, SourceLocation};
200    ///
201    /// let loc = SourceLocation { start: 0, end: 2 };
202    /// let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
203    /// let program = Node::new(
204    ///     NodeKind::Program { statements: vec![num] },
205    ///     loc,
206    /// );
207    /// let sexp = program.to_sexp();
208    /// assert!(sexp.starts_with("(source_file"));
209    /// ```
210    pub fn to_sexp(&self) -> String {
211        match &self.kind {
212            NodeKind::Program { statements } => {
213                let stmts =
214                    statements.iter().map(|s| s.to_sexp_inner()).collect::<Vec<_>>().join(" ");
215                format!("(source_file {})", stmts)
216            }
217
218            NodeKind::ExpressionStatement { expression } => {
219                format!("(expression_statement {})", expression.to_sexp())
220            }
221
222            NodeKind::VariableDeclaration { declarator, variable, attributes, initializer } => {
223                let attrs_str = if attributes.is_empty() {
224                    String::new()
225                } else {
226                    format!(" (attributes {})", attributes.join(" "))
227                };
228                if let Some(init) = initializer {
229                    format!(
230                        "({}_declaration {}{}{})",
231                        declarator,
232                        variable.to_sexp(),
233                        attrs_str,
234                        init.to_sexp()
235                    )
236                } else {
237                    format!("({}_declaration {}{})", declarator, variable.to_sexp(), attrs_str)
238                }
239            }
240
241            NodeKind::VariableListDeclaration {
242                declarator,
243                variables,
244                attributes,
245                initializer,
246            } => {
247                let vars = variables.iter().map(|v| v.to_sexp()).collect::<Vec<_>>().join(" ");
248                let attrs_str = if attributes.is_empty() {
249                    String::new()
250                } else {
251                    format!(" (attributes {})", attributes.join(" "))
252                };
253                if let Some(init) = initializer {
254                    format!(
255                        "({}_declaration ({}){}{})",
256                        declarator,
257                        vars,
258                        attrs_str,
259                        init.to_sexp()
260                    )
261                } else {
262                    format!("({}_declaration ({}){})", declarator, vars, attrs_str)
263                }
264            }
265
266            NodeKind::Variable { sigil, name } => {
267                // Format expected by bless parsing tests: (variable $ name)
268                format!("(variable {} {})", sigil, name)
269            }
270
271            NodeKind::VariableWithAttributes { variable, attributes } => {
272                let attrs = attributes.join(" ");
273                format!("({} (attributes {}))", variable.to_sexp(), attrs)
274            }
275
276            NodeKind::Assignment { lhs, rhs, op } => {
277                format!(
278                    "(assignment_{} {} {})",
279                    op.replace("=", "assign"),
280                    lhs.to_sexp(),
281                    rhs.to_sexp()
282                )
283            }
284
285            NodeKind::Binary { op, left, right } => {
286                // Tree-sitter format: (binary_op left right)
287                let op_name = format_binary_operator(op);
288                format!("({} {} {})", op_name, left.to_sexp(), right.to_sexp())
289            }
290
291            NodeKind::Ternary { condition, then_expr, else_expr } => {
292                format!(
293                    "(ternary {} {} {})",
294                    condition.to_sexp(),
295                    then_expr.to_sexp(),
296                    else_expr.to_sexp()
297                )
298            }
299
300            NodeKind::Unary { op, operand } => {
301                // Tree-sitter format: (unary_op operand)
302                let op_name = format_unary_operator(op);
303                format!("({} {})", op_name, operand.to_sexp())
304            }
305
306            NodeKind::Diamond => "(diamond)".to_string(),
307
308            NodeKind::Ellipsis => "(ellipsis)".to_string(),
309
310            NodeKind::Undef => "(undef)".to_string(),
311
312            NodeKind::Readline { filehandle } => {
313                if let Some(fh) = filehandle {
314                    format!("(readline {})", fh)
315                } else {
316                    "(readline)".to_string()
317                }
318            }
319
320            NodeKind::Glob { pattern } => {
321                format!("(glob {})", pattern)
322            }
323            NodeKind::Typeglob { name } => {
324                format!("(typeglob {})", name)
325            }
326
327            NodeKind::Number { value } => {
328                // Format expected by bless parsing tests: (number value)
329                format!("(number {})", value)
330            }
331
332            NodeKind::String { value, interpolated } => {
333                // Escape quotes in string value to prevent S-expression parsing issues
334                let escaped_value = value.replace('\\', "\\\\").replace('"', "\\\"");
335
336                // Format based on interpolation status
337                if *interpolated {
338                    format!("(string_interpolated \"{}\")", escaped_value)
339                } else {
340                    format!("(string \"{}\")", escaped_value)
341                }
342            }
343
344            NodeKind::Heredoc { delimiter, content, interpolated, indented, command, .. } => {
345                let type_str = if *command {
346                    "heredoc_command"
347                } else if *indented {
348                    if *interpolated { "heredoc_indented_interpolated" } else { "heredoc_indented" }
349                } else if *interpolated {
350                    "heredoc_interpolated"
351                } else {
352                    "heredoc"
353                };
354                format!("({} {:?} {:?})", type_str, delimiter, content)
355            }
356
357            NodeKind::ArrayLiteral { elements } => {
358                let elems = elements.iter().map(|e| e.to_sexp()).collect::<Vec<_>>().join(" ");
359                format!("(array {})", elems)
360            }
361
362            NodeKind::HashLiteral { pairs } => {
363                let kvs = pairs
364                    .iter()
365                    .map(|(k, v)| format!("({} {})", k.to_sexp(), v.to_sexp()))
366                    .collect::<Vec<_>>()
367                    .join(" ");
368                format!("(hash {})", kvs)
369            }
370
371            NodeKind::Block { statements } => {
372                let stmts = statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ");
373                format!("(block {})", stmts)
374            }
375
376            NodeKind::Eval { block } => {
377                format!("(eval {})", block.to_sexp())
378            }
379
380            NodeKind::Do { block } => {
381                format!("(do {})", block.to_sexp())
382            }
383
384            NodeKind::Try { body, catch_blocks, finally_block } => {
385                let mut parts = vec![format!("(try {})", body.to_sexp())];
386
387                for (var, block) in catch_blocks {
388                    if let Some(v) = var {
389                        parts.push(format!("(catch {} {})", v, block.to_sexp()));
390                    } else {
391                        parts.push(format!("(catch {})", block.to_sexp()));
392                    }
393                }
394
395                if let Some(finally) = finally_block {
396                    parts.push(format!("(finally {})", finally.to_sexp()));
397                }
398
399                parts.join(" ")
400            }
401
402            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
403                let mut parts =
404                    vec![format!("(if {} {})", condition.to_sexp(), then_branch.to_sexp())];
405
406                for (cond, block) in elsif_branches {
407                    parts.push(format!("(elsif {} {})", cond.to_sexp(), block.to_sexp()));
408                }
409
410                if let Some(else_block) = else_branch {
411                    parts.push(format!("(else {})", else_block.to_sexp()));
412                }
413
414                parts.join(" ")
415            }
416
417            NodeKind::LabeledStatement { label, statement } => {
418                format!("(labeled_statement {} {})", label, statement.to_sexp())
419            }
420
421            NodeKind::While { condition, body, continue_block } => {
422                let mut s = format!("(while {} {})", condition.to_sexp(), body.to_sexp());
423                if let Some(cont) = continue_block {
424                    s.push_str(&format!(" (continue {})", cont.to_sexp()));
425                }
426                s
427            }
428            NodeKind::Tie { variable, package, args } => {
429                let mut s = format!("(tie {} {}", variable.to_sexp(), package.to_sexp());
430                for arg in args {
431                    s.push_str(&format!(" {}", arg.to_sexp()));
432                }
433                s.push(')');
434                s
435            }
436            NodeKind::Untie { variable } => {
437                format!("(untie {})", variable.to_sexp())
438            }
439            NodeKind::For { init, condition, update, body, continue_block } => {
440                let init_str =
441                    init.as_ref().map(|i| i.to_sexp()).unwrap_or_else(|| "()".to_string());
442                let cond_str =
443                    condition.as_ref().map(|c| c.to_sexp()).unwrap_or_else(|| "()".to_string());
444                let update_str =
445                    update.as_ref().map(|u| u.to_sexp()).unwrap_or_else(|| "()".to_string());
446                let mut result =
447                    format!("(for {} {} {} {})", init_str, cond_str, update_str, body.to_sexp());
448                if let Some(cont) = continue_block {
449                    result.push_str(&format!(" (continue {})", cont.to_sexp()));
450                }
451                result
452            }
453
454            NodeKind::Foreach { variable, list, body, continue_block } => {
455                let cont = if let Some(cb) = continue_block {
456                    format!(" {}", cb.to_sexp())
457                } else {
458                    String::new()
459                };
460                format!(
461                    "(foreach {} {} {}{})",
462                    variable.to_sexp(),
463                    list.to_sexp(),
464                    body.to_sexp(),
465                    cont
466                )
467            }
468
469            NodeKind::Given { expr, body } => {
470                format!("(given {} {})", expr.to_sexp(), body.to_sexp())
471            }
472
473            NodeKind::When { condition, body } => {
474                format!("(when {} {})", condition.to_sexp(), body.to_sexp())
475            }
476
477            NodeKind::Default { body } => {
478                format!("(default {})", body.to_sexp())
479            }
480
481            NodeKind::StatementModifier { statement, modifier, condition } => {
482                format!(
483                    "(statement_modifier_{} {} {})",
484                    modifier,
485                    statement.to_sexp(),
486                    condition.to_sexp()
487                )
488            }
489
490            NodeKind::Subroutine { name, prototype, signature, attributes, body, name_span: _ } => {
491                if let Some(sub_name) = name {
492                    // Named subroutine - bless test expected format: (sub name () block)
493                    let mut parts = vec![sub_name.clone()];
494
495                    // Add attributes if present (before prototype/signature)
496                    if !attributes.is_empty() {
497                        for attr in attributes {
498                            parts.push(format!(":{}", attr));
499                        }
500                    }
501
502                    // Add prototype/signature - use () for empty prototype
503                    if let Some(proto) = prototype {
504                        parts.push(format!("({})", proto.to_sexp()));
505                    } else if signature.is_some() {
506                        // If there's a signature but no prototype, still show ()
507                        parts.push("()".to_string());
508                    } else {
509                        parts.push("()".to_string());
510                    }
511
512                    // Add body
513                    parts.push(body.to_sexp());
514
515                    // Format: (sub name [attrs...] ()(block ...)) - space between name and (), no space between () and block
516                    if parts.len() >= 3 && parts[parts.len() - 2] == "()" {
517                        let name_and_attrs = parts[0..parts.len() - 2].join(" ");
518                        let proto = &parts[parts.len() - 2];
519                        let body = &parts[parts.len() - 1];
520                        format!("(sub {} {}{})", name_and_attrs, proto, body)
521                    } else {
522                        format!("(sub {})", parts.join(" "))
523                    }
524                } else {
525                    // Anonymous subroutine - tree-sitter format
526                    let mut parts = Vec::new();
527
528                    // Add attributes if present
529                    if !attributes.is_empty() {
530                        let attrs: Vec<String> = attributes
531                            .iter()
532                            .map(|_attr| "(attribute (attribute_name))".to_string())
533                            .collect();
534                        parts.push(format!("(attrlist {})", attrs.join("")));
535                    }
536
537                    // Add prototype if present
538                    if let Some(proto) = prototype {
539                        parts.push(proto.to_sexp());
540                    }
541
542                    // Add signature if present
543                    if let Some(sig) = signature {
544                        parts.push(sig.to_sexp());
545                    }
546
547                    // Add body
548                    parts.push(body.to_sexp());
549
550                    format!("(anonymous_subroutine_expression {})", parts.join(""))
551                }
552            }
553
554            NodeKind::Prototype { content: _ } => "(prototype)".to_string(),
555
556            NodeKind::Signature { parameters } => {
557                let params = parameters.iter().map(|p| p.to_sexp()).collect::<Vec<_>>().join(" ");
558                format!("(signature {})", params)
559            }
560
561            NodeKind::MandatoryParameter { variable } => {
562                format!("(mandatory_parameter {})", variable.to_sexp())
563            }
564
565            NodeKind::OptionalParameter { variable, default_value } => {
566                format!("(optional_parameter {} {})", variable.to_sexp(), default_value.to_sexp())
567            }
568
569            NodeKind::SlurpyParameter { variable } => {
570                format!("(slurpy_parameter {})", variable.to_sexp())
571            }
572
573            NodeKind::NamedParameter { variable } => {
574                format!("(named_parameter {})", variable.to_sexp())
575            }
576
577            NodeKind::Method { name: _, signature, attributes, body } => {
578                let block_contents = match &body.kind {
579                    NodeKind::Block { statements } => {
580                        statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ")
581                    }
582                    _ => body.to_sexp(),
583                };
584
585                let mut parts = vec!["(bareword)".to_string()];
586
587                // Add signature if present
588                if let Some(sig) = signature {
589                    parts.push(sig.to_sexp());
590                }
591
592                // Add attributes if present
593                if !attributes.is_empty() {
594                    let attrs: Vec<String> = attributes
595                        .iter()
596                        .map(|_attr| "(attribute (attribute_name))".to_string())
597                        .collect();
598                    parts.push(format!("(attrlist {})", attrs.join("")));
599                }
600
601                parts.push(format!("(block {})", block_contents));
602                format!("(method_declaration_statement {})", parts.join(" "))
603            }
604
605            NodeKind::Return { value } => {
606                if let Some(val) = value {
607                    format!("(return {})", val.to_sexp())
608                } else {
609                    "(return)".to_string()
610                }
611            }
612
613            NodeKind::LoopControl { op, label } => {
614                if let Some(l) = label {
615                    format!("({} {})", op, l)
616                } else {
617                    format!("({})", op)
618                }
619            }
620
621            NodeKind::Goto { target } => {
622                format!("(goto {})", target.to_sexp())
623            }
624
625            NodeKind::MethodCall { object, method, args } => {
626                let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
627                format!("(method_call {} {} ({}))", object.to_sexp(), method, args_str)
628            }
629
630            NodeKind::FunctionCall { name, args } => {
631                // Special handling for functions that should use call format in tree-sitter tests
632                if matches!(
633                    name.as_str(),
634                    "bless"
635                        | "shift"
636                        | "unshift"
637                        | "open"
638                        | "die"
639                        | "warn"
640                        | "print"
641                        | "printf"
642                        | "say"
643                        | "push"
644                        | "pop"
645                        | "map"
646                        | "sort"
647                        | "grep"
648                        | "keys"
649                        | "values"
650                        | "each"
651                        | "defined"
652                        | "scalar"
653                        | "ref"
654                ) {
655                    let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
656                    if args.is_empty() {
657                        format!("(call {} ())", name)
658                    } else {
659                        format!("(call {} ({}))", name, args_str)
660                    }
661                } else {
662                    // Tree-sitter format varies by context
663                    let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
664                    if args.is_empty() {
665                        "(function_call_expression (function))".to_string()
666                    } else {
667                        format!("(ambiguous_function_call_expression (function) {})", args_str)
668                    }
669                }
670            }
671
672            NodeKind::IndirectCall { method, object, args } => {
673                let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
674                format!("(indirect_call {} {} ({}))", method, object.to_sexp(), args_str)
675            }
676
677            NodeKind::Regex { pattern, replacement, modifiers, has_embedded_code } => {
678                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
679                format!("(regex {:?} {:?} {:?}{})", pattern, replacement, modifiers, risk_marker)
680            }
681
682            NodeKind::Match { expr, pattern, modifiers, has_embedded_code, negated } => {
683                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
684                let op = if *negated { "not_match" } else { "match" };
685                format!(
686                    "({} {} (regex {:?} {:?}{}))",
687                    op,
688                    expr.to_sexp(),
689                    pattern,
690                    modifiers,
691                    risk_marker
692                )
693            }
694
695            NodeKind::Substitution {
696                expr,
697                pattern,
698                replacement,
699                modifiers,
700                has_embedded_code,
701                negated,
702            } => {
703                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
704                let neg_marker = if *negated { " (negated)" } else { "" };
705                format!(
706                    "(substitution {} {:?} {:?} {:?}{}{})",
707                    expr.to_sexp(),
708                    pattern,
709                    replacement,
710                    modifiers,
711                    risk_marker,
712                    neg_marker
713                )
714            }
715
716            NodeKind::Transliteration { expr, search, replace, modifiers, negated } => {
717                let neg_marker = if *negated { " (negated)" } else { "" };
718                format!(
719                    "(transliteration {} {:?} {:?} {:?}{})",
720                    expr.to_sexp(),
721                    search,
722                    replace,
723                    modifiers,
724                    neg_marker
725                )
726            }
727
728            NodeKind::Package { name, block, name_span: _ } => {
729                if let Some(blk) = block {
730                    format!("(package {} {})", name, blk.to_sexp())
731                } else {
732                    format!("(package {})", name)
733                }
734            }
735
736            NodeKind::Use { module, args, has_filter_risk } => {
737                let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
738                if args.is_empty() {
739                    format!("(use {}{})", module, risk_marker)
740                } else {
741                    let args_str = args.join(" ");
742                    format!("(use {} ({}){})", module, args_str, risk_marker)
743                }
744            }
745
746            NodeKind::No { module, args, has_filter_risk } => {
747                let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
748                if args.is_empty() {
749                    format!("(no {}{})", module, risk_marker)
750                } else {
751                    let args_str = args.join(" ");
752                    format!("(no {} ({}){})", module, args_str, risk_marker)
753                }
754            }
755
756            NodeKind::PhaseBlock { phase, phase_span: _, block } => {
757                format!("({} {})", phase, block.to_sexp())
758            }
759
760            NodeKind::DataSection { marker, body } => {
761                if let Some(body_text) = body {
762                    format!("(data_section {} \"{}\")", marker, body_text.escape_default())
763                } else {
764                    format!("(data_section {})", marker)
765                }
766            }
767
768            NodeKind::Class { name, body } => {
769                format!("(class {} {})", name, body.to_sexp())
770            }
771
772            NodeKind::Format { name, body } => {
773                format!("(format {} {:?})", name, body)
774            }
775
776            NodeKind::Identifier { name } => {
777                // Format expected by tests: (identifier name)
778                format!("(identifier {})", name)
779            }
780
781            NodeKind::Error { message, partial, .. } => {
782                if let Some(node) = partial {
783                    format!("(ERROR \"{}\" {})", message.escape_default(), node.to_sexp())
784                } else {
785                    format!("(ERROR \"{}\")", message.escape_default())
786                }
787            }
788            NodeKind::MissingExpression => "(missing_expression)".to_string(),
789            NodeKind::MissingStatement => "(missing_statement)".to_string(),
790            NodeKind::MissingIdentifier => "(missing_identifier)".to_string(),
791            NodeKind::MissingBlock => "(missing_block)".to_string(),
792            NodeKind::UnknownRest => "(UNKNOWN_REST)".to_string(),
793        }
794    }
795
796    /// Convert the AST to S-expression format that unwraps expression statements in programs
797    pub fn to_sexp_inner(&self) -> String {
798        match &self.kind {
799            NodeKind::ExpressionStatement { expression } => {
800                // Check if this is an anonymous subroutine - if so, keep it wrapped
801                match &expression.kind {
802                    NodeKind::Subroutine { name, .. } if name.is_none() => {
803                        // Anonymous subroutine should remain wrapped in expression statement
804                        self.to_sexp()
805                    }
806                    _ => {
807                        // In the inner format, other expression statements are unwrapped
808                        expression.to_sexp()
809                    }
810                }
811            }
812            _ => {
813                // For all other node types, use regular to_sexp
814                self.to_sexp()
815            }
816        }
817    }
818
819    /// Call a function on every direct child node of this node.
820    ///
821    /// This enables depth-first traversal for operations like heredoc content attachment.
822    /// The closure receives a mutable reference to each child node.
823    #[inline]
824    pub fn for_each_child_mut<F: FnMut(&mut Node)>(&mut self, mut f: F) {
825        match &mut self.kind {
826            NodeKind::Tie { variable, package, args } => {
827                f(variable);
828                f(package);
829                for arg in args {
830                    f(arg);
831                }
832            }
833            NodeKind::Untie { variable } => f(variable),
834
835            // Root program node
836            NodeKind::Program { statements } => {
837                for stmt in statements {
838                    f(stmt);
839                }
840            }
841
842            // Statement wrappers
843            NodeKind::ExpressionStatement { expression } => f(expression),
844
845            // Variable declarations
846            NodeKind::VariableDeclaration { variable, initializer, .. } => {
847                f(variable);
848                if let Some(init) = initializer {
849                    f(init);
850                }
851            }
852            NodeKind::VariableListDeclaration { variables, initializer, .. } => {
853                for var in variables {
854                    f(var);
855                }
856                if let Some(init) = initializer {
857                    f(init);
858                }
859            }
860            NodeKind::VariableWithAttributes { variable, .. } => f(variable),
861
862            // Binary operations
863            NodeKind::Binary { left, right, .. } => {
864                f(left);
865                f(right);
866            }
867            NodeKind::Ternary { condition, then_expr, else_expr } => {
868                f(condition);
869                f(then_expr);
870                f(else_expr);
871            }
872            NodeKind::Unary { operand, .. } => f(operand),
873            NodeKind::Assignment { lhs, rhs, .. } => {
874                f(lhs);
875                f(rhs);
876            }
877
878            // Control flow
879            NodeKind::Block { statements } => {
880                for stmt in statements {
881                    f(stmt);
882                }
883            }
884            NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
885                f(condition);
886                f(then_branch);
887                for (elsif_cond, elsif_body) in elsif_branches {
888                    f(elsif_cond);
889                    f(elsif_body);
890                }
891                if let Some(else_body) = else_branch {
892                    f(else_body);
893                }
894            }
895            NodeKind::While { condition, body, continue_block, .. } => {
896                f(condition);
897                f(body);
898                if let Some(cont) = continue_block {
899                    f(cont);
900                }
901            }
902            NodeKind::For { init, condition, update, body, continue_block, .. } => {
903                if let Some(i) = init {
904                    f(i);
905                }
906                if let Some(c) = condition {
907                    f(c);
908                }
909                if let Some(u) = update {
910                    f(u);
911                }
912                f(body);
913                if let Some(cont) = continue_block {
914                    f(cont);
915                }
916            }
917            NodeKind::Foreach { variable, list, body, continue_block } => {
918                f(variable);
919                f(list);
920                f(body);
921                if let Some(cb) = continue_block {
922                    f(cb);
923                }
924            }
925            NodeKind::Given { expr, body } => {
926                f(expr);
927                f(body);
928            }
929            NodeKind::When { condition, body } => {
930                f(condition);
931                f(body);
932            }
933            NodeKind::Default { body } => f(body),
934            NodeKind::StatementModifier { statement, condition, .. } => {
935                f(statement);
936                f(condition);
937            }
938            NodeKind::LabeledStatement { statement, .. } => f(statement),
939
940            // Eval and Do blocks
941            NodeKind::Eval { block } => f(block),
942            NodeKind::Do { block } => f(block),
943            NodeKind::Try { body, catch_blocks, finally_block } => {
944                f(body);
945                for (_, catch_body) in catch_blocks {
946                    f(catch_body);
947                }
948                if let Some(finally) = finally_block {
949                    f(finally);
950                }
951            }
952
953            // Function calls
954            NodeKind::FunctionCall { args, .. } => {
955                for arg in args {
956                    f(arg);
957                }
958            }
959            NodeKind::MethodCall { object, args, .. } => {
960                f(object);
961                for arg in args {
962                    f(arg);
963                }
964            }
965            NodeKind::IndirectCall { object, args, .. } => {
966                f(object);
967                for arg in args {
968                    f(arg);
969                }
970            }
971
972            // Functions
973            NodeKind::Subroutine { prototype, signature, body, .. } => {
974                if let Some(proto) = prototype {
975                    f(proto);
976                }
977                if let Some(sig) = signature {
978                    f(sig);
979                }
980                f(body);
981            }
982            NodeKind::Method { signature, body, .. } => {
983                if let Some(sig) = signature {
984                    f(sig);
985                }
986                f(body);
987            }
988            NodeKind::Return { value } => {
989                if let Some(v) = value {
990                    f(v);
991                }
992            }
993            NodeKind::Goto { target } => f(target),
994            NodeKind::Signature { parameters } => {
995                for param in parameters {
996                    f(param);
997                }
998            }
999            NodeKind::MandatoryParameter { variable } => f(variable),
1000            NodeKind::OptionalParameter { variable, default_value } => {
1001                f(variable);
1002                f(default_value);
1003            }
1004            NodeKind::SlurpyParameter { variable } => f(variable),
1005            NodeKind::NamedParameter { variable } => f(variable),
1006
1007            // Pattern matching
1008            NodeKind::Match { expr, .. } => f(expr),
1009            NodeKind::Substitution { expr, .. } => f(expr),
1010            NodeKind::Transliteration { expr, .. } => f(expr),
1011
1012            // Containers
1013            NodeKind::ArrayLiteral { elements } => {
1014                for elem in elements {
1015                    f(elem);
1016                }
1017            }
1018            NodeKind::HashLiteral { pairs } => {
1019                for (key, value) in pairs {
1020                    f(key);
1021                    f(value);
1022                }
1023            }
1024
1025            // Package system
1026            NodeKind::Package { block, .. } => {
1027                if let Some(b) = block {
1028                    f(b);
1029                }
1030            }
1031            NodeKind::PhaseBlock { block, .. } => f(block),
1032            NodeKind::Class { body, .. } => f(body),
1033
1034            // Error node might have a partial valid tree
1035            NodeKind::Error { partial, .. } => {
1036                if let Some(node) = partial {
1037                    f(node);
1038                }
1039            }
1040
1041            // Leaf nodes (no children to traverse)
1042            NodeKind::Variable { .. }
1043            | NodeKind::Identifier { .. }
1044            | NodeKind::Number { .. }
1045            | NodeKind::String { .. }
1046            | NodeKind::Heredoc { .. }
1047            | NodeKind::Regex { .. }
1048            | NodeKind::Readline { .. }
1049            | NodeKind::Glob { .. }
1050            | NodeKind::Typeglob { .. }
1051            | NodeKind::Diamond
1052            | NodeKind::Ellipsis
1053            | NodeKind::Undef
1054            | NodeKind::Use { .. }
1055            | NodeKind::No { .. }
1056            | NodeKind::Prototype { .. }
1057            | NodeKind::DataSection { .. }
1058            | NodeKind::Format { .. }
1059            | NodeKind::LoopControl { .. }
1060            | NodeKind::MissingExpression
1061            | NodeKind::MissingStatement
1062            | NodeKind::MissingIdentifier
1063            | NodeKind::MissingBlock
1064            | NodeKind::UnknownRest => {}
1065        }
1066    }
1067
1068    /// Call a function on every direct child node of this node (immutable version).
1069    ///
1070    /// This enables depth-first traversal for read-only operations like AST analysis.
1071    /// The closure receives an immutable reference to each child node.
1072    #[inline]
1073    pub fn for_each_child<'a, F: FnMut(&'a Node)>(&'a self, mut f: F) {
1074        match &self.kind {
1075            NodeKind::Tie { variable, package, args } => {
1076                f(variable);
1077                f(package);
1078                for arg in args {
1079                    f(arg);
1080                }
1081            }
1082            NodeKind::Untie { variable } => f(variable),
1083
1084            // Root program node
1085            NodeKind::Program { statements } => {
1086                for stmt in statements {
1087                    f(stmt);
1088                }
1089            }
1090
1091            // Statement wrappers
1092            NodeKind::ExpressionStatement { expression } => f(expression),
1093
1094            // Variable declarations
1095            NodeKind::VariableDeclaration { variable, initializer, .. } => {
1096                f(variable);
1097                if let Some(init) = initializer {
1098                    f(init);
1099                }
1100            }
1101            NodeKind::VariableListDeclaration { variables, initializer, .. } => {
1102                for var in variables {
1103                    f(var);
1104                }
1105                if let Some(init) = initializer {
1106                    f(init);
1107                }
1108            }
1109            NodeKind::VariableWithAttributes { variable, .. } => f(variable),
1110
1111            // Binary operations
1112            NodeKind::Binary { left, right, .. } => {
1113                f(left);
1114                f(right);
1115            }
1116            NodeKind::Ternary { condition, then_expr, else_expr } => {
1117                f(condition);
1118                f(then_expr);
1119                f(else_expr);
1120            }
1121            NodeKind::Unary { operand, .. } => f(operand),
1122            NodeKind::Assignment { lhs, rhs, .. } => {
1123                f(lhs);
1124                f(rhs);
1125            }
1126
1127            // Control flow
1128            NodeKind::Block { statements } => {
1129                for stmt in statements {
1130                    f(stmt);
1131                }
1132            }
1133            NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
1134                f(condition);
1135                f(then_branch);
1136                for (elsif_cond, elsif_body) in elsif_branches {
1137                    f(elsif_cond);
1138                    f(elsif_body);
1139                }
1140                if let Some(else_body) = else_branch {
1141                    f(else_body);
1142                }
1143            }
1144            NodeKind::While { condition, body, continue_block, .. } => {
1145                f(condition);
1146                f(body);
1147                if let Some(cont) = continue_block {
1148                    f(cont);
1149                }
1150            }
1151            NodeKind::For { init, condition, update, body, continue_block, .. } => {
1152                if let Some(i) = init {
1153                    f(i);
1154                }
1155                if let Some(c) = condition {
1156                    f(c);
1157                }
1158                if let Some(u) = update {
1159                    f(u);
1160                }
1161                f(body);
1162                if let Some(cont) = continue_block {
1163                    f(cont);
1164                }
1165            }
1166            NodeKind::Foreach { variable, list, body, continue_block } => {
1167                f(variable);
1168                f(list);
1169                f(body);
1170                if let Some(cb) = continue_block {
1171                    f(cb);
1172                }
1173            }
1174            NodeKind::Given { expr, body } => {
1175                f(expr);
1176                f(body);
1177            }
1178            NodeKind::When { condition, body } => {
1179                f(condition);
1180                f(body);
1181            }
1182            NodeKind::Default { body } => f(body),
1183            NodeKind::StatementModifier { statement, condition, .. } => {
1184                f(statement);
1185                f(condition);
1186            }
1187            NodeKind::LabeledStatement { statement, .. } => f(statement),
1188
1189            // Eval and Do blocks
1190            NodeKind::Eval { block } => f(block),
1191            NodeKind::Do { block } => f(block),
1192            NodeKind::Try { body, catch_blocks, finally_block } => {
1193                f(body);
1194                for (_, catch_body) in catch_blocks {
1195                    f(catch_body);
1196                }
1197                if let Some(finally) = finally_block {
1198                    f(finally);
1199                }
1200            }
1201
1202            // Function calls
1203            NodeKind::FunctionCall { args, .. } => {
1204                for arg in args {
1205                    f(arg);
1206                }
1207            }
1208            NodeKind::MethodCall { object, args, .. } => {
1209                f(object);
1210                for arg in args {
1211                    f(arg);
1212                }
1213            }
1214            NodeKind::IndirectCall { object, args, .. } => {
1215                f(object);
1216                for arg in args {
1217                    f(arg);
1218                }
1219            }
1220
1221            // Functions
1222            NodeKind::Subroutine { prototype, signature, body, .. } => {
1223                if let Some(proto) = prototype {
1224                    f(proto);
1225                }
1226                if let Some(sig) = signature {
1227                    f(sig);
1228                }
1229                f(body);
1230            }
1231            NodeKind::Method { signature, body, .. } => {
1232                if let Some(sig) = signature {
1233                    f(sig);
1234                }
1235                f(body);
1236            }
1237            NodeKind::Return { value } => {
1238                if let Some(v) = value {
1239                    f(v);
1240                }
1241            }
1242            NodeKind::Goto { target } => f(target),
1243            NodeKind::Signature { parameters } => {
1244                for param in parameters {
1245                    f(param);
1246                }
1247            }
1248            NodeKind::MandatoryParameter { variable } => f(variable),
1249            NodeKind::OptionalParameter { variable, default_value } => {
1250                f(variable);
1251                f(default_value);
1252            }
1253            NodeKind::SlurpyParameter { variable } => f(variable),
1254            NodeKind::NamedParameter { variable } => f(variable),
1255
1256            // Pattern matching
1257            NodeKind::Match { expr, .. } => f(expr),
1258            NodeKind::Substitution { expr, .. } => f(expr),
1259            NodeKind::Transliteration { expr, .. } => f(expr),
1260
1261            // Containers
1262            NodeKind::ArrayLiteral { elements } => {
1263                for elem in elements {
1264                    f(elem);
1265                }
1266            }
1267            NodeKind::HashLiteral { pairs } => {
1268                for (key, value) in pairs {
1269                    f(key);
1270                    f(value);
1271                }
1272            }
1273
1274            // Package system
1275            NodeKind::Package { block, .. } => {
1276                if let Some(b) = block {
1277                    f(b);
1278                }
1279            }
1280            NodeKind::PhaseBlock { block, .. } => f(block),
1281            NodeKind::Class { body, .. } => f(body),
1282
1283            // Error node might have a partial valid tree
1284            NodeKind::Error { partial, .. } => {
1285                if let Some(node) = partial {
1286                    f(node);
1287                }
1288            }
1289
1290            // Leaf nodes (no children to traverse)
1291            NodeKind::Variable { .. }
1292            | NodeKind::Identifier { .. }
1293            | NodeKind::Number { .. }
1294            | NodeKind::String { .. }
1295            | NodeKind::Heredoc { .. }
1296            | NodeKind::Regex { .. }
1297            | NodeKind::Readline { .. }
1298            | NodeKind::Glob { .. }
1299            | NodeKind::Typeglob { .. }
1300            | NodeKind::Diamond
1301            | NodeKind::Ellipsis
1302            | NodeKind::Undef
1303            | NodeKind::Use { .. }
1304            | NodeKind::No { .. }
1305            | NodeKind::Prototype { .. }
1306            | NodeKind::DataSection { .. }
1307            | NodeKind::Format { .. }
1308            | NodeKind::LoopControl { .. }
1309            | NodeKind::MissingExpression
1310            | NodeKind::MissingStatement
1311            | NodeKind::MissingIdentifier
1312            | NodeKind::MissingBlock
1313            | NodeKind::UnknownRest => {}
1314        }
1315    }
1316
1317    /// Count the total number of nodes in this subtree (inclusive).
1318    ///
1319    /// # Examples
1320    ///
1321    /// ```
1322    /// use perl_ast::{Node, NodeKind, SourceLocation};
1323    ///
1324    /// let loc = SourceLocation { start: 0, end: 1 };
1325    /// let leaf = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1326    /// assert_eq!(leaf.count_nodes(), 1);
1327    ///
1328    /// let program = Node::new(
1329    ///     NodeKind::Program { statements: vec![leaf] },
1330    ///     loc,
1331    /// );
1332    /// assert_eq!(program.count_nodes(), 2);
1333    /// ```
1334    pub fn count_nodes(&self) -> usize {
1335        let mut count = 1;
1336        self.for_each_child(|child| {
1337            count += child.count_nodes();
1338        });
1339        count
1340    }
1341
1342    /// Collect direct child nodes into a vector for convenience APIs.
1343    ///
1344    /// # Examples
1345    ///
1346    /// ```
1347    /// use perl_ast::{Node, NodeKind, SourceLocation};
1348    ///
1349    /// let loc = SourceLocation { start: 0, end: 1 };
1350    /// let stmt = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1351    /// let program = Node::new(
1352    ///     NodeKind::Program { statements: vec![stmt] },
1353    ///     loc,
1354    /// );
1355    /// assert_eq!(program.children().len(), 1);
1356    /// ```
1357    #[inline]
1358    pub fn children(&self) -> Vec<&Node> {
1359        let mut children = Vec::new();
1360        self.for_each_child(|child| children.push(child));
1361        children
1362    }
1363
1364    /// Get the first direct child node, if any.
1365    ///
1366    /// Optimized to avoid allocating the children vector.
1367    #[inline]
1368    pub fn first_child(&self) -> Option<&Node> {
1369        let mut result = None;
1370        self.for_each_child(|child| {
1371            if result.is_none() {
1372                result = Some(child);
1373            }
1374        });
1375        result
1376    }
1377}
1378
1379/// Comprehensive enumeration of all Perl language constructs supported by the parser.
1380///
1381/// This enum represents every possible AST node type that can be parsed from Perl code
1382/// during the Parse → Index → Navigate → Complete → Analyze workflow. Each variant captures
1383/// the semantic meaning and structural relationships needed for complete script analysis
1384/// and transformation.
1385///
1386/// # LSP Workflow Integration
1387///
1388/// Node kinds are processed differently across workflow stages:
1389/// - **Parse**: All variants are produced by the parser
1390/// - **Index**: Symbol-bearing variants feed workspace indexing
1391/// - **Navigate**: Call and reference variants support navigation features
1392/// - **Complete**: Expression variants provide completion context
1393/// - **Analyze**: Semantic variants drive diagnostics and refactoring
1394///
1395/// # Examples
1396///
1397/// Pattern-match on node kinds to extract semantic information:
1398///
1399/// ```
1400/// use perl_ast::{Node, NodeKind, SourceLocation};
1401///
1402/// let loc = SourceLocation { start: 0, end: 5 };
1403/// let node = Node::new(
1404///     NodeKind::Variable { sigil: "$".to_string(), name: "foo".to_string() },
1405///     loc,
1406/// );
1407///
1408/// match &node.kind {
1409///     NodeKind::Variable { sigil, name } => {
1410///         assert_eq!(sigil, "$");
1411///         assert_eq!(name, "foo");
1412///     }
1413///     _ => panic!("expected Variable"),
1414/// }
1415/// ```
1416///
1417/// Use [`kind_name()`](NodeKind::kind_name) for debugging and diagnostics:
1418///
1419/// ```
1420/// use perl_ast::NodeKind;
1421///
1422/// let kind = NodeKind::Number { value: "99".to_string() };
1423/// assert_eq!(kind.kind_name(), "Number");
1424///
1425/// let kind = NodeKind::Variable { sigil: "@".to_string(), name: "list".to_string() };
1426/// assert_eq!(kind.kind_name(), "Variable");
1427/// ```
1428///
1429/// # Performance Considerations
1430///
1431/// The enum design optimizes for large codebases:
1432/// - Box pointers minimize stack usage for recursive structures
1433/// - Vector storage enables efficient bulk operations on child nodes
1434/// - Clone operations optimized for concurrent analysis workflows
1435/// - Pattern matching performance tuned for common Perl constructs
1436#[derive(Debug, Clone, PartialEq)]
1437pub enum NodeKind {
1438    /// Top-level program containing all statements in a Perl script
1439    ///
1440    /// This is the root node for any parsed Perl script content, containing all
1441    /// top-level statements found during the Parse stage of LSP workflow.
1442    Program {
1443        /// All top-level statements in the Perl script
1444        statements: Vec<Node>,
1445    },
1446
1447    /// Statement wrapper for expressions that appear at statement level
1448    ///
1449    /// Used during Analyze stage to distinguish between expressions used as
1450    /// statements versus expressions within other contexts during Perl parsing.
1451    ExpressionStatement {
1452        /// The expression being used as a statement
1453        expression: Box<Node>,
1454    },
1455
1456    /// Variable declaration with scope declarator in Perl script processing
1457    ///
1458    /// Represents declarations like `my $var`, `our $global`, `local $dynamic`, etc.
1459    /// Critical for Analyze stage symbol table construction during Perl parsing.
1460    VariableDeclaration {
1461        /// Scope declarator: "my", "our", "local", "state"
1462        declarator: String,
1463        /// The variable being declared
1464        variable: Box<Node>,
1465        /// Variable attributes (e.g., ":shared", ":locked")
1466        attributes: Vec<String>,
1467        /// Optional initializer expression
1468        initializer: Option<Box<Node>>,
1469    },
1470
1471    /// Multiple variable declaration in a single statement
1472    ///
1473    /// Handles constructs like `my ($x, $y) = @values` common in Perl script processing.
1474    /// Supports efficient bulk variable analysis during Navigate stage operations.
1475    VariableListDeclaration {
1476        /// Scope declarator for all variables in the list
1477        declarator: String,
1478        /// All variables being declared in the list
1479        variables: Vec<Node>,
1480        /// Attributes applied to the variable list
1481        attributes: Vec<String>,
1482        /// Optional initializer for the entire variable list
1483        initializer: Option<Box<Node>>,
1484    },
1485
1486    /// Perl variable reference (scalar, array, hash, etc.) in Perl parsing workflow
1487    Variable {
1488        /// Variable sigil indicating type: $, @, %, &, *
1489        sigil: String, // $, @, %, &, *
1490        /// Variable name without sigil
1491        name: String,
1492    },
1493
1494    /// Variable with additional attributes for enhanced LSP workflow
1495    VariableWithAttributes {
1496        /// The base variable node
1497        variable: Box<Node>,
1498        /// List of attribute names applied to the variable
1499        attributes: Vec<String>,
1500    },
1501
1502    /// Assignment operation for LSP data processing workflows
1503    Assignment {
1504        /// Left-hand side of assignment
1505        lhs: Box<Node>,
1506        /// Right-hand side of assignment
1507        rhs: Box<Node>,
1508        /// Assignment operator: =, +=, -=, etc.
1509        op: String, // =, +=, -=, etc.
1510    },
1511
1512    // Expressions
1513    /// Binary operation for Perl parsing workflow calculations
1514    Binary {
1515        /// Binary operator
1516        op: String,
1517        /// Left operand
1518        left: Box<Node>,
1519        /// Right operand
1520        right: Box<Node>,
1521    },
1522
1523    /// Ternary conditional expression for Perl parsing workflow logic
1524    Ternary {
1525        /// Condition to evaluate
1526        condition: Box<Node>,
1527        /// Expression when condition is true
1528        then_expr: Box<Node>,
1529        /// Expression when condition is false
1530        else_expr: Box<Node>,
1531    },
1532
1533    /// Unary operation for Perl parsing workflow
1534    Unary {
1535        /// Unary operator
1536        op: String,
1537        /// Operand to apply operator to
1538        operand: Box<Node>,
1539    },
1540
1541    // I/O operations
1542    /// Diamond operator for file input in Perl parsing workflow
1543    Diamond, // <>
1544
1545    /// Ellipsis operator for Perl parsing workflow
1546    Ellipsis, // ...
1547
1548    /// Undef value for Perl parsing workflow
1549    Undef, // undef
1550
1551    /// Readline operation for LSP file processing
1552    Readline {
1553        /// Optional filehandle: `<STDIN>`, `<$fh>`, etc.
1554        filehandle: Option<String>, // <STDIN>, <$fh>, etc.
1555    },
1556
1557    /// Glob pattern for LSP workspace file matching
1558    Glob {
1559        /// Pattern string for file matching
1560        pattern: String, // <*.txt>
1561    },
1562
1563    /// Typeglob expression: `*foo` or `*main::bar`
1564    ///
1565    /// Provides access to all symbol table entries for a given name.
1566    Typeglob {
1567        /// Name of the symbol (including package qualification)
1568        name: String,
1569    },
1570
1571    /// Numeric literal in Perl code (integer, float, hex, octal, binary)
1572    ///
1573    /// Represents all numeric literal forms: `42`, `3.14`, `0x1A`, `0o755`, `0b1010`.
1574    Number {
1575        /// String representation preserving original format
1576        value: String,
1577    },
1578
1579    /// String literal with optional interpolation
1580    ///
1581    /// Handles both single-quoted (`'literal'`) and double-quoted (`"$interpolated"`) strings.
1582    String {
1583        /// String content (after quote processing)
1584        value: String,
1585        /// Whether the string supports variable interpolation
1586        interpolated: bool,
1587    },
1588
1589    /// Heredoc string literal for multi-line content
1590    ///
1591    /// Supports all heredoc forms: `<<EOF`, `<<'EOF'`, `<<"EOF"`, `<<~EOF` (indented).
1592    Heredoc {
1593        /// Delimiter marking heredoc boundaries
1594        delimiter: String,
1595        /// Content between delimiters
1596        content: String,
1597        /// Whether content supports variable interpolation
1598        interpolated: bool,
1599        /// Whether leading whitespace is stripped (<<~ form)
1600        indented: bool,
1601        /// Whether this is a command execution heredoc (<<`EOF`)
1602        command: bool,
1603        /// Body span for breakpoint detection (populated by drain_pending_heredocs)
1604        body_span: Option<SourceLocation>,
1605    },
1606
1607    /// Array literal expression: `(1, 2, 3)` or `[1, 2, 3]`
1608    ArrayLiteral {
1609        /// Elements in the array
1610        elements: Vec<Node>,
1611    },
1612
1613    /// Hash literal expression: `(key => 'value')` or `{key => 'value'}`
1614    HashLiteral {
1615        /// Key-value pairs in the hash
1616        pairs: Vec<(Node, Node)>,
1617    },
1618
1619    /// Block of statements: `{ ... }`
1620    ///
1621    /// Used for control structures, subroutine bodies, and bare blocks.
1622    Block {
1623        /// Statements within the block
1624        statements: Vec<Node>,
1625    },
1626
1627    /// Eval block for exception handling: `eval { ... }`
1628    Eval {
1629        /// Block to evaluate with exception trapping
1630        block: Box<Node>,
1631    },
1632
1633    /// Do block for file inclusion or expression evaluation: `do { ... }` or `do "file"`
1634    Do {
1635        /// Block to execute or file expression
1636        block: Box<Node>,
1637    },
1638
1639    /// Try-catch-finally for modern exception handling (Syntax::Keyword::Try style)
1640    Try {
1641        /// Try block body
1642        body: Box<Node>,
1643        /// Catch blocks: (optional exception variable, handler block)
1644        catch_blocks: Vec<(Option<String>, Box<Node>)>,
1645        /// Optional finally block
1646        finally_block: Option<Box<Node>>,
1647    },
1648
1649    /// If-elsif-else conditional statement
1650    If {
1651        /// Condition expression
1652        condition: Box<Node>,
1653        /// Then branch block
1654        then_branch: Box<Node>,
1655        /// Elsif branches: (condition, block) pairs
1656        elsif_branches: Vec<(Box<Node>, Box<Node>)>,
1657        /// Optional else branch
1658        else_branch: Option<Box<Node>>,
1659    },
1660
1661    /// Statement with a label for loop control: `LABEL: while (...)`
1662    LabeledStatement {
1663        /// Label name (e.g., "OUTER", "LINE")
1664        label: String,
1665        /// Labeled statement (typically a loop)
1666        statement: Box<Node>,
1667    },
1668
1669    /// While loop: `while (condition) { ... }`
1670    While {
1671        /// Loop condition
1672        condition: Box<Node>,
1673        /// Loop body
1674        body: Box<Node>,
1675        /// Optional continue block
1676        continue_block: Option<Box<Node>>,
1677    },
1678
1679    /// Tie operation for binding variables to objects: `tie %hash, 'Package', @args`
1680    Tie {
1681        /// Variable being tied
1682        variable: Box<Node>,
1683        /// Class/package name to tie to
1684        package: Box<Node>,
1685        /// Arguments passed to TIE* method
1686        args: Vec<Node>,
1687    },
1688
1689    /// Untie operation for unbinding variables: `untie %hash`
1690    Untie {
1691        /// Variable being untied
1692        variable: Box<Node>,
1693    },
1694
1695    /// C-style for loop: `for (init; cond; update) { ... }`
1696    For {
1697        /// Initialization expression
1698        init: Option<Box<Node>>,
1699        /// Loop condition
1700        condition: Option<Box<Node>>,
1701        /// Update expression
1702        update: Option<Box<Node>>,
1703        /// Loop body
1704        body: Box<Node>,
1705        /// Optional continue block
1706        continue_block: Option<Box<Node>>,
1707    },
1708
1709    /// Foreach loop: `foreach my $item (@list) { ... }`
1710    Foreach {
1711        /// Iterator variable
1712        variable: Box<Node>,
1713        /// List to iterate
1714        list: Box<Node>,
1715        /// Loop body
1716        body: Box<Node>,
1717        /// Optional continue block
1718        continue_block: Option<Box<Node>>,
1719    },
1720
1721    /// Given statement for switch-like matching (Perl 5.10+)
1722    Given {
1723        /// Expression to match against
1724        expr: Box<Node>,
1725        /// Body containing when/default blocks
1726        body: Box<Node>,
1727    },
1728
1729    /// When clause in given/switch: `when ($pattern) { ... }`
1730    When {
1731        /// Pattern to match
1732        condition: Box<Node>,
1733        /// Handler block
1734        body: Box<Node>,
1735    },
1736
1737    /// Default clause in given/switch: `default { ... }`
1738    Default {
1739        /// Handler block for unmatched cases
1740        body: Box<Node>,
1741    },
1742
1743    /// Statement modifier syntax: `print "ok" if $condition`
1744    StatementModifier {
1745        /// Statement to conditionally execute
1746        statement: Box<Node>,
1747        /// Modifier keyword: if, unless, while, until, for, foreach
1748        modifier: String,
1749        /// Modifier condition
1750        condition: Box<Node>,
1751    },
1752
1753    // Functions
1754    /// Subroutine declaration (function) including name, prototype, signature and body.
1755    Subroutine {
1756        /// Name of the subroutine
1757        ///
1758        /// # Precise Navigation Support
1759        /// - Added name_span for exact LSP navigation
1760        /// - Enables precise go-to-definition and hover behavior
1761        /// - O(1) span lookup in workspace symbols
1762        ///
1763        /// ## Integration Points
1764        /// - Semantic token providers
1765        /// - Cross-reference generation
1766        /// - Symbol renaming
1767        name: Option<String>,
1768
1769        /// Source location span of the subroutine name
1770        ///
1771        /// ## Usage Notes
1772        /// - Always corresponds to the name field
1773        /// - Provides constant-time position information
1774        /// - Essential for precise editor interactions
1775        name_span: Option<SourceLocation>,
1776
1777        /// Optional prototype node (e.g. `($;@)`).
1778        prototype: Option<Box<Node>>,
1779        /// Optional signature node (Perl 5.20+ feature).
1780        signature: Option<Box<Node>>,
1781        /// Attributes attached to the subroutine (`:lvalue`, etc.).
1782        attributes: Vec<String>,
1783        /// The body block of the subroutine.
1784        body: Box<Node>,
1785    },
1786
1787    /// Subroutine prototype specification: `sub foo ($;@) { ... }`
1788    Prototype {
1789        /// Prototype string defining argument behavior
1790        content: String,
1791    },
1792
1793    /// Subroutine signature (Perl 5.20+): `sub foo ($x, $y = 0) { ... }`
1794    Signature {
1795        /// List of signature parameters
1796        parameters: Vec<Node>,
1797    },
1798
1799    /// Mandatory signature parameter: `$x` in `sub foo ($x) { }`
1800    MandatoryParameter {
1801        /// Variable being bound
1802        variable: Box<Node>,
1803    },
1804
1805    /// Optional signature parameter with default: `$y = 0` in `sub foo ($y = 0) { }`
1806    OptionalParameter {
1807        /// Variable being bound
1808        variable: Box<Node>,
1809        /// Default value expression
1810        default_value: Box<Node>,
1811    },
1812
1813    /// Slurpy parameter collecting remaining args: `@rest` or `%opts` in signature
1814    SlurpyParameter {
1815        /// Array or hash variable to receive remaining arguments
1816        variable: Box<Node>,
1817    },
1818
1819    /// Named parameter placeholder in signature (future Perl feature)
1820    NamedParameter {
1821        /// Variable for named parameter binding
1822        variable: Box<Node>,
1823    },
1824
1825    /// Method declaration (Perl 5.38+ with `use feature 'class'`)
1826    Method {
1827        /// Method name
1828        name: String,
1829        /// Optional signature
1830        signature: Option<Box<Node>>,
1831        /// Method attributes (e.g., `:lvalue`)
1832        attributes: Vec<String>,
1833        /// Method body
1834        body: Box<Node>,
1835    },
1836
1837    /// Return statement: `return;` or `return $value;`
1838    Return {
1839        /// Optional return value
1840        value: Option<Box<Node>>,
1841    },
1842
1843    /// Loop control statement: `next`, `last`, or `redo`
1844    LoopControl {
1845        /// Control keyword: "next", "last", or "redo"
1846        op: String,
1847        /// Optional label: `next LABEL`
1848        label: Option<String>,
1849    },
1850
1851    /// Goto statement: `goto LABEL`, `goto &sub`, or `goto $expr`
1852    Goto {
1853        /// The target of the goto (label identifier, sub reference, or expression)
1854        target: Box<Node>,
1855    },
1856
1857    /// Method call: `$obj->method(@args)` or `$obj->method`
1858    MethodCall {
1859        /// Object or class expression
1860        object: Box<Node>,
1861        /// Method name being called
1862        method: String,
1863        /// Method arguments
1864        args: Vec<Node>,
1865    },
1866
1867    /// Function call: `foo(@args)` or `foo()`
1868    FunctionCall {
1869        /// Function name (may be qualified: `Package::func`)
1870        name: String,
1871        /// Function arguments
1872        args: Vec<Node>,
1873    },
1874
1875    /// Indirect object call (legacy syntax): `new Class @args`
1876    IndirectCall {
1877        /// Method name
1878        method: String,
1879        /// Object or class
1880        object: Box<Node>,
1881        /// Arguments
1882        args: Vec<Node>,
1883    },
1884
1885    /// Regex literal: `/pattern/modifiers` or `qr/pattern/modifiers`
1886    Regex {
1887        /// Regular expression pattern
1888        pattern: String,
1889        /// Replacement string (for s/// when parsed as regex)
1890        replacement: Option<String>,
1891        /// Regex modifiers (i, m, s, x, g, etc.)
1892        modifiers: String,
1893        /// Whether the regex contains embedded code `(?{...})`
1894        has_embedded_code: bool,
1895    },
1896
1897    /// Match operation: `$str =~ /pattern/modifiers` or `$str !~ /pattern/modifiers`
1898    Match {
1899        /// Expression to match against
1900        expr: Box<Node>,
1901        /// Pattern to match
1902        pattern: String,
1903        /// Match modifiers
1904        modifiers: String,
1905        /// Whether the regex contains embedded code `(?{...})`
1906        has_embedded_code: bool,
1907        /// Whether the binding operator was `!~` (negated match)
1908        negated: bool,
1909    },
1910
1911    /// Substitution operation: `$str =~ s/pattern/replacement/modifiers`
1912    Substitution {
1913        /// Expression to substitute in
1914        expr: Box<Node>,
1915        /// Pattern to find
1916        pattern: String,
1917        /// Replacement string
1918        replacement: String,
1919        /// Substitution modifiers (g, e, r, etc.)
1920        modifiers: String,
1921        /// Whether the regex contains embedded code `(?{...})`
1922        has_embedded_code: bool,
1923        /// Whether the binding operator was `!~` (negated match)
1924        negated: bool,
1925    },
1926
1927    /// Transliteration operation: `$str =~ tr/search/replace/` or `y///`
1928    Transliteration {
1929        /// Expression to transliterate
1930        expr: Box<Node>,
1931        /// Characters to search for
1932        search: String,
1933        /// Replacement characters
1934        replace: String,
1935        /// Transliteration modifiers (c, d, s, r)
1936        modifiers: String,
1937        /// Whether the binding operator was `!~` (negated match)
1938        negated: bool,
1939    },
1940
1941    // Package system
1942    /// Package declaration (e.g. `package Foo;`) and optional inline block form.
1943    Package {
1944        /// Name of the package
1945        ///
1946        /// # Precise Navigation Support
1947        /// - Added name_span for exact LSP navigation
1948        /// - Enables precise go-to-definition and hover behavior
1949        /// - O(1) span lookup in workspace symbols
1950        ///
1951        /// ## Integration Points
1952        /// - Workspace indexing
1953        /// - Cross-module symbol resolution
1954        /// - Code action providers
1955        name: String,
1956
1957        /// Source location span of the package name
1958        ///
1959        /// ## Usage Notes
1960        /// - Always corresponds to the name field
1961        /// - Provides constant-time position information
1962        /// - Essential for precise editor interactions
1963        name_span: SourceLocation,
1964
1965        /// Optional inline block for `package Foo { ... }` declarations.
1966        block: Option<Box<Node>>,
1967    },
1968
1969    /// Use statement for module loading: `use Module qw(imports);`
1970    Use {
1971        /// Module name to load
1972        module: String,
1973        /// Import arguments (symbols to import)
1974        args: Vec<String>,
1975        /// Whether this module is a known source filter (security risk)
1976        has_filter_risk: bool,
1977    },
1978
1979    /// No statement for disabling features: `no strict;`
1980    No {
1981        /// Module/pragma name to disable
1982        module: String,
1983        /// Arguments for the no statement
1984        args: Vec<String>,
1985        /// Whether this module is a known source filter (security risk)
1986        has_filter_risk: bool,
1987    },
1988
1989    /// Phase block for compile/runtime hooks: `BEGIN`, `END`, `CHECK`, `INIT`, `UNITCHECK`
1990    PhaseBlock {
1991        /// Phase name: BEGIN, END, CHECK, INIT, UNITCHECK
1992        phase: String,
1993        /// Source location span of the phase block name for precise navigation
1994        phase_span: Option<SourceLocation>,
1995        /// Block to execute during the specified phase
1996        block: Box<Node>,
1997    },
1998
1999    /// Data section marker: `__DATA__` or `__END__`
2000    DataSection {
2001        /// Section marker (__DATA__ or __END__)
2002        marker: String,
2003        /// Content following the marker (if any)
2004        body: Option<String>,
2005    },
2006
2007    /// Class declaration (Perl 5.38+ with `use feature 'class'`)
2008    Class {
2009        /// Class name
2010        name: String,
2011        /// Class body containing methods and attributes
2012        body: Box<Node>,
2013    },
2014
2015    /// Format declaration for legacy report generation
2016    Format {
2017        /// Format name (defaults to filehandle name)
2018        name: String,
2019        /// Format specification body
2020        body: String,
2021    },
2022
2023    /// Bare identifier (bareword or package-qualified name)
2024    Identifier {
2025        /// Identifier string
2026        name: String,
2027    },
2028
2029    /// Parse error placeholder with error message and recovery context
2030    Error {
2031        /// Error description
2032        message: String,
2033        /// Expected token types (if any)
2034        expected: Vec<TokenKind>,
2035        /// The token actually found (if any)
2036        found: Option<Token>,
2037        /// Partial AST node parsed before error (if any)
2038        partial: Option<Box<Node>>,
2039    },
2040
2041    /// Missing expression where one was expected
2042    MissingExpression,
2043    /// Missing statement where one was expected
2044    MissingStatement,
2045    /// Missing identifier where one was expected
2046    MissingIdentifier,
2047    /// Missing block where one was expected
2048    MissingBlock,
2049
2050    /// Lexer budget exceeded marker preserving partial parse results
2051    ///
2052    /// Used when recursion or token limits are hit to preserve already-parsed content.
2053    UnknownRest,
2054}
2055
2056impl NodeKind {
2057    /// Get the name of this `NodeKind` as a static string.
2058    ///
2059    /// Useful for diagnostics, logging, and human-readable AST dumps.
2060    ///
2061    /// # Examples
2062    ///
2063    /// ```
2064    /// use perl_ast::NodeKind;
2065    ///
2066    /// let kind = NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() };
2067    /// assert_eq!(kind.kind_name(), "Variable");
2068    ///
2069    /// let kind = NodeKind::Program { statements: vec![] };
2070    /// assert_eq!(kind.kind_name(), "Program");
2071    /// ```
2072    pub fn kind_name(&self) -> &'static str {
2073        match self {
2074            NodeKind::Program { .. } => "Program",
2075            NodeKind::ExpressionStatement { .. } => "ExpressionStatement",
2076            NodeKind::VariableDeclaration { .. } => "VariableDeclaration",
2077            NodeKind::VariableListDeclaration { .. } => "VariableListDeclaration",
2078            NodeKind::Variable { .. } => "Variable",
2079            NodeKind::VariableWithAttributes { .. } => "VariableWithAttributes",
2080            NodeKind::Assignment { .. } => "Assignment",
2081            NodeKind::Binary { .. } => "Binary",
2082            NodeKind::Ternary { .. } => "Ternary",
2083            NodeKind::Unary { .. } => "Unary",
2084            NodeKind::Diamond => "Diamond",
2085            NodeKind::Ellipsis => "Ellipsis",
2086            NodeKind::Undef => "Undef",
2087            NodeKind::Readline { .. } => "Readline",
2088            NodeKind::Glob { .. } => "Glob",
2089            NodeKind::Typeglob { .. } => "Typeglob",
2090            NodeKind::Number { .. } => "Number",
2091            NodeKind::String { .. } => "String",
2092            NodeKind::Heredoc { .. } => "Heredoc",
2093            NodeKind::ArrayLiteral { .. } => "ArrayLiteral",
2094            NodeKind::HashLiteral { .. } => "HashLiteral",
2095            NodeKind::Block { .. } => "Block",
2096            NodeKind::Eval { .. } => "Eval",
2097            NodeKind::Do { .. } => "Do",
2098            NodeKind::Try { .. } => "Try",
2099            NodeKind::If { .. } => "If",
2100            NodeKind::LabeledStatement { .. } => "LabeledStatement",
2101            NodeKind::While { .. } => "While",
2102            NodeKind::Tie { .. } => "Tie",
2103            NodeKind::Untie { .. } => "Untie",
2104            NodeKind::For { .. } => "For",
2105            NodeKind::Foreach { .. } => "Foreach",
2106            NodeKind::Given { .. } => "Given",
2107            NodeKind::When { .. } => "When",
2108            NodeKind::Default { .. } => "Default",
2109            NodeKind::StatementModifier { .. } => "StatementModifier",
2110            NodeKind::Subroutine { .. } => "Subroutine",
2111            NodeKind::Prototype { .. } => "Prototype",
2112            NodeKind::Signature { .. } => "Signature",
2113            NodeKind::MandatoryParameter { .. } => "MandatoryParameter",
2114            NodeKind::OptionalParameter { .. } => "OptionalParameter",
2115            NodeKind::SlurpyParameter { .. } => "SlurpyParameter",
2116            NodeKind::NamedParameter { .. } => "NamedParameter",
2117            NodeKind::Method { .. } => "Method",
2118            NodeKind::Return { .. } => "Return",
2119            NodeKind::LoopControl { .. } => "LoopControl",
2120            NodeKind::Goto { .. } => "Goto",
2121            NodeKind::MethodCall { .. } => "MethodCall",
2122            NodeKind::FunctionCall { .. } => "FunctionCall",
2123            NodeKind::IndirectCall { .. } => "IndirectCall",
2124            NodeKind::Regex { .. } => "Regex",
2125            NodeKind::Match { .. } => "Match",
2126            NodeKind::Substitution { .. } => "Substitution",
2127            NodeKind::Transliteration { .. } => "Transliteration",
2128            NodeKind::Package { .. } => "Package",
2129            NodeKind::Use { .. } => "Use",
2130            NodeKind::No { .. } => "No",
2131            NodeKind::PhaseBlock { .. } => "PhaseBlock",
2132            NodeKind::DataSection { .. } => "DataSection",
2133            NodeKind::Class { .. } => "Class",
2134            NodeKind::Format { .. } => "Format",
2135            NodeKind::Identifier { .. } => "Identifier",
2136            NodeKind::Error { .. } => "Error",
2137            NodeKind::MissingExpression => "MissingExpression",
2138            NodeKind::MissingStatement => "MissingStatement",
2139            NodeKind::MissingIdentifier => "MissingIdentifier",
2140            NodeKind::MissingBlock => "MissingBlock",
2141            NodeKind::UnknownRest => "UnknownRest",
2142        }
2143    }
2144
2145    /// Canonical list of **all** `kind_name()` strings, in alphabetical order.
2146    ///
2147    /// Every consumer that needs the full set of NodeKind names should reference
2148    /// this constant instead of maintaining a hand-written copy.
2149    pub const ALL_KIND_NAMES: &[&'static str] = &[
2150        "ArrayLiteral",
2151        "Assignment",
2152        "Binary",
2153        "Block",
2154        "Class",
2155        "DataSection",
2156        "Default",
2157        "Diamond",
2158        "Do",
2159        "Ellipsis",
2160        "Error",
2161        "Eval",
2162        "ExpressionStatement",
2163        "For",
2164        "Foreach",
2165        "Format",
2166        "FunctionCall",
2167        "Given",
2168        "Glob",
2169        "Goto",
2170        "HashLiteral",
2171        "Heredoc",
2172        "Identifier",
2173        "If",
2174        "IndirectCall",
2175        "LabeledStatement",
2176        "LoopControl",
2177        "MandatoryParameter",
2178        "Match",
2179        "Method",
2180        "MethodCall",
2181        "MissingBlock",
2182        "MissingExpression",
2183        "MissingIdentifier",
2184        "MissingStatement",
2185        "NamedParameter",
2186        "No",
2187        "Number",
2188        "OptionalParameter",
2189        "Package",
2190        "PhaseBlock",
2191        "Program",
2192        "Prototype",
2193        "Readline",
2194        "Regex",
2195        "Return",
2196        "Signature",
2197        "SlurpyParameter",
2198        "StatementModifier",
2199        "String",
2200        "Subroutine",
2201        "Substitution",
2202        "Ternary",
2203        "Tie",
2204        "Transliteration",
2205        "Try",
2206        "Typeglob",
2207        "Unary",
2208        "Undef",
2209        "UnknownRest",
2210        "Untie",
2211        "Use",
2212        "Variable",
2213        "VariableDeclaration",
2214        "VariableListDeclaration",
2215        "VariableWithAttributes",
2216        "When",
2217        "While",
2218    ];
2219
2220    /// Subset of `ALL_KIND_NAMES` that represent synthetic/recovery nodes.
2221    ///
2222    /// These kinds are only produced by `parse_with_recovery()` on malformed
2223    /// input and should not be expected in clean parses.
2224    pub const RECOVERY_KIND_NAMES: &[&'static str] = &[
2225        "Error",
2226        "MissingBlock",
2227        "MissingExpression",
2228        "MissingIdentifier",
2229        "MissingStatement",
2230        "UnknownRest",
2231    ];
2232}
2233
/// Builds the S-expression label for a unary operator.
///
/// Three symbolic operators are spelled out as words:
///
/// - `!` becomes `unary_not` (the same label the `not` keyword receives)
/// - `~` becomes `unary_complement`
/// - `\` becomes `unary_ref`
///
/// Every other operator is rendered as `unary_` followed by the operator
/// text itself, with each space replaced by `_`. That single rule covers
/// arithmetic `+`/`-`, `++`/`--`, the file tests (`-f`, `-d`, `-e`, ...),
/// the postfix dereferences (`->@*`, `->%*`, `->$*`, `->&*`, `->**`),
/// `defined`, and any operator that is not otherwise recognised.
fn format_unary_operator(op: &str) -> String {
    // Only these operators get a name that differs from their own spelling.
    let suffix = match op {
        "!" => "not",
        "~" => "complement",
        "\\" => "ref",
        verbatim => verbatim,
    };

    // The word suffixes above contain no spaces, so the replacement can only
    // affect operators that fall through verbatim.
    format!("unary_{}", suffix.replace(' ', "_"))
}
2298
/// Builds the S-expression label for a binary operator.
///
/// The two arrow-subscript forms have dedicated names:
///
/// - `->{}` becomes `arrow_hash_deref`
/// - `->[]` becomes `arrow_array_deref`
///
/// Every other operator is rendered as `binary_` followed by the operator
/// text itself, with each space replaced by `_`. That single rule covers the
/// arithmetic, comparison, string-comparison, logical, bitwise, binding
/// (`=~`, `!~`), smart-match, repetition, concatenation, range, `isa`,
/// assignment, defined-or, `->`, and plain subscript (`{}`, `[]`) operators,
/// as well as any operator that is not otherwise recognised.
fn format_binary_operator(op: &str) -> String {
    match op {
        // Arrow dereferences are the only operators without the `binary_` prefix.
        "->{}" => "arrow_hash_deref".to_owned(),
        "->[]" => "arrow_array_deref".to_owned(),
        _ => format!("binary_{}", op.replace(' ', "_")),
    }
}
2398
// `SourceLocation` is now provided by the `perl-position-tracking` crate.
// See the re-export at the top of this file.
2401
2402#[cfg(test)]
2403mod tests {
2404    use super::*;
2405    use std::collections::BTreeSet;
2406
2407    /// Build a dummy instance for every `NodeKind` variant and return its
2408    /// `kind_name()`.  This ensures the compiler forces us to update here
2409    /// whenever a variant is added/removed.
2410    fn all_kind_names_from_variants() -> BTreeSet<&'static str> {
2411        let loc = SourceLocation { start: 0, end: 0 };
2412        let dummy_node = || Node::new(NodeKind::Undef, loc);
2413
2414        let variants: Vec<NodeKind> = vec![
2415            NodeKind::Program { statements: vec![] },
2416            NodeKind::ExpressionStatement { expression: Box::new(dummy_node()) },
2417            NodeKind::VariableDeclaration {
2418                declarator: String::new(),
2419                variable: Box::new(dummy_node()),
2420                attributes: vec![],
2421                initializer: None,
2422            },
2423            NodeKind::VariableListDeclaration {
2424                declarator: String::new(),
2425                variables: vec![],
2426                attributes: vec![],
2427                initializer: None,
2428            },
2429            NodeKind::Variable { sigil: String::new(), name: String::new() },
2430            NodeKind::VariableWithAttributes {
2431                variable: Box::new(dummy_node()),
2432                attributes: vec![],
2433            },
2434            NodeKind::Assignment {
2435                lhs: Box::new(dummy_node()),
2436                rhs: Box::new(dummy_node()),
2437                op: String::new(),
2438            },
2439            NodeKind::Binary {
2440                op: String::new(),
2441                left: Box::new(dummy_node()),
2442                right: Box::new(dummy_node()),
2443            },
2444            NodeKind::Ternary {
2445                condition: Box::new(dummy_node()),
2446                then_expr: Box::new(dummy_node()),
2447                else_expr: Box::new(dummy_node()),
2448            },
2449            NodeKind::Unary { op: String::new(), operand: Box::new(dummy_node()) },
2450            NodeKind::Diamond,
2451            NodeKind::Ellipsis,
2452            NodeKind::Undef,
2453            NodeKind::Readline { filehandle: None },
2454            NodeKind::Glob { pattern: String::new() },
2455            NodeKind::Typeglob { name: String::new() },
2456            NodeKind::Number { value: String::new() },
2457            NodeKind::String { value: String::new(), interpolated: false },
2458            NodeKind::Heredoc {
2459                delimiter: String::new(),
2460                content: String::new(),
2461                interpolated: false,
2462                indented: false,
2463                command: false,
2464                body_span: None,
2465            },
2466            NodeKind::ArrayLiteral { elements: vec![] },
2467            NodeKind::HashLiteral { pairs: vec![] },
2468            NodeKind::Block { statements: vec![] },
2469            NodeKind::Eval { block: Box::new(dummy_node()) },
2470            NodeKind::Do { block: Box::new(dummy_node()) },
2471            NodeKind::Try {
2472                body: Box::new(dummy_node()),
2473                catch_blocks: vec![],
2474                finally_block: None,
2475            },
2476            NodeKind::If {
2477                condition: Box::new(dummy_node()),
2478                then_branch: Box::new(dummy_node()),
2479                elsif_branches: vec![],
2480                else_branch: None,
2481            },
2482            NodeKind::LabeledStatement { label: String::new(), statement: Box::new(dummy_node()) },
2483            NodeKind::While {
2484                condition: Box::new(dummy_node()),
2485                body: Box::new(dummy_node()),
2486                continue_block: None,
2487            },
2488            NodeKind::Tie {
2489                variable: Box::new(dummy_node()),
2490                package: Box::new(dummy_node()),
2491                args: vec![],
2492            },
2493            NodeKind::Untie { variable: Box::new(dummy_node()) },
2494            NodeKind::For {
2495                init: None,
2496                condition: None,
2497                update: None,
2498                body: Box::new(dummy_node()),
2499                continue_block: None,
2500            },
2501            NodeKind::Foreach {
2502                variable: Box::new(dummy_node()),
2503                list: Box::new(dummy_node()),
2504                body: Box::new(dummy_node()),
2505                continue_block: None,
2506            },
2507            NodeKind::Given { expr: Box::new(dummy_node()), body: Box::new(dummy_node()) },
2508            NodeKind::When { condition: Box::new(dummy_node()), body: Box::new(dummy_node()) },
2509            NodeKind::Default { body: Box::new(dummy_node()) },
2510            NodeKind::StatementModifier {
2511                statement: Box::new(dummy_node()),
2512                modifier: String::new(),
2513                condition: Box::new(dummy_node()),
2514            },
2515            NodeKind::Subroutine {
2516                name: None,
2517                name_span: None,
2518                prototype: None,
2519                signature: None,
2520                attributes: vec![],
2521                body: Box::new(dummy_node()),
2522            },
2523            NodeKind::Prototype { content: String::new() },
2524            NodeKind::Signature { parameters: vec![] },
2525            NodeKind::MandatoryParameter { variable: Box::new(dummy_node()) },
2526            NodeKind::OptionalParameter {
2527                variable: Box::new(dummy_node()),
2528                default_value: Box::new(dummy_node()),
2529            },
2530            NodeKind::SlurpyParameter { variable: Box::new(dummy_node()) },
2531            NodeKind::NamedParameter { variable: Box::new(dummy_node()) },
2532            NodeKind::Method {
2533                name: String::new(),
2534                signature: None,
2535                attributes: vec![],
2536                body: Box::new(dummy_node()),
2537            },
2538            NodeKind::Return { value: None },
2539            NodeKind::LoopControl { op: String::new(), label: None },
2540            NodeKind::Goto { target: Box::new(dummy_node()) },
2541            NodeKind::MethodCall {
2542                object: Box::new(dummy_node()),
2543                method: String::new(),
2544                args: vec![],
2545            },
2546            NodeKind::FunctionCall { name: String::new(), args: vec![] },
2547            NodeKind::IndirectCall {
2548                method: String::new(),
2549                object: Box::new(dummy_node()),
2550                args: vec![],
2551            },
2552            NodeKind::Regex {
2553                pattern: String::new(),
2554                replacement: None,
2555                modifiers: String::new(),
2556                has_embedded_code: false,
2557            },
2558            NodeKind::Match {
2559                expr: Box::new(dummy_node()),
2560                pattern: String::new(),
2561                modifiers: String::new(),
2562                has_embedded_code: false,
2563                negated: false,
2564            },
2565            NodeKind::Substitution {
2566                expr: Box::new(dummy_node()),
2567                pattern: String::new(),
2568                replacement: String::new(),
2569                modifiers: String::new(),
2570                has_embedded_code: false,
2571                negated: false,
2572            },
2573            NodeKind::Transliteration {
2574                expr: Box::new(dummy_node()),
2575                search: String::new(),
2576                replace: String::new(),
2577                modifiers: String::new(),
2578                negated: false,
2579            },
2580            NodeKind::Package { name: String::new(), name_span: loc, block: None },
2581            NodeKind::Use { module: String::new(), args: vec![], has_filter_risk: false },
2582            NodeKind::No { module: String::new(), args: vec![], has_filter_risk: false },
2583            NodeKind::PhaseBlock {
2584                phase: String::new(),
2585                phase_span: None,
2586                block: Box::new(dummy_node()),
2587            },
2588            NodeKind::DataSection { marker: String::new(), body: None },
2589            NodeKind::Class { name: String::new(), body: Box::new(dummy_node()) },
2590            NodeKind::Format { name: String::new(), body: String::new() },
2591            NodeKind::Identifier { name: String::new() },
2592            NodeKind::Error {
2593                message: String::new(),
2594                expected: vec![],
2595                found: None,
2596                partial: None,
2597            },
2598            NodeKind::MissingExpression,
2599            NodeKind::MissingStatement,
2600            NodeKind::MissingIdentifier,
2601            NodeKind::MissingBlock,
2602            NodeKind::UnknownRest,
2603        ];
2604
2605        variants.iter().map(|v| v.kind_name()).collect()
2606    }
2607
2608    #[test]
2609    fn all_kind_names_is_consistent_with_kind_name() {
2610        let from_enum = all_kind_names_from_variants();
2611        let from_const: BTreeSet<&str> = NodeKind::ALL_KIND_NAMES.iter().copied().collect();
2612
2613        // Check for duplicates in the const array
2614        assert_eq!(
2615            NodeKind::ALL_KIND_NAMES.len(),
2616            from_const.len(),
2617            "ALL_KIND_NAMES contains duplicates"
2618        );
2619
2620        let only_in_enum: Vec<_> = from_enum.difference(&from_const).collect();
2621        let only_in_const: Vec<_> = from_const.difference(&from_enum).collect();
2622
2623        assert!(
2624            only_in_enum.is_empty() && only_in_const.is_empty(),
2625            "ALL_KIND_NAMES is out of sync with NodeKind variants:\n  \
2626             in enum but not in ALL_KIND_NAMES: {only_in_enum:?}\n  \
2627             in ALL_KIND_NAMES but not in enum: {only_in_const:?}"
2628        );
2629    }
2630
2631    #[test]
2632    fn recovery_kind_names_is_subset_of_all() {
2633        let all: BTreeSet<&str> = NodeKind::ALL_KIND_NAMES.iter().copied().collect();
2634        let recovery: BTreeSet<&str> = NodeKind::RECOVERY_KIND_NAMES.iter().copied().collect();
2635
2636        // No duplicates
2637        assert_eq!(
2638            NodeKind::RECOVERY_KIND_NAMES.len(),
2639            recovery.len(),
2640            "RECOVERY_KIND_NAMES contains duplicates"
2641        );
2642
2643        let not_in_all: Vec<_> = recovery.difference(&all).collect();
2644        assert!(
2645            not_in_all.is_empty(),
2646            "RECOVERY_KIND_NAMES contains entries not in ALL_KIND_NAMES: {not_in_all:?}"
2647        );
2648    }
2649}