Skip to main content

perl_ast/
ast.rs

1//! Abstract Syntax Tree definitions for Perl within the parsing and LSP workflow.
2//!
3//! This module defines the comprehensive AST node types that represent parsed Perl code
4//! during the Parse → Index → Navigate → Complete → Analyze stages. The design is optimized
5//! for both direct use in Rust analysis and for generating tree-sitter compatible
6//! S-expressions during large workspace processing operations.
7//!
8//! # LSP Workflow Integration
9//!
10//! The AST structures support Perl tooling workflows by:
11//! - **Parse**: Produced by the parser as the canonical syntax tree
12//! - **Index**: Traversed to build symbol and reference tables
13//! - **Navigate**: Provides locations for definition and reference lookups
14//! - **Complete**: Supplies context for completion, hover, and signature help
15//! - **Analyze**: Feeds semantic analysis, diagnostics, and refactoring
16//!
17//! # Performance Characteristics
18//!
19//! AST structures are optimized for large codebases with:
20//! - Memory-efficient node representation using `Box<Node>` for recursive structures
21//! - Fast pattern matching via enum variants for common Perl constructs
22//! - Location tracking for precise error reporting in large files
23//! - Cheap cloning for parallel analysis tasks
24//!
25//! # Usage Examples
26//!
27//! ## Basic AST Construction
28//!
29//! ```rust
30//! use perl_ast::{Node, NodeKind, SourceLocation};
31//!
32//! // Create a simple variable declaration node
33//! let location = SourceLocation { start: 0, end: 10 };
34//! let node = Node::new(
35//!     NodeKind::VariableDeclaration {
36//!         declarator: "my".to_string(),
37//!         variable: Box::new(Node::new(
38//!             NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
39//!             location,
40//!         )),
41//!         attributes: vec![],
42//!         initializer: None,
43//!     },
44//!     location,
45//! );
46//! assert_eq!(node.kind.kind_name(), "VariableDeclaration");
47//! ```
48//!
49//! ## Tree-sitter S-expression Generation
50//!
51//! ```rust
52//! use perl_ast::{Node, NodeKind, SourceLocation};
53//!
54//! let loc = SourceLocation { start: 0, end: 2 };
55//! let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
56//! let program = Node::new(NodeKind::Program { statements: vec![num] }, loc);
57//!
58//! let sexp = program.to_sexp();
59//! assert!(sexp.starts_with("(source_file"));
60//! ```
61//!
62//! ## AST Traversal and Analysis
63//!
64//! ```rust
65//! use perl_ast::{Node, NodeKind, SourceLocation};
66//!
67//! fn count_variables(node: &Node) -> usize {
68//!     let mut count = 0;
69//!     match &node.kind {
70//!         NodeKind::Variable { .. } => count += 1,
71//!         NodeKind::Program { statements } => {
72//!             for stmt in statements {
73//!                 count += count_variables(stmt);
74//!             }
75//!         }
76//!         _ => {} // Handle other node types as needed
77//!     }
78//!     count
79//! }
80//!
81//! let loc = SourceLocation { start: 0, end: 5 };
82//! let var = Node::new(
83//!     NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
84//!     loc,
85//! );
86//! let program = Node::new(NodeKind::Program { statements: vec![var] }, loc);
87//! assert_eq!(count_variables(&program), 1);
88//! ```
89//!
90//! ## Parsing Integration
91//!
92//! In practice the AST is produced by the parser rather than built by hand
93//! (requires `perl-parser-core`):
94//!
95//! ```rust,ignore
96//! use perl_parser_core::Parser;
97//! use perl_ast::NodeKind;
98//!
99//! let mut parser = Parser::new("my $x = 42;");
100//! let ast = parser.parse().expect("should parse");
101//! assert!(matches!(ast.kind, NodeKind::Program { .. }));
102//! ```
103
104// Re-export SourceLocation from perl-position-tracking for unified span handling
105pub use perl_position_tracking::SourceLocation;
106// Re-export Token and TokenKind from perl-token for AST error nodes
107pub use perl_token::{Token, TokenKind};
108use std::fmt;
109
110/// Core AST node representing any Perl language construct within parsing workflows.
111///
112/// This is the fundamental building block for representing parsed Perl code. Each node
113/// contains both the semantic information (kind) and positional information (location)
114/// necessary for comprehensive script analysis.
115///
116/// # LSP Workflow Role
117///
118/// Nodes flow through tooling stages:
119/// - **Parse**: Created by the parser as it builds the syntax tree
120/// - **Index**: Visited to build symbol and reference tables
121/// - **Navigate**: Used to resolve definitions, references, and call hierarchy
122/// - **Complete**: Provides contextual information for completion and hover
123/// - **Analyze**: Drives semantic analysis and diagnostics
124///
125/// # Memory Optimization
126///
127/// The structure is designed for efficient memory usage during large-scale parsing:
128/// - `SourceLocation` uses compact position encoding for large files
129/// - `NodeKind` enum variants minimize memory overhead for common constructs
130/// - Clone operations are optimized for shared analysis workflows
131///
132/// # Examples
133///
134/// Construct a variable declaration node manually:
135///
136/// ```
137/// use perl_ast::{Node, NodeKind, SourceLocation};
138///
139/// let loc = SourceLocation { start: 0, end: 11 };
140/// let var = Node::new(
141///     NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
142///     loc,
143/// );
144/// let decl = Node::new(
145///     NodeKind::VariableDeclaration {
146///         declarator: "my".to_string(),
147///         variable: Box::new(var),
148///         attributes: vec![],
149///         initializer: None,
150///     },
151///     loc,
152/// );
153/// assert_eq!(decl.kind.kind_name(), "VariableDeclaration");
154/// ```
155///
156/// Typically you obtain nodes from the parser rather than constructing them by hand:
157///
158/// ```ignore
159/// use perl_parser::Parser;
160///
161/// let mut parser = Parser::new("my $x = 42;");
162/// let ast = parser.parse()?;
163/// println!("AST: {}", ast.to_sexp());
164/// ```
165#[derive(Debug, Clone, PartialEq)]
166pub struct Node {
167    /// The specific type and semantic content of this AST node
168    pub kind: NodeKind,
169    /// Source position information for error reporting and code navigation
170    pub location: SourceLocation,
171}
172
173impl Node {
174    /// Create a new AST node with the given kind and source location.
175    ///
176    /// # Examples
177    ///
178    /// ```
179    /// use perl_ast::{Node, NodeKind, SourceLocation};
180    ///
181    /// let node = Node::new(
182    ///     NodeKind::Number { value: "42".to_string() },
183    ///     SourceLocation { start: 0, end: 2 },
184    /// );
185    /// assert_eq!(node.kind.kind_name(), "Number");
186    /// assert_eq!(node.location.start, 0);
187    /// ```
188    pub fn new(kind: NodeKind, location: SourceLocation) -> Self {
189        Node { kind, location }
190    }
191
192    /// Convert the AST to a tree-sitter compatible S-expression.
193    ///
194    /// Produces a parenthesized representation compatible with tree-sitter's
195    /// S-expression format, useful for debugging and snapshot testing.
196    ///
197    /// # Examples
198    ///
199    /// ```
200    /// use perl_ast::{Node, NodeKind, SourceLocation};
201    ///
202    /// let loc = SourceLocation { start: 0, end: 2 };
203    /// let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
204    /// let program = Node::new(
205    ///     NodeKind::Program { statements: vec![num] },
206    ///     loc,
207    /// );
208    /// let sexp = program.to_sexp();
209    /// assert!(sexp.starts_with("(source_file"));
210    /// ```
211    pub fn to_sexp(&self) -> String {
212        match &self.kind {
213            NodeKind::Program { statements } => {
214                let stmts =
215                    statements.iter().map(|s| s.to_sexp_inner()).collect::<Vec<_>>().join(" ");
216                format!("(source_file {})", stmts)
217            }
218
219            NodeKind::ExpressionStatement { expression } => {
220                format!("(expression_statement {})", expression.to_sexp())
221            }
222
223            NodeKind::VariableDeclaration { declarator, variable, attributes, initializer } => {
224                let attrs_str = if attributes.is_empty() {
225                    String::new()
226                } else {
227                    format!(" (attributes {})", attributes.join(" "))
228                };
229                if let Some(init) = initializer {
230                    format!(
231                        "({}_declaration {}{}{})",
232                        declarator,
233                        variable.to_sexp(),
234                        attrs_str,
235                        init.to_sexp()
236                    )
237                } else {
238                    format!("({}_declaration {}{})", declarator, variable.to_sexp(), attrs_str)
239                }
240            }
241
242            NodeKind::VariableListDeclaration {
243                declarator,
244                variables,
245                attributes,
246                initializer,
247            } => {
248                let vars = variables.iter().map(|v| v.to_sexp()).collect::<Vec<_>>().join(" ");
249                let attrs_str = if attributes.is_empty() {
250                    String::new()
251                } else {
252                    format!(" (attributes {})", attributes.join(" "))
253                };
254                if let Some(init) = initializer {
255                    format!(
256                        "({}_declaration ({}){}{})",
257                        declarator,
258                        vars,
259                        attrs_str,
260                        init.to_sexp()
261                    )
262                } else {
263                    format!("({}_declaration ({}){})", declarator, vars, attrs_str)
264                }
265            }
266
267            NodeKind::Variable { sigil, name } => {
268                // Format expected by bless parsing tests: (variable $ name)
269                format!("(variable {} {})", sigil, name)
270            }
271
272            NodeKind::VariableWithAttributes { variable, attributes } => {
273                let attrs = attributes.join(" ");
274                format!("({} (attributes {}))", variable.to_sexp(), attrs)
275            }
276
277            NodeKind::Assignment { lhs, rhs, op } => {
278                format!(
279                    "(assignment_{} {} {})",
280                    op.replace("=", "assign"),
281                    lhs.to_sexp(),
282                    rhs.to_sexp()
283                )
284            }
285
286            NodeKind::Binary { op, left, right } => {
287                // Tree-sitter format: (binary_op left right)
288                let op_name = format_binary_operator(op);
289                format!("({} {} {})", op_name, left.to_sexp(), right.to_sexp())
290            }
291
292            NodeKind::Ternary { condition, then_expr, else_expr } => {
293                format!(
294                    "(ternary {} {} {})",
295                    condition.to_sexp(),
296                    then_expr.to_sexp(),
297                    else_expr.to_sexp()
298                )
299            }
300
301            NodeKind::Unary { op, operand } => {
302                // Tree-sitter format: (unary_op operand)
303                let op_name = format_unary_operator(op);
304                format!("({} {})", op_name, operand.to_sexp())
305            }
306
307            NodeKind::Diamond => "(diamond)".to_string(),
308
309            NodeKind::Ellipsis => "(ellipsis)".to_string(),
310
311            NodeKind::Undef => "(undef)".to_string(),
312
313            NodeKind::Readline { filehandle } => {
314                if let Some(fh) = filehandle {
315                    format!("(readline {})", fh)
316                } else {
317                    "(readline)".to_string()
318                }
319            }
320
321            NodeKind::Glob { pattern } => {
322                format!("(glob {})", pattern)
323            }
324            NodeKind::Typeglob { name } => {
325                format!("(typeglob {})", name)
326            }
327
328            NodeKind::Number { value } => {
329                // Format expected by bless parsing tests: (number value)
330                format!("(number {})", value)
331            }
332
333            NodeKind::String { value, interpolated } => {
334                // Escape quotes in string value to prevent S-expression parsing issues
335                let escaped_value = value.replace('\\', "\\\\").replace('"', "\\\"");
336
337                // Format based on interpolation status
338                if *interpolated {
339                    format!("(string_interpolated \"{}\")", escaped_value)
340                } else {
341                    format!("(string \"{}\")", escaped_value)
342                }
343            }
344
345            NodeKind::Heredoc { delimiter, content, interpolated, indented, command, .. } => {
346                let type_str = if *command {
347                    "heredoc_command"
348                } else if *indented {
349                    if *interpolated { "heredoc_indented_interpolated" } else { "heredoc_indented" }
350                } else if *interpolated {
351                    "heredoc_interpolated"
352                } else {
353                    "heredoc"
354                };
355                format!("({} {:?} {:?})", type_str, delimiter, content)
356            }
357
358            NodeKind::ArrayLiteral { elements } => {
359                let elems = elements.iter().map(|e| e.to_sexp()).collect::<Vec<_>>().join(" ");
360                format!("(array {})", elems)
361            }
362
363            NodeKind::HashLiteral { pairs } => {
364                let kvs = pairs
365                    .iter()
366                    .map(|(k, v)| format!("({} {})", k.to_sexp(), v.to_sexp()))
367                    .collect::<Vec<_>>()
368                    .join(" ");
369                format!("(hash {})", kvs)
370            }
371
372            NodeKind::Block { statements } => {
373                let stmts = statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ");
374                format!("(block {})", stmts)
375            }
376
377            NodeKind::Eval { block } => {
378                format!("(eval {})", block.to_sexp())
379            }
380
381            NodeKind::Do { block } => {
382                format!("(do {})", block.to_sexp())
383            }
384
385            NodeKind::Defer { block } => {
386                format!("(defer {})", block.to_sexp())
387            }
388
389            NodeKind::Try { body, catch_blocks, finally_block } => {
390                let mut parts = vec![format!("(try {})", body.to_sexp())];
391
392                for (var, block) in catch_blocks {
393                    if let Some(v) = var {
394                        parts.push(format!("(catch {} {})", v, block.to_sexp()));
395                    } else {
396                        parts.push(format!("(catch {})", block.to_sexp()));
397                    }
398                }
399
400                if let Some(finally) = finally_block {
401                    parts.push(format!("(finally {})", finally.to_sexp()));
402                }
403
404                parts.join(" ")
405            }
406
407            NodeKind::If { condition, then_branch, elsif_branches, else_branch, keyword } => {
408                let kw = keyword.as_deref().unwrap_or("if");
409                let mut parts =
410                    vec![format!("({} {} {})", kw, condition.to_sexp(), then_branch.to_sexp())];
411
412                for (cond, block) in elsif_branches {
413                    parts.push(format!("(elsif {} {})", cond.to_sexp(), block.to_sexp()));
414                }
415
416                if let Some(else_block) = else_branch {
417                    parts.push(format!("(else {})", else_block.to_sexp()));
418                }
419
420                parts.join(" ")
421            }
422
423            NodeKind::LabeledStatement { label, statement } => {
424                format!("(labeled_statement {} {})", label, statement.to_sexp())
425            }
426
427            NodeKind::While { condition, body, continue_block, keyword } => {
428                let kw = keyword.as_deref().unwrap_or("while");
429                let mut s = format!("({} {} {})", kw, condition.to_sexp(), body.to_sexp());
430                if let Some(cont) = continue_block {
431                    s.push_str(&format!(" (continue {})", cont.to_sexp()));
432                }
433                s
434            }
435            NodeKind::Tie { variable, package, args } => {
436                let mut s = format!("(tie {} {}", variable.to_sexp(), package.to_sexp());
437                for arg in args {
438                    s.push_str(&format!(" {}", arg.to_sexp()));
439                }
440                s.push(')');
441                s
442            }
443            NodeKind::Untie { variable } => {
444                format!("(untie {})", variable.to_sexp())
445            }
446            NodeKind::For { init, condition, update, body, continue_block } => {
447                let init_str =
448                    init.as_ref().map(|i| i.to_sexp()).unwrap_or_else(|| "()".to_string());
449                let cond_str =
450                    condition.as_ref().map(|c| c.to_sexp()).unwrap_or_else(|| "()".to_string());
451                let update_str =
452                    update.as_ref().map(|u| u.to_sexp()).unwrap_or_else(|| "()".to_string());
453                let mut result =
454                    format!("(for {} {} {} {})", init_str, cond_str, update_str, body.to_sexp());
455                if let Some(cont) = continue_block {
456                    result.push_str(&format!(" (continue {})", cont.to_sexp()));
457                }
458                result
459            }
460
461            NodeKind::Foreach { variable, list, body, continue_block } => {
462                let cont = if let Some(cb) = continue_block {
463                    format!(" {}", cb.to_sexp())
464                } else {
465                    String::new()
466                };
467                format!(
468                    "(foreach {} {} {}{})",
469                    variable.to_sexp(),
470                    list.to_sexp(),
471                    body.to_sexp(),
472                    cont
473                )
474            }
475
476            NodeKind::Given { expr, body } => {
477                format!("(given {} {})", expr.to_sexp(), body.to_sexp())
478            }
479
480            NodeKind::When { condition, body } => {
481                format!("(when {} {})", condition.to_sexp(), body.to_sexp())
482            }
483
484            NodeKind::Default { body } => {
485                format!("(default {})", body.to_sexp())
486            }
487
488            NodeKind::StatementModifier { statement, modifier, condition } => {
489                format!(
490                    "(statement_modifier_{} {} {})",
491                    modifier,
492                    statement.to_sexp(),
493                    condition.to_sexp()
494                )
495            }
496
497            NodeKind::Subroutine { name, prototype, signature, attributes, body, name_span: _ } => {
498                if let Some(sub_name) = name {
499                    // Named subroutine - bless test expected format: (sub name () block)
500                    let mut parts = vec![sub_name.clone()];
501
502                    // Add attributes if present (before prototype/signature)
503                    if !attributes.is_empty() {
504                        for attr in attributes {
505                            parts.push(format!(":{}", attr));
506                        }
507                    }
508
509                    // Add prototype/signature - use () for empty prototype
510                    if let Some(proto) = prototype {
511                        parts.push(format!("({})", proto.to_sexp()));
512                    } else if signature.is_some() {
513                        // If there's a signature but no prototype, still show ()
514                        parts.push("()".to_string());
515                    } else {
516                        parts.push("()".to_string());
517                    }
518
519                    // Add body
520                    parts.push(body.to_sexp());
521
522                    // Format: (sub name [attrs...] ()(block ...)) - space between name and (), no space between () and block
523                    if parts.len() >= 3 && parts[parts.len() - 2] == "()" {
524                        let name_and_attrs = parts[0..parts.len() - 2].join(" ");
525                        let proto = &parts[parts.len() - 2];
526                        let body = &parts[parts.len() - 1];
527                        format!("(sub {} {}{})", name_and_attrs, proto, body)
528                    } else {
529                        format!("(sub {})", parts.join(" "))
530                    }
531                } else {
532                    // Anonymous subroutine - tree-sitter format
533                    let mut parts = Vec::new();
534
535                    // Add attributes if present
536                    if !attributes.is_empty() {
537                        let attrs: Vec<String> = attributes
538                            .iter()
539                            .map(|_attr| "(attribute (attribute_name))".to_string())
540                            .collect();
541                        parts.push(format!("(attrlist {})", attrs.join("")));
542                    }
543
544                    // Add prototype if present
545                    if let Some(proto) = prototype {
546                        parts.push(proto.to_sexp());
547                    }
548
549                    // Add signature if present
550                    if let Some(sig) = signature {
551                        parts.push(sig.to_sexp());
552                    }
553
554                    // Add body
555                    parts.push(body.to_sexp());
556
557                    format!("(anonymous_subroutine_expression {})", parts.join(""))
558                }
559            }
560
561            NodeKind::Prototype { content: _ } => "(prototype)".to_string(),
562
563            NodeKind::Signature { parameters } => {
564                let params = parameters.iter().map(|p| p.to_sexp()).collect::<Vec<_>>().join(" ");
565                format!("(signature {})", params)
566            }
567
568            NodeKind::MandatoryParameter { variable } => {
569                format!("(mandatory_parameter {})", variable.to_sexp())
570            }
571
572            NodeKind::OptionalParameter { variable, default_value } => {
573                format!("(optional_parameter {} {})", variable.to_sexp(), default_value.to_sexp())
574            }
575
576            NodeKind::SlurpyParameter { variable } => {
577                format!("(slurpy_parameter {})", variable.to_sexp())
578            }
579
580            NodeKind::NamedParameter { variable } => {
581                format!("(named_parameter {})", variable.to_sexp())
582            }
583
584            NodeKind::Method { name: _, signature, attributes, body } => {
585                let block_contents = match &body.kind {
586                    NodeKind::Block { statements } => {
587                        statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ")
588                    }
589                    _ => body.to_sexp(),
590                };
591
592                let mut parts = vec!["(bareword)".to_string()];
593
594                // Add signature if present
595                if let Some(sig) = signature {
596                    parts.push(sig.to_sexp());
597                }
598
599                // Add attributes if present
600                if !attributes.is_empty() {
601                    let attrs: Vec<String> = attributes
602                        .iter()
603                        .map(|_attr| "(attribute (attribute_name))".to_string())
604                        .collect();
605                    parts.push(format!("(attrlist {})", attrs.join("")));
606                }
607
608                parts.push(format!("(block {})", block_contents));
609                format!("(method_declaration_statement {})", parts.join(" "))
610            }
611
612            NodeKind::Return { value } => {
613                if let Some(val) = value {
614                    format!("(return {})", val.to_sexp())
615                } else {
616                    "(return)".to_string()
617                }
618            }
619
620            NodeKind::LoopControl { op, label } => {
621                if let Some(l) = label {
622                    format!("({} {})", op, l)
623                } else {
624                    format!("({})", op)
625                }
626            }
627
628            NodeKind::Goto { target } => {
629                format!("(goto {})", target.to_sexp())
630            }
631
632            NodeKind::MethodCall { object, method, args } => {
633                let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
634                format!("(method_call {} {} ({}))", object.to_sexp(), method, args_str)
635            }
636
637            NodeKind::FunctionCall { name, args } => {
638                // Special handling for functions that should use call format in tree-sitter tests
639                if matches!(
640                    name.as_str(),
641                    "bless"
642                        | "shift"
643                        | "unshift"
644                        | "open"
645                        | "die"
646                        | "warn"
647                        | "print"
648                        | "printf"
649                        | "say"
650                        | "push"
651                        | "pop"
652                        | "map"
653                        | "sort"
654                        | "grep"
655                        | "keys"
656                        | "values"
657                        | "each"
658                        | "defined"
659                        | "scalar"
660                        | "ref"
661                ) {
662                    let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
663                    if args.is_empty() {
664                        format!("(call {} ())", name)
665                    } else {
666                        format!("(call {} ({}))", name, args_str)
667                    }
668                } else {
669                    // Tree-sitter format varies by context
670                    let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
671                    if args.is_empty() {
672                        "(function_call_expression (function))".to_string()
673                    } else {
674                        format!("(ambiguous_function_call_expression (function) {})", args_str)
675                    }
676                }
677            }
678
679            NodeKind::IndirectCall { method, object, args } => {
680                let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
681                format!("(indirect_call {} {} ({}))", method, object.to_sexp(), args_str)
682            }
683
684            NodeKind::Regex { pattern, replacement, modifiers, has_embedded_code } => {
685                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
686                format!("(regex {:?} {:?} {:?}{})", pattern, replacement, modifiers, risk_marker)
687            }
688
689            NodeKind::Match { expr, pattern, modifiers, has_embedded_code, negated } => {
690                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
691                let op = if *negated { "not_match" } else { "match" };
692                format!(
693                    "({} {} (regex {:?} {:?}{}))",
694                    op,
695                    expr.to_sexp(),
696                    pattern,
697                    modifiers,
698                    risk_marker
699                )
700            }
701
702            NodeKind::Substitution {
703                expr,
704                pattern,
705                replacement,
706                modifiers,
707                has_embedded_code,
708                negated,
709            } => {
710                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
711                let neg_marker = if *negated { " (negated)" } else { "" };
712                format!(
713                    "(substitution {} {:?} {:?} {:?}{}{})",
714                    expr.to_sexp(),
715                    pattern,
716                    replacement,
717                    modifiers,
718                    risk_marker,
719                    neg_marker
720                )
721            }
722
723            NodeKind::Transliteration { expr, search, replace, modifiers, negated } => {
724                let neg_marker = if *negated { " (negated)" } else { "" };
725                format!(
726                    "(transliteration {} {:?} {:?} {:?}{})",
727                    expr.to_sexp(),
728                    search,
729                    replace,
730                    modifiers,
731                    neg_marker
732                )
733            }
734
735            NodeKind::Package { name, block, name_span: _ } => {
736                if let Some(blk) = block {
737                    format!("(package {} {})", name, blk.to_sexp())
738                } else {
739                    format!("(package {})", name)
740                }
741            }
742
743            NodeKind::Use { module, args, has_filter_risk } => {
744                let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
745                if args.is_empty() {
746                    format!("(use {}{})", module, risk_marker)
747                } else {
748                    let args_str = args.join(" ");
749                    format!("(use {} ({}){})", module, args_str, risk_marker)
750                }
751            }
752
753            NodeKind::No { module, args, has_filter_risk } => {
754                let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
755                if args.is_empty() {
756                    format!("(no {}{})", module, risk_marker)
757                } else {
758                    let args_str = args.join(" ");
759                    format!("(no {} ({}){})", module, args_str, risk_marker)
760                }
761            }
762
763            NodeKind::PhaseBlock { phase, phase_span: _, block } => {
764                format!("({} {})", phase, block.to_sexp())
765            }
766
767            NodeKind::DataSection { marker, body } => {
768                if let Some(body_text) = body {
769                    format!("(data_section {} \"{}\")", marker, body_text.escape_default())
770                } else {
771                    format!("(data_section {})", marker)
772                }
773            }
774
775            NodeKind::Class { name, parents, body } => {
776                if parents.is_empty() {
777                    format!("(class {} {})", name, body.to_sexp())
778                } else {
779                    format!("(class {} :isa({}) {})", name, parents.join(","), body.to_sexp())
780                }
781            }
782
783            NodeKind::Format { name, body } => {
784                format!("(format {} {:?})", name, body)
785            }
786
787            NodeKind::Identifier { name } => {
788                // Format expected by tests: (identifier name)
789                format!("(identifier {})", name)
790            }
791
792            NodeKind::Error { message, partial, .. } => {
793                if let Some(node) = partial {
794                    format!("(ERROR \"{}\" {})", message.escape_default(), node.to_sexp())
795                } else {
796                    format!("(ERROR \"{}\")", message.escape_default())
797                }
798            }
799            NodeKind::MissingExpression => "(missing_expression)".to_string(),
800            NodeKind::MissingStatement => "(missing_statement)".to_string(),
801            NodeKind::MissingIdentifier => "(missing_identifier)".to_string(),
802            NodeKind::MissingBlock => "(missing_block)".to_string(),
803            NodeKind::UnknownRest => "(UNKNOWN_REST)".to_string(),
804        }
805    }
806
807    /// Convert the AST to S-expression format that unwraps expression statements in programs
808    pub fn to_sexp_inner(&self) -> String {
809        match &self.kind {
810            NodeKind::ExpressionStatement { expression } => {
811                // Check if this is an anonymous subroutine - if so, keep it wrapped
812                match &expression.kind {
813                    NodeKind::Subroutine { name, .. } if name.is_none() => {
814                        // Anonymous subroutine should remain wrapped in expression statement
815                        self.to_sexp()
816                    }
817                    _ => {
818                        // In the inner format, other expression statements are unwrapped
819                        expression.to_sexp()
820                    }
821                }
822            }
823            _ => {
824                // For all other node types, use regular to_sexp
825                self.to_sexp()
826            }
827        }
828    }
829
830    /// Call a function on every direct child node of this node.
831    ///
832    /// This enables depth-first traversal for operations like heredoc content attachment.
833    /// The closure receives a mutable reference to each child node.
834    #[inline]
835    pub fn for_each_child_mut<F: FnMut(&mut Node)>(&mut self, mut f: F) {
836        match &mut self.kind {
837            NodeKind::Tie { variable, package, args } => {
838                f(variable);
839                f(package);
840                for arg in args {
841                    f(arg);
842                }
843            }
844            NodeKind::Untie { variable } => f(variable),
845
846            // Root program node
847            NodeKind::Program { statements } => {
848                for stmt in statements {
849                    f(stmt);
850                }
851            }
852
853            // Statement wrappers
854            NodeKind::ExpressionStatement { expression } => f(expression),
855
856            // Variable declarations
857            NodeKind::VariableDeclaration { variable, initializer, .. } => {
858                f(variable);
859                if let Some(init) = initializer {
860                    f(init);
861                }
862            }
863            NodeKind::VariableListDeclaration { variables, initializer, .. } => {
864                for var in variables {
865                    f(var);
866                }
867                if let Some(init) = initializer {
868                    f(init);
869                }
870            }
871            NodeKind::VariableWithAttributes { variable, .. } => f(variable),
872
873            // Binary operations
874            NodeKind::Binary { left, right, .. } => {
875                f(left);
876                f(right);
877            }
878            NodeKind::Ternary { condition, then_expr, else_expr } => {
879                f(condition);
880                f(then_expr);
881                f(else_expr);
882            }
883            NodeKind::Unary { operand, .. } => f(operand),
884            NodeKind::Assignment { lhs, rhs, .. } => {
885                f(lhs);
886                f(rhs);
887            }
888
889            // Control flow
890            NodeKind::Block { statements } => {
891                for stmt in statements {
892                    f(stmt);
893                }
894            }
895            NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
896                f(condition);
897                f(then_branch);
898                for (elsif_cond, elsif_body) in elsif_branches {
899                    f(elsif_cond);
900                    f(elsif_body);
901                }
902                if let Some(else_body) = else_branch {
903                    f(else_body);
904                }
905            }
906            NodeKind::While { condition, body, continue_block, .. } => {
907                f(condition);
908                f(body);
909                if let Some(cont) = continue_block {
910                    f(cont);
911                }
912            }
913            NodeKind::For { init, condition, update, body, continue_block, .. } => {
914                if let Some(i) = init {
915                    f(i);
916                }
917                if let Some(c) = condition {
918                    f(c);
919                }
920                if let Some(u) = update {
921                    f(u);
922                }
923                f(body);
924                if let Some(cont) = continue_block {
925                    f(cont);
926                }
927            }
928            NodeKind::Foreach { variable, list, body, continue_block } => {
929                f(variable);
930                f(list);
931                f(body);
932                if let Some(cb) = continue_block {
933                    f(cb);
934                }
935            }
936            NodeKind::Given { expr, body } => {
937                f(expr);
938                f(body);
939            }
940            NodeKind::When { condition, body } => {
941                f(condition);
942                f(body);
943            }
944            NodeKind::Default { body } => f(body),
945            NodeKind::StatementModifier { statement, condition, .. } => {
946                f(statement);
947                f(condition);
948            }
949            NodeKind::LabeledStatement { statement, .. } => f(statement),
950
951            // Eval and Do blocks
952            NodeKind::Eval { block } => f(block),
953            NodeKind::Do { block } => f(block),
954            NodeKind::Defer { block } => f(block),
955            NodeKind::Try { body, catch_blocks, finally_block } => {
956                f(body);
957                for (_, catch_body) in catch_blocks {
958                    f(catch_body);
959                }
960                if let Some(finally) = finally_block {
961                    f(finally);
962                }
963            }
964
965            // Function calls
966            NodeKind::FunctionCall { args, .. } => {
967                for arg in args {
968                    f(arg);
969                }
970            }
971            NodeKind::MethodCall { object, args, .. } => {
972                f(object);
973                for arg in args {
974                    f(arg);
975                }
976            }
977            NodeKind::IndirectCall { object, args, .. } => {
978                f(object);
979                for arg in args {
980                    f(arg);
981                }
982            }
983
984            // Functions
985            NodeKind::Subroutine { prototype, signature, body, .. } => {
986                if let Some(proto) = prototype {
987                    f(proto);
988                }
989                if let Some(sig) = signature {
990                    f(sig);
991                }
992                f(body);
993            }
994            NodeKind::Method { signature, body, .. } => {
995                if let Some(sig) = signature {
996                    f(sig);
997                }
998                f(body);
999            }
1000            NodeKind::Return { value } => {
1001                if let Some(v) = value {
1002                    f(v);
1003                }
1004            }
1005            NodeKind::Goto { target } => f(target),
1006            NodeKind::Signature { parameters } => {
1007                for param in parameters {
1008                    f(param);
1009                }
1010            }
1011            NodeKind::MandatoryParameter { variable } => f(variable),
1012            NodeKind::OptionalParameter { variable, default_value } => {
1013                f(variable);
1014                f(default_value);
1015            }
1016            NodeKind::SlurpyParameter { variable } => f(variable),
1017            NodeKind::NamedParameter { variable } => f(variable),
1018
1019            // Pattern matching
1020            NodeKind::Match { expr, .. } => f(expr),
1021            NodeKind::Substitution { expr, .. } => f(expr),
1022            NodeKind::Transliteration { expr, .. } => f(expr),
1023
1024            // Containers
1025            NodeKind::ArrayLiteral { elements } => {
1026                for elem in elements {
1027                    f(elem);
1028                }
1029            }
1030            NodeKind::HashLiteral { pairs } => {
1031                for (key, value) in pairs {
1032                    f(key);
1033                    f(value);
1034                }
1035            }
1036
1037            // Package system
1038            NodeKind::Package { block, .. } => {
1039                if let Some(b) = block {
1040                    f(b);
1041                }
1042            }
1043            NodeKind::PhaseBlock { block, .. } => f(block),
1044            NodeKind::Class { body, .. } => f(body),
1045
1046            // Error node might have a partial valid tree
1047            NodeKind::Error { partial, .. } => {
1048                if let Some(node) = partial {
1049                    f(node);
1050                }
1051            }
1052
1053            // Leaf nodes (no children to traverse)
1054            NodeKind::Variable { .. }
1055            | NodeKind::Identifier { .. }
1056            | NodeKind::Number { .. }
1057            | NodeKind::String { .. }
1058            | NodeKind::Heredoc { .. }
1059            | NodeKind::Regex { .. }
1060            | NodeKind::Readline { .. }
1061            | NodeKind::Glob { .. }
1062            | NodeKind::Typeglob { .. }
1063            | NodeKind::Diamond
1064            | NodeKind::Ellipsis
1065            | NodeKind::Undef
1066            | NodeKind::Use { .. }
1067            | NodeKind::No { .. }
1068            | NodeKind::Prototype { .. }
1069            | NodeKind::DataSection { .. }
1070            | NodeKind::Format { .. }
1071            | NodeKind::LoopControl { .. }
1072            | NodeKind::MissingExpression
1073            | NodeKind::MissingStatement
1074            | NodeKind::MissingIdentifier
1075            | NodeKind::MissingBlock
1076            | NodeKind::UnknownRest => {}
1077        }
1078    }
1079
1080    /// Call a function on every direct child node of this node (immutable version).
1081    ///
1082    /// This enables depth-first traversal for read-only operations like AST analysis.
1083    /// The closure receives an immutable reference to each child node.
1084    #[inline]
1085    pub fn for_each_child<'a, F: FnMut(&'a Node)>(&'a self, mut f: F) {
1086        match &self.kind {
1087            NodeKind::Tie { variable, package, args } => {
1088                f(variable);
1089                f(package);
1090                for arg in args {
1091                    f(arg);
1092                }
1093            }
1094            NodeKind::Untie { variable } => f(variable),
1095
1096            // Root program node
1097            NodeKind::Program { statements } => {
1098                for stmt in statements {
1099                    f(stmt);
1100                }
1101            }
1102
1103            // Statement wrappers
1104            NodeKind::ExpressionStatement { expression } => f(expression),
1105
1106            // Variable declarations
1107            NodeKind::VariableDeclaration { variable, initializer, .. } => {
1108                f(variable);
1109                if let Some(init) = initializer {
1110                    f(init);
1111                }
1112            }
1113            NodeKind::VariableListDeclaration { variables, initializer, .. } => {
1114                for var in variables {
1115                    f(var);
1116                }
1117                if let Some(init) = initializer {
1118                    f(init);
1119                }
1120            }
1121            NodeKind::VariableWithAttributes { variable, .. } => f(variable),
1122
1123            // Binary operations
1124            NodeKind::Binary { left, right, .. } => {
1125                f(left);
1126                f(right);
1127            }
1128            NodeKind::Ternary { condition, then_expr, else_expr } => {
1129                f(condition);
1130                f(then_expr);
1131                f(else_expr);
1132            }
1133            NodeKind::Unary { operand, .. } => f(operand),
1134            NodeKind::Assignment { lhs, rhs, .. } => {
1135                f(lhs);
1136                f(rhs);
1137            }
1138
1139            // Control flow
1140            NodeKind::Block { statements } => {
1141                for stmt in statements {
1142                    f(stmt);
1143                }
1144            }
1145            NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
1146                f(condition);
1147                f(then_branch);
1148                for (elsif_cond, elsif_body) in elsif_branches {
1149                    f(elsif_cond);
1150                    f(elsif_body);
1151                }
1152                if let Some(else_body) = else_branch {
1153                    f(else_body);
1154                }
1155            }
1156            NodeKind::While { condition, body, continue_block, .. } => {
1157                f(condition);
1158                f(body);
1159                if let Some(cont) = continue_block {
1160                    f(cont);
1161                }
1162            }
1163            NodeKind::For { init, condition, update, body, continue_block, .. } => {
1164                if let Some(i) = init {
1165                    f(i);
1166                }
1167                if let Some(c) = condition {
1168                    f(c);
1169                }
1170                if let Some(u) = update {
1171                    f(u);
1172                }
1173                f(body);
1174                if let Some(cont) = continue_block {
1175                    f(cont);
1176                }
1177            }
1178            NodeKind::Foreach { variable, list, body, continue_block } => {
1179                f(variable);
1180                f(list);
1181                f(body);
1182                if let Some(cb) = continue_block {
1183                    f(cb);
1184                }
1185            }
1186            NodeKind::Given { expr, body } => {
1187                f(expr);
1188                f(body);
1189            }
1190            NodeKind::When { condition, body } => {
1191                f(condition);
1192                f(body);
1193            }
1194            NodeKind::Default { body } => f(body),
1195            NodeKind::StatementModifier { statement, condition, .. } => {
1196                f(statement);
1197                f(condition);
1198            }
1199            NodeKind::LabeledStatement { statement, .. } => f(statement),
1200
1201            // Eval and Do blocks
1202            NodeKind::Eval { block } => f(block),
1203            NodeKind::Do { block } => f(block),
1204            NodeKind::Defer { block } => f(block),
1205            NodeKind::Try { body, catch_blocks, finally_block } => {
1206                f(body);
1207                for (_, catch_body) in catch_blocks {
1208                    f(catch_body);
1209                }
1210                if let Some(finally) = finally_block {
1211                    f(finally);
1212                }
1213            }
1214
1215            // Function calls
1216            NodeKind::FunctionCall { args, .. } => {
1217                for arg in args {
1218                    f(arg);
1219                }
1220            }
1221            NodeKind::MethodCall { object, args, .. } => {
1222                f(object);
1223                for arg in args {
1224                    f(arg);
1225                }
1226            }
1227            NodeKind::IndirectCall { object, args, .. } => {
1228                f(object);
1229                for arg in args {
1230                    f(arg);
1231                }
1232            }
1233
1234            // Functions
1235            NodeKind::Subroutine { prototype, signature, body, .. } => {
1236                if let Some(proto) = prototype {
1237                    f(proto);
1238                }
1239                if let Some(sig) = signature {
1240                    f(sig);
1241                }
1242                f(body);
1243            }
1244            NodeKind::Method { signature, body, .. } => {
1245                if let Some(sig) = signature {
1246                    f(sig);
1247                }
1248                f(body);
1249            }
1250            NodeKind::Return { value } => {
1251                if let Some(v) = value {
1252                    f(v);
1253                }
1254            }
1255            NodeKind::Goto { target } => f(target),
1256            NodeKind::Signature { parameters } => {
1257                for param in parameters {
1258                    f(param);
1259                }
1260            }
1261            NodeKind::MandatoryParameter { variable } => f(variable),
1262            NodeKind::OptionalParameter { variable, default_value } => {
1263                f(variable);
1264                f(default_value);
1265            }
1266            NodeKind::SlurpyParameter { variable } => f(variable),
1267            NodeKind::NamedParameter { variable } => f(variable),
1268
1269            // Pattern matching
1270            NodeKind::Match { expr, .. } => f(expr),
1271            NodeKind::Substitution { expr, .. } => f(expr),
1272            NodeKind::Transliteration { expr, .. } => f(expr),
1273
1274            // Containers
1275            NodeKind::ArrayLiteral { elements } => {
1276                for elem in elements {
1277                    f(elem);
1278                }
1279            }
1280            NodeKind::HashLiteral { pairs } => {
1281                for (key, value) in pairs {
1282                    f(key);
1283                    f(value);
1284                }
1285            }
1286
1287            // Package system
1288            NodeKind::Package { block, .. } => {
1289                if let Some(b) = block {
1290                    f(b);
1291                }
1292            }
1293            NodeKind::PhaseBlock { block, .. } => f(block),
1294            NodeKind::Class { body, .. } => f(body),
1295
1296            // Error node might have a partial valid tree
1297            NodeKind::Error { partial, .. } => {
1298                if let Some(node) = partial {
1299                    f(node);
1300                }
1301            }
1302
1303            // Leaf nodes (no children to traverse)
1304            NodeKind::Variable { .. }
1305            | NodeKind::Identifier { .. }
1306            | NodeKind::Number { .. }
1307            | NodeKind::String { .. }
1308            | NodeKind::Heredoc { .. }
1309            | NodeKind::Regex { .. }
1310            | NodeKind::Readline { .. }
1311            | NodeKind::Glob { .. }
1312            | NodeKind::Typeglob { .. }
1313            | NodeKind::Diamond
1314            | NodeKind::Ellipsis
1315            | NodeKind::Undef
1316            | NodeKind::Use { .. }
1317            | NodeKind::No { .. }
1318            | NodeKind::Prototype { .. }
1319            | NodeKind::DataSection { .. }
1320            | NodeKind::Format { .. }
1321            | NodeKind::LoopControl { .. }
1322            | NodeKind::MissingExpression
1323            | NodeKind::MissingStatement
1324            | NodeKind::MissingIdentifier
1325            | NodeKind::MissingBlock
1326            | NodeKind::UnknownRest => {}
1327        }
1328    }
1329
1330    /// Count the total number of nodes in this subtree (inclusive).
1331    ///
1332    /// # Examples
1333    ///
1334    /// ```
1335    /// use perl_ast::{Node, NodeKind, SourceLocation};
1336    ///
1337    /// let loc = SourceLocation { start: 0, end: 1 };
1338    /// let leaf = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1339    /// assert_eq!(leaf.count_nodes(), 1);
1340    ///
1341    /// let program = Node::new(
1342    ///     NodeKind::Program { statements: vec![leaf] },
1343    ///     loc,
1344    /// );
1345    /// assert_eq!(program.count_nodes(), 2);
1346    /// ```
1347    pub fn count_nodes(&self) -> usize {
1348        let mut count = 1;
1349        self.for_each_child(|child| {
1350            count += child.count_nodes();
1351        });
1352        count
1353    }
1354
1355    /// Collect direct child nodes into a vector for convenience APIs.
1356    ///
1357    /// # Examples
1358    ///
1359    /// ```
1360    /// use perl_ast::{Node, NodeKind, SourceLocation};
1361    ///
1362    /// let loc = SourceLocation { start: 0, end: 1 };
1363    /// let stmt = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1364    /// let program = Node::new(
1365    ///     NodeKind::Program { statements: vec![stmt] },
1366    ///     loc,
1367    /// );
1368    /// assert_eq!(program.children().len(), 1);
1369    /// ```
1370    #[inline]
1371    pub fn children(&self) -> Vec<&Node> {
1372        let mut children = Vec::new();
1373        self.for_each_child(|child| children.push(child));
1374        children
1375    }
1376
1377    /// Count direct child nodes without allocating an intermediate vector.
1378    ///
1379    /// This is more efficient than `children().len()` when callers only need
1380    /// cardinality.
1381    #[inline]
1382    pub fn child_count(&self) -> usize {
1383        let mut count = 0;
1384        self.for_each_child(|_| count += 1);
1385        count
1386    }
1387
1388    /// Get the first direct child node, if any.
1389    ///
1390    /// Optimized to avoid allocating the children vector.
1391    #[inline]
1392    pub fn first_child(&self) -> Option<&Node> {
1393        let mut result = None;
1394        self.for_each_child(|child| {
1395            if result.is_none() {
1396                result = Some(child);
1397            }
1398        });
1399        result
1400    }
1401
1402    /// Returns `true` when this node's source span contains `offset`.
1403    ///
1404    /// The start position is inclusive and the end position is exclusive.
1405    #[inline]
1406    pub fn contains_offset(&self, offset: usize) -> bool {
1407        self.location.start <= offset && offset < self.location.end
1408    }
1409
1410    /// Find the most specific node whose source span contains `offset`.
1411    ///
1412    /// Returns `None` when `offset` is outside this node. Otherwise, returns this
1413    /// node or the deepest descendant whose span contains the offset. This is useful
1414    /// for LSP features that need to map a cursor byte offset to the smallest AST
1415    /// construct at that position.
1416    ///
1417    /// The same half-open span semantics as [`Node::contains_offset`] apply: start
1418    /// positions are inclusive and end positions are exclusive.
1419    ///
1420    /// # Examples
1421    ///
1422    /// ```
1423    /// use perl_ast::{Node, NodeKind, SourceLocation};
1424    ///
1425    /// let left = Node::new(
1426    ///     NodeKind::Identifier { name: "left".to_string() },
1427    ///     SourceLocation { start: 0, end: 4 },
1428    /// );
1429    /// let right = Node::new(
1430    ///     NodeKind::Number { value: "1".to_string() },
1431    ///     SourceLocation { start: 7, end: 8 },
1432    /// );
1433    /// let expr = Node::new(
1434    ///     NodeKind::Binary {
1435    ///         op: "+".to_string(),
1436    ///         left: Box::new(left),
1437    ///         right: Box::new(right),
1438    ///     },
1439    ///     SourceLocation { start: 0, end: 8 },
1440    /// );
1441    ///
1442    /// assert_eq!(
1443    ///     expr.find_deepest_containing_offset(7).map(|node| node.kind.kind_name()),
1444    ///     Some("Number"),
1445    /// );
1446    /// assert_eq!(expr.find_deepest_containing_offset(8), None);
1447    /// ```
1448    #[inline]
1449    pub fn find_deepest_containing_offset(&self, offset: usize) -> Option<&Node> {
1450        if !self.contains_offset(offset) {
1451            return None;
1452        }
1453
1454        let mut result = self;
1455        self.for_each_child(|child| {
1456            if let Some(descendant) = child.find_deepest_containing_offset(offset) {
1457                result = descendant;
1458            }
1459        });
1460        Some(result)
1461    }
1462
1463    /// Returns the byte length of this node's source span.
1464    ///
1465    /// Uses saturating subtraction so malformed spans never underflow.
1466    #[inline]
1467    pub fn span_len(&self) -> usize {
1468        self.location.end.saturating_sub(self.location.start)
1469    }
1470
1471    /// Get the last direct child node, if any.
1472    ///
1473    /// Optimized to avoid allocating the children vector.
1474    ///
1475    /// # Examples
1476    ///
1477    /// ```
1478    /// use perl_ast::{Node, NodeKind, SourceLocation};
1479    ///
1480    /// let loc = SourceLocation { start: 0, end: 1 };
1481    /// let first = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1482    /// let second = Node::new(NodeKind::Number { value: "2".to_string() }, loc);
1483    /// let program = Node::new(
1484    ///     NodeKind::Program { statements: vec![first, second] },
1485    ///     loc,
1486    /// );
1487    ///
1488    /// assert_eq!(program.last_child().map(|n| n.kind.kind_name()), Some("Number"));
1489    /// assert_eq!(Node::new(NodeKind::Block { statements: vec![] }, loc).last_child(), None);
1490    /// ```
1491    #[inline]
1492    pub fn last_child(&self) -> Option<&Node> {
1493        let mut result = None;
1494        self.for_each_child(|child| {
1495            result = Some(child);
1496        });
1497        result
1498    }
1499}
1500
1501/// Comprehensive enumeration of all Perl language constructs supported by the parser.
1502///
1503/// This enum represents every possible AST node type that can be parsed from Perl code
1504/// during the Parse → Index → Navigate → Complete → Analyze workflow. Each variant captures
1505/// the semantic meaning and structural relationships needed for complete script analysis
1506/// and transformation.
1507///
1508/// # LSP Workflow Integration
1509///
1510/// Node kinds are processed differently across workflow stages:
1511/// - **Parse**: All variants are produced by the parser
1512/// - **Index**: Symbol-bearing variants feed workspace indexing
1513/// - **Navigate**: Call and reference variants support navigation features
1514/// - **Complete**: Expression variants provide completion context
1515/// - **Analyze**: Semantic variants drive diagnostics and refactoring
1516///
1517/// # Examples
1518///
1519/// Pattern-match on node kinds to extract semantic information:
1520///
1521/// ```
1522/// use perl_ast::{Node, NodeKind, SourceLocation};
1523///
1524/// let loc = SourceLocation { start: 0, end: 5 };
1525/// let node = Node::new(
1526///     NodeKind::Variable { sigil: "$".to_string(), name: "foo".to_string() },
1527///     loc,
1528/// );
1529///
1530/// assert!(matches!(
1531///     &node.kind,
1532///     NodeKind::Variable { sigil, name } if sigil == "$" && name == "foo"
1533/// ));
1534/// ```
1535///
1536/// Use [`kind_name()`](NodeKind::kind_name) for debugging and diagnostics:
1537///
1538/// ```
1539/// use perl_ast::NodeKind;
1540///
1541/// let kind = NodeKind::Number { value: "99".to_string() };
1542/// assert_eq!(kind.kind_name(), "Number");
1543///
1544/// let kind = NodeKind::Variable { sigil: "@".to_string(), name: "list".to_string() };
1545/// assert_eq!(kind.kind_name(), "Variable");
1546/// ```
1547///
1548/// # Performance Considerations
1549///
1550/// The enum design optimizes for large codebases:
1551/// - Box pointers minimize stack usage for recursive structures
1552/// - Vector storage enables efficient bulk operations on child nodes
1553/// - Clone operations optimized for concurrent analysis workflows
1554/// - Pattern matching performance tuned for common Perl constructs
1555#[derive(Debug, Clone, PartialEq)]
1556pub enum NodeKind {
1557    /// Top-level program containing all statements in a Perl script
1558    ///
1559    /// This is the root node for any parsed Perl script content, containing all
1560    /// top-level statements found during the Parse stage of LSP workflow.
1561    Program {
1562        /// All top-level statements in the Perl script
1563        statements: Vec<Node>,
1564    },
1565
1566    /// Statement wrapper for expressions that appear at statement level
1567    ///
1568    /// Used during Analyze stage to distinguish between expressions used as
1569    /// statements versus expressions within other contexts during Perl parsing.
1570    ExpressionStatement {
1571        /// The expression being used as a statement
1572        expression: Box<Node>,
1573    },
1574
1575    /// Variable declaration with scope declarator in Perl script processing
1576    ///
1577    /// Represents declarations like `my $var`, `our $global`, `local $dynamic`, etc.
1578    /// Critical for Analyze stage symbol table construction during Perl parsing.
1579    VariableDeclaration {
1580        /// Scope declarator: "my", "our", "local", "state"
1581        declarator: String,
1582        /// The variable being declared
1583        variable: Box<Node>,
1584        /// Variable attributes (e.g., ":shared", ":locked")
1585        attributes: Vec<String>,
1586        /// Optional initializer expression
1587        initializer: Option<Box<Node>>,
1588    },
1589
1590    /// Multiple variable declaration in a single statement
1591    ///
1592    /// Handles constructs like `my ($x, $y) = @values` common in Perl script processing.
1593    /// Supports efficient bulk variable analysis during Navigate stage operations.
1594    VariableListDeclaration {
1595        /// Scope declarator for all variables in the list
1596        declarator: String,
1597        /// All variables being declared in the list
1598        variables: Vec<Node>,
1599        /// Attributes applied to the variable list
1600        attributes: Vec<String>,
1601        /// Optional initializer for the entire variable list
1602        initializer: Option<Box<Node>>,
1603    },
1604
1605    /// Perl variable reference (scalar, array, hash, etc.) in Perl parsing workflow
1606    Variable {
1607        /// Variable sigil indicating type: $, @, %, &, *
1608        sigil: String, // $, @, %, &, *
1609        /// Variable name without sigil
1610        name: String,
1611    },
1612
1613    /// Variable with additional attributes for enhanced LSP workflow
1614    VariableWithAttributes {
1615        /// The base variable node
1616        variable: Box<Node>,
1617        /// List of attribute names applied to the variable
1618        attributes: Vec<String>,
1619    },
1620
1621    /// Assignment operation for LSP data processing workflows
1622    Assignment {
1623        /// Left-hand side of assignment
1624        lhs: Box<Node>,
1625        /// Right-hand side of assignment
1626        rhs: Box<Node>,
1627        /// Assignment operator: =, +=, -=, etc.
1628        op: String, // =, +=, -=, etc.
1629    },
1630
1631    // Expressions
1632    /// Binary operation for Perl parsing workflow calculations
1633    Binary {
1634        /// Binary operator
1635        op: String,
1636        /// Left operand
1637        left: Box<Node>,
1638        /// Right operand
1639        right: Box<Node>,
1640    },
1641
1642    /// Ternary conditional expression for Perl parsing workflow logic
1643    Ternary {
1644        /// Condition to evaluate
1645        condition: Box<Node>,
1646        /// Expression when condition is true
1647        then_expr: Box<Node>,
1648        /// Expression when condition is false
1649        else_expr: Box<Node>,
1650    },
1651
1652    /// Unary operation for Perl parsing workflow
1653    Unary {
1654        /// Unary operator
1655        op: String,
1656        /// Operand to apply operator to
1657        operand: Box<Node>,
1658    },
1659
1660    // I/O operations
1661    /// Diamond operator for file input in Perl parsing workflow
1662    Diamond, // <>
1663
1664    /// Ellipsis operator for Perl parsing workflow
1665    Ellipsis, // ...
1666
1667    /// Undef value for Perl parsing workflow
1668    Undef, // undef
1669
1670    /// Readline operation for LSP file processing
1671    Readline {
1672        /// Optional filehandle: `<STDIN>`, `<$fh>`, etc.
1673        filehandle: Option<String>, // <STDIN>, <$fh>, etc.
1674    },
1675
1676    /// Glob pattern for LSP workspace file matching
1677    Glob {
1678        /// Pattern string for file matching
1679        pattern: String, // <*.txt>
1680    },
1681
1682    /// Typeglob expression: `*foo` or `*main::bar`
1683    ///
1684    /// Provides access to all symbol table entries for a given name.
1685    Typeglob {
1686        /// Name of the symbol (including package qualification)
1687        name: String,
1688    },
1689
1690    /// Numeric literal in Perl code (integer, float, hex, octal, binary)
1691    ///
1692    /// Represents all numeric literal forms: `42`, `3.14`, `0x1A`, `0o755`, `0b1010`.
1693    Number {
1694        /// String representation preserving original format
1695        value: String,
1696    },
1697
1698    /// String literal with optional interpolation
1699    ///
1700    /// Handles both single-quoted (`'literal'`) and double-quoted (`"$interpolated"`) strings.
1701    String {
1702        /// String content (after quote processing)
1703        value: String,
1704        /// Whether the string supports variable interpolation
1705        interpolated: bool,
1706    },
1707
1708    /// Heredoc string literal for multi-line content
1709    ///
1710    /// Supports all heredoc forms: `<<EOF`, `<<'EOF'`, `<<"EOF"`, `<<~EOF` (indented).
1711    Heredoc {
1712        /// Delimiter marking heredoc boundaries
1713        delimiter: String,
1714        /// Content between delimiters
1715        content: String,
1716        /// Whether content supports variable interpolation
1717        interpolated: bool,
1718        /// Whether leading whitespace is stripped (<<~ form)
1719        indented: bool,
1720        /// Whether this is a command execution heredoc (<<`EOF`)
1721        command: bool,
1722        /// Body span for breakpoint detection (populated by drain_pending_heredocs)
1723        body_span: Option<SourceLocation>,
1724    },
1725
1726    /// Array literal expression: `(1, 2, 3)` or `[1, 2, 3]`
1727    ArrayLiteral {
1728        /// Elements in the array
1729        elements: Vec<Node>,
1730    },
1731
1732    /// Hash literal expression: `(key => 'value')` or `{key => 'value'}`
1733    HashLiteral {
1734        /// Key-value pairs in the hash
1735        pairs: Vec<(Node, Node)>,
1736    },
1737
1738    /// Block of statements: `{ ... }`
1739    ///
1740    /// Used for control structures, subroutine bodies, and bare blocks.
1741    Block {
1742        /// Statements within the block
1743        statements: Vec<Node>,
1744    },
1745
1746    /// Eval block for exception handling: `eval { ... }`
1747    Eval {
1748        /// Block to evaluate with exception trapping
1749        block: Box<Node>,
1750    },
1751
1752    /// Do block for file inclusion or expression evaluation: `do { ... }` or `do "file"`
1753    Do {
1754        /// Block to execute or file expression
1755        block: Box<Node>,
1756    },
1757
1758    /// Defer block for deferred cleanup on scope exit (Perl 5.36+ experimental, stable in 5.40)
1759    Defer {
1760        /// Block to execute on scope exit
1761        block: Box<Node>,
1762    },
1763
1764    /// Try-catch-finally for modern exception handling (Syntax::Keyword::Try style)
1765    Try {
1766        /// Try block body
1767        body: Box<Node>,
1768        /// Catch blocks: (optional exception variable, handler block)
1769        catch_blocks: Vec<(Option<String>, Box<Node>)>,
1770        /// Optional finally block
1771        finally_block: Option<Box<Node>>,
1772    },
1773
1774    /// If-elsif-else conditional statement
1775    If {
1776        /// Condition expression
1777        condition: Box<Node>,
1778        /// Then branch block
1779        then_branch: Box<Node>,
1780        /// Elsif branches: (condition, block) pairs
1781        elsif_branches: Vec<(Box<Node>, Box<Node>)>,
1782        /// Optional else branch
1783        else_branch: Option<Box<Node>>,
1784        /// Original keyword: None for 'if', Some("unless") for 'unless' block form.
1785        keyword: Option<String>,
1786    },
1787
1788    /// Statement with a label for loop control: `LABEL: while (...)`
1789    LabeledStatement {
1790        /// Label name (e.g., "OUTER", "LINE")
1791        label: String,
1792        /// Labeled statement (typically a loop)
1793        statement: Box<Node>,
1794    },
1795
1796    /// While loop: `while (condition) { ... }`
1797    While {
1798        /// Loop condition
1799        condition: Box<Node>,
1800        /// Loop body
1801        body: Box<Node>,
1802        /// Optional continue block
1803        continue_block: Option<Box<Node>>,
1804        /// Original keyword: None for 'while', Some("until") for 'until' block form.
1805        keyword: Option<String>,
1806    },
1807
1808    /// Tie operation for binding variables to objects: `tie %hash, 'Package', @args`
1809    Tie {
1810        /// Variable being tied
1811        variable: Box<Node>,
1812        /// Class/package name to tie to
1813        package: Box<Node>,
1814        /// Arguments passed to TIE* method
1815        args: Vec<Node>,
1816    },
1817
1818    /// Untie operation for unbinding variables: `untie %hash`
1819    Untie {
1820        /// Variable being untied
1821        variable: Box<Node>,
1822    },
1823
1824    /// C-style for loop: `for (init; cond; update) { ... }`
1825    For {
1826        /// Initialization expression
1827        init: Option<Box<Node>>,
1828        /// Loop condition
1829        condition: Option<Box<Node>>,
1830        /// Update expression
1831        update: Option<Box<Node>>,
1832        /// Loop body
1833        body: Box<Node>,
1834        /// Optional continue block
1835        continue_block: Option<Box<Node>>,
1836    },
1837
1838    /// Foreach loop: `foreach my $item (@list) { ... }`
1839    Foreach {
1840        /// Iterator variable
1841        variable: Box<Node>,
1842        /// List to iterate
1843        list: Box<Node>,
1844        /// Loop body
1845        body: Box<Node>,
1846        /// Optional continue block
1847        continue_block: Option<Box<Node>>,
1848    },
1849
1850    /// Given statement for switch-like matching (Perl 5.10+)
1851    Given {
1852        /// Expression to match against
1853        expr: Box<Node>,
1854        /// Body containing when/default blocks
1855        body: Box<Node>,
1856    },
1857
1858    /// When clause in given/switch: `when ($pattern) { ... }`
1859    When {
1860        /// Pattern to match
1861        condition: Box<Node>,
1862        /// Handler block
1863        body: Box<Node>,
1864    },
1865
1866    /// Default clause in given/switch: `default { ... }`
1867    Default {
1868        /// Handler block for unmatched cases
1869        body: Box<Node>,
1870    },
1871
1872    /// Statement modifier syntax: `print "ok" if $condition`
1873    StatementModifier {
1874        /// Statement to conditionally execute
1875        statement: Box<Node>,
1876        /// Modifier keyword: if, unless, while, until, for, foreach
1877        modifier: String,
1878        /// Modifier condition
1879        condition: Box<Node>,
1880    },
1881
1882    // Functions
1883    /// Subroutine declaration (function) including name, prototype, signature and body.
1884    Subroutine {
1885        /// Name of the subroutine
1886        ///
1887        /// # Precise Navigation Support
1888        /// - Added name_span for exact LSP navigation
1889        /// - Enables precise go-to-definition and hover behavior
1890        /// - O(1) span lookup in workspace symbols
1891        ///
1892        /// ## Integration Points
1893        /// - Semantic token providers
1894        /// - Cross-reference generation
1895        /// - Symbol renaming
1896        name: Option<String>,
1897
1898        /// Source location span of the subroutine name
1899        ///
1900        /// ## Usage Notes
1901        /// - Always corresponds to the name field
1902        /// - Provides constant-time position information
1903        /// - Essential for precise editor interactions
1904        name_span: Option<SourceLocation>,
1905
1906        /// Optional prototype node (e.g. `($;@)`).
1907        prototype: Option<Box<Node>>,
1908        /// Optional signature node (Perl 5.20+ feature).
1909        signature: Option<Box<Node>>,
1910        /// Attributes attached to the subroutine (`:lvalue`, etc.).
1911        attributes: Vec<String>,
1912        /// The body block of the subroutine.
1913        body: Box<Node>,
1914    },
1915
1916    /// Subroutine prototype specification: `sub foo ($;@) { ... }`
1917    Prototype {
1918        /// Prototype string defining argument behavior
1919        content: String,
1920    },
1921
1922    /// Subroutine signature (Perl 5.20+): `sub foo ($x, $y = 0) { ... }`
1923    Signature {
1924        /// List of signature parameters
1925        parameters: Vec<Node>,
1926    },
1927
1928    /// Mandatory signature parameter: `$x` in `sub foo ($x) { }`
1929    MandatoryParameter {
1930        /// Variable being bound
1931        variable: Box<Node>,
1932    },
1933
1934    /// Optional signature parameter with default: `$y = 0` in `sub foo ($y = 0) { }`
1935    OptionalParameter {
1936        /// Variable being bound
1937        variable: Box<Node>,
1938        /// Default value expression
1939        default_value: Box<Node>,
1940    },
1941
1942    /// Slurpy parameter collecting remaining args: `@rest` or `%opts` in signature
1943    SlurpyParameter {
1944        /// Array or hash variable to receive remaining arguments
1945        variable: Box<Node>,
1946    },
1947
1948    /// Named parameter placeholder in signature (future Perl feature)
1949    NamedParameter {
1950        /// Variable for named parameter binding
1951        variable: Box<Node>,
1952    },
1953
1954    /// Method declaration (Perl 5.38+ with `use feature 'class'`)
1955    Method {
1956        /// Method name
1957        name: String,
1958        /// Optional signature
1959        signature: Option<Box<Node>>,
1960        /// Method attributes (e.g., `:lvalue`)
1961        attributes: Vec<String>,
1962        /// Method body
1963        body: Box<Node>,
1964    },
1965
1966    /// Return statement: `return;` or `return $value;`
1967    Return {
1968        /// Optional return value
1969        value: Option<Box<Node>>,
1970    },
1971
1972    /// Loop control statement: `next`, `last`, or `redo`
1973    LoopControl {
1974        /// Control keyword: "next", "last", or "redo"
1975        op: String,
1976        /// Optional label: `next LABEL`
1977        label: Option<String>,
1978    },
1979
1980    /// Goto statement: `goto LABEL`, `goto &sub`, or `goto $expr`
1981    Goto {
1982        /// The target of the goto (label identifier, sub reference, or expression)
1983        target: Box<Node>,
1984    },
1985
1986    /// Method call: `$obj->method(@args)` or `$obj->method`
1987    MethodCall {
1988        /// Object or class expression
1989        object: Box<Node>,
1990        /// Method name being called
1991        method: String,
1992        /// Method arguments
1993        args: Vec<Node>,
1994    },
1995
1996    /// Function call: `foo(@args)` or `foo()`
1997    FunctionCall {
1998        /// Function name (may be qualified: `Package::func`)
1999        name: String,
2000        /// Function arguments
2001        args: Vec<Node>,
2002    },
2003
2004    /// Indirect object call (legacy syntax): `new Class @args`
2005    IndirectCall {
2006        /// Method name
2007        method: String,
2008        /// Object or class
2009        object: Box<Node>,
2010        /// Arguments
2011        args: Vec<Node>,
2012    },
2013
2014    /// Regex literal: `/pattern/modifiers` or `qr/pattern/modifiers`
2015    Regex {
2016        /// Regular expression pattern
2017        pattern: String,
2018        /// Replacement string (for s/// when parsed as regex)
2019        replacement: Option<String>,
2020        /// Regex modifiers (i, m, s, x, g, etc.)
2021        modifiers: String,
2022        /// Whether the regex contains embedded code `(?{...})`
2023        has_embedded_code: bool,
2024    },
2025
2026    /// Match operation: `$str =~ /pattern/modifiers` or `$str !~ /pattern/modifiers`
2027    Match {
2028        /// Expression to match against
2029        expr: Box<Node>,
2030        /// Pattern to match
2031        pattern: String,
2032        /// Match modifiers
2033        modifiers: String,
2034        /// Whether the regex contains embedded code `(?{...})`
2035        has_embedded_code: bool,
2036        /// Whether the binding operator was `!~` (negated match)
2037        negated: bool,
2038    },
2039
2040    /// Substitution operation: `$str =~ s/pattern/replacement/modifiers`
2041    Substitution {
2042        /// Expression to substitute in
2043        expr: Box<Node>,
2044        /// Pattern to find
2045        pattern: String,
2046        /// Replacement string
2047        replacement: String,
2048        /// Substitution modifiers (g, e, r, etc.)
2049        modifiers: String,
2050        /// Whether the regex contains embedded code `(?{...})`
2051        has_embedded_code: bool,
2052        /// Whether the binding operator was `!~` (negated match)
2053        negated: bool,
2054    },
2055
2056    /// Transliteration operation: `$str =~ tr/search/replace/` or `y///`
2057    Transliteration {
2058        /// Expression to transliterate
2059        expr: Box<Node>,
2060        /// Characters to search for
2061        search: String,
2062        /// Replacement characters
2063        replace: String,
2064        /// Transliteration modifiers (c, d, s, r)
2065        modifiers: String,
2066        /// Whether the binding operator was `!~` (negated match)
2067        negated: bool,
2068    },
2069
2070    // Package system
2071    /// Package declaration (e.g. `package Foo;`) and optional inline block form.
2072    Package {
2073        /// Name of the package
2074        ///
2075        /// # Precise Navigation Support
2076        /// - Added name_span for exact LSP navigation
2077        /// - Enables precise go-to-definition and hover behavior
2078        /// - O(1) span lookup in workspace symbols
2079        ///
2080        /// ## Integration Points
2081        /// - Workspace indexing
2082        /// - Cross-module symbol resolution
2083        /// - Code action providers
2084        name: String,
2085
2086        /// Source location span of the package name
2087        ///
2088        /// ## Usage Notes
2089        /// - Always corresponds to the name field
2090        /// - Provides constant-time position information
2091        /// - Essential for precise editor interactions
2092        name_span: SourceLocation,
2093
2094        /// Optional inline block for `package Foo { ... }` declarations.
2095        block: Option<Box<Node>>,
2096    },
2097
2098    /// Use statement for module loading: `use Module qw(imports);`
2099    Use {
2100        /// Module name to load
2101        module: String,
2102        /// Import arguments (symbols to import)
2103        args: Vec<String>,
2104        /// Whether this module is a known source filter (security risk)
2105        has_filter_risk: bool,
2106    },
2107
2108    /// No statement for disabling features: `no strict;`
2109    No {
2110        /// Module/pragma name to disable
2111        module: String,
2112        /// Arguments for the no statement
2113        args: Vec<String>,
2114        /// Whether this module is a known source filter (security risk)
2115        has_filter_risk: bool,
2116    },
2117
2118    /// Phase block for compile/runtime hooks: `BEGIN`, `END`, `CHECK`, `INIT`, `UNITCHECK`
2119    PhaseBlock {
2120        /// Phase name: BEGIN, END, CHECK, INIT, UNITCHECK
2121        phase: String,
2122        /// Source location span of the phase block name for precise navigation
2123        phase_span: Option<SourceLocation>,
2124        /// Block to execute during the specified phase
2125        block: Box<Node>,
2126    },
2127
2128    /// Data section marker: `__DATA__` or `__END__`
2129    DataSection {
2130        /// Section marker (__DATA__ or __END__)
2131        marker: String,
2132        /// Content following the marker (if any)
2133        body: Option<String>,
2134    },
2135
2136    /// Class declaration (Perl 5.38+ with `use feature 'class'`)
2137    Class {
2138        /// Class name
2139        name: String,
2140        /// Parent class names from `:isa(Parent)` attributes
2141        parents: Vec<String>,
2142        /// Class body containing methods and attributes
2143        body: Box<Node>,
2144    },
2145
2146    /// Format declaration for legacy report generation
2147    Format {
2148        /// Format name (defaults to filehandle name)
2149        name: String,
2150        /// Format specification body
2151        body: String,
2152    },
2153
2154    /// Bare identifier (bareword or package-qualified name)
2155    Identifier {
2156        /// Identifier string
2157        name: String,
2158    },
2159
2160    /// Parse error placeholder with error message and recovery context
2161    Error {
2162        /// Error description
2163        message: String,
2164        /// Expected token types (if any)
2165        expected: Vec<TokenKind>,
2166        /// The token actually found (if any)
2167        found: Option<Token>,
2168        /// Partial AST node parsed before error (if any)
2169        partial: Option<Box<Node>>,
2170    },
2171
2172    /// Missing expression where one was expected
2173    MissingExpression,
2174    /// Missing statement where one was expected
2175    MissingStatement,
2176    /// Missing identifier where one was expected
2177    MissingIdentifier,
2178    /// Missing block where one was expected
2179    MissingBlock,
2180
2181    /// Lexer budget exceeded marker preserving partial parse results
2182    ///
2183    /// Used when recursion or token limits are hit to preserve already-parsed content.
2184    UnknownRest,
2185}
2186
2187impl NodeKind {
2188    /// Get the name of this `NodeKind` as a static string.
2189    ///
2190    /// Useful for diagnostics, logging, and human-readable AST dumps.
2191    ///
2192    /// # Examples
2193    ///
2194    /// ```
2195    /// use perl_ast::NodeKind;
2196    ///
2197    /// let kind = NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() };
2198    /// assert_eq!(kind.kind_name(), "Variable");
2199    ///
2200    /// let kind = NodeKind::Program { statements: vec![] };
2201    /// assert_eq!(kind.kind_name(), "Program");
2202    /// ```
2203    pub fn kind_name(&self) -> &'static str {
2204        match self {
2205            NodeKind::Program { .. } => "Program",
2206            NodeKind::ExpressionStatement { .. } => "ExpressionStatement",
2207            NodeKind::VariableDeclaration { .. } => "VariableDeclaration",
2208            NodeKind::VariableListDeclaration { .. } => "VariableListDeclaration",
2209            NodeKind::Variable { .. } => "Variable",
2210            NodeKind::VariableWithAttributes { .. } => "VariableWithAttributes",
2211            NodeKind::Assignment { .. } => "Assignment",
2212            NodeKind::Binary { .. } => "Binary",
2213            NodeKind::Ternary { .. } => "Ternary",
2214            NodeKind::Unary { .. } => "Unary",
2215            NodeKind::Diamond => "Diamond",
2216            NodeKind::Ellipsis => "Ellipsis",
2217            NodeKind::Undef => "Undef",
2218            NodeKind::Readline { .. } => "Readline",
2219            NodeKind::Glob { .. } => "Glob",
2220            NodeKind::Typeglob { .. } => "Typeglob",
2221            NodeKind::Number { .. } => "Number",
2222            NodeKind::String { .. } => "String",
2223            NodeKind::Heredoc { .. } => "Heredoc",
2224            NodeKind::ArrayLiteral { .. } => "ArrayLiteral",
2225            NodeKind::HashLiteral { .. } => "HashLiteral",
2226            NodeKind::Block { .. } => "Block",
2227            NodeKind::Eval { .. } => "Eval",
2228            NodeKind::Do { .. } => "Do",
2229            NodeKind::Defer { .. } => "Defer",
2230            NodeKind::Try { .. } => "Try",
2231            NodeKind::If { .. } => "If",
2232            NodeKind::LabeledStatement { .. } => "LabeledStatement",
2233            NodeKind::While { .. } => "While",
2234            NodeKind::Tie { .. } => "Tie",
2235            NodeKind::Untie { .. } => "Untie",
2236            NodeKind::For { .. } => "For",
2237            NodeKind::Foreach { .. } => "Foreach",
2238            NodeKind::Given { .. } => "Given",
2239            NodeKind::When { .. } => "When",
2240            NodeKind::Default { .. } => "Default",
2241            NodeKind::StatementModifier { .. } => "StatementModifier",
2242            NodeKind::Subroutine { .. } => "Subroutine",
2243            NodeKind::Prototype { .. } => "Prototype",
2244            NodeKind::Signature { .. } => "Signature",
2245            NodeKind::MandatoryParameter { .. } => "MandatoryParameter",
2246            NodeKind::OptionalParameter { .. } => "OptionalParameter",
2247            NodeKind::SlurpyParameter { .. } => "SlurpyParameter",
2248            NodeKind::NamedParameter { .. } => "NamedParameter",
2249            NodeKind::Method { .. } => "Method",
2250            NodeKind::Return { .. } => "Return",
2251            NodeKind::LoopControl { .. } => "LoopControl",
2252            NodeKind::Goto { .. } => "Goto",
2253            NodeKind::MethodCall { .. } => "MethodCall",
2254            NodeKind::FunctionCall { .. } => "FunctionCall",
2255            NodeKind::IndirectCall { .. } => "IndirectCall",
2256            NodeKind::Regex { .. } => "Regex",
2257            NodeKind::Match { .. } => "Match",
2258            NodeKind::Substitution { .. } => "Substitution",
2259            NodeKind::Transliteration { .. } => "Transliteration",
2260            NodeKind::Package { .. } => "Package",
2261            NodeKind::Use { .. } => "Use",
2262            NodeKind::No { .. } => "No",
2263            NodeKind::PhaseBlock { .. } => "PhaseBlock",
2264            NodeKind::DataSection { .. } => "DataSection",
2265            NodeKind::Class { .. } => "Class",
2266            NodeKind::Format { .. } => "Format",
2267            NodeKind::Identifier { .. } => "Identifier",
2268            NodeKind::Error { .. } => "Error",
2269            NodeKind::MissingExpression => "MissingExpression",
2270            NodeKind::MissingStatement => "MissingStatement",
2271            NodeKind::MissingIdentifier => "MissingIdentifier",
2272            NodeKind::MissingBlock => "MissingBlock",
2273            NodeKind::UnknownRest => "UnknownRest",
2274        }
2275    }
2276
2277    /// Canonical list of **all** `kind_name()` strings, in alphabetical order.
2278    ///
2279    /// Every consumer that needs the full set of NodeKind names should reference
2280    /// this constant instead of maintaining a hand-written copy.
2281    pub const ALL_KIND_NAMES: &[&'static str] = &[
2282        "ArrayLiteral",
2283        "Assignment",
2284        "Binary",
2285        "Block",
2286        "Class",
2287        "DataSection",
2288        "Default",
2289        "Defer",
2290        "Diamond",
2291        "Do",
2292        "Ellipsis",
2293        "Error",
2294        "Eval",
2295        "ExpressionStatement",
2296        "For",
2297        "Foreach",
2298        "Format",
2299        "FunctionCall",
2300        "Given",
2301        "Glob",
2302        "Goto",
2303        "HashLiteral",
2304        "Heredoc",
2305        "Identifier",
2306        "If",
2307        "IndirectCall",
2308        "LabeledStatement",
2309        "LoopControl",
2310        "MandatoryParameter",
2311        "Match",
2312        "Method",
2313        "MethodCall",
2314        "MissingBlock",
2315        "MissingExpression",
2316        "MissingIdentifier",
2317        "MissingStatement",
2318        "NamedParameter",
2319        "No",
2320        "Number",
2321        "OptionalParameter",
2322        "Package",
2323        "PhaseBlock",
2324        "Program",
2325        "Prototype",
2326        "Readline",
2327        "Regex",
2328        "Return",
2329        "Signature",
2330        "SlurpyParameter",
2331        "StatementModifier",
2332        "String",
2333        "Subroutine",
2334        "Substitution",
2335        "Ternary",
2336        "Tie",
2337        "Transliteration",
2338        "Try",
2339        "Typeglob",
2340        "Unary",
2341        "Undef",
2342        "UnknownRest",
2343        "Untie",
2344        "Use",
2345        "Variable",
2346        "VariableDeclaration",
2347        "VariableListDeclaration",
2348        "VariableWithAttributes",
2349        "When",
2350        "While",
2351    ];
2352
2353    /// Subset of `ALL_KIND_NAMES` that represent synthetic/recovery nodes.
2354    ///
2355    /// These kinds are only produced by `parse_with_recovery()` on malformed
2356    /// input and should not be expected in clean parses.
2357    pub const RECOVERY_KIND_NAMES: &[&'static str] = &[
2358        "Error",
2359        "MissingBlock",
2360        "MissingExpression",
2361        "MissingIdentifier",
2362        "MissingStatement",
2363        "UnknownRest",
2364    ];
2365}
2366
2367impl fmt::Display for NodeKind {
2368    /// Formats as the canonical `kind_name()` string.
2369    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2370        f.write_str(self.kind_name())
2371    }
2372}
2373
2374impl fmt::Display for Node {
2375    /// Formats as the tree-sitter compatible S-expression.
2376    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2377        f.write_str(&self.to_sexp())
2378    }
2379}
2380
/// Build the S-expression label for a unary operator.
///
/// Three operators get a descriptive label instead of their own spelling:
/// `!` and `not` become `unary_not`, `~` becomes `unary_complement`, and `\`
/// becomes `unary_ref`.
///
/// Every other operator is rendered as `unary_` followed by the operator text
/// with spaces replaced by underscores. That covers the arithmetic signs,
/// `++`/`--`, file tests such as `-f`, postfix dereferences such as `->@*`,
/// `defined`, and any operator not otherwise known.
fn format_unary_operator(op: &str) -> String {
    match op {
        "!" | "not" => String::from("unary_not"),
        "~" => String::from("unary_complement"),
        "\\" => String::from("unary_ref"),
        verbatim => format!("unary_{}", verbatim.replace(' ', "_")),
    }
}
2445
/// Build the S-expression label for a binary operator.
///
/// The arrow subscripts get dedicated labels: `->{}` becomes
/// `arrow_hash_deref` and `->[]` becomes `arrow_array_deref`.
///
/// Every other operator is rendered as `binary_` followed by the operator text
/// with spaces replaced by underscores. That covers arithmetic, comparison,
/// logical, bitwise, binding, range and assignment operators, `->`, `{}`,
/// `[]`, and any operator not otherwise known.
fn format_binary_operator(op: &str) -> String {
    match op {
        "->{}" => String::from("arrow_hash_deref"),
        "->[]" => String::from("arrow_array_deref"),
        verbatim => format!("binary_{}", verbatim.replace(' ', "_")),
    }
}
2545
// `SourceLocation` is now provided by the `perl-position-tracking` crate;
// see the re-export at the top of this file.
2548
#[cfg(test)]
mod tests {
    //! Consistency checks between `NodeKind::kind_name()` and the name tables
    //! `NodeKind::ALL_KIND_NAMES` / `NodeKind::RECOVERY_KIND_NAMES`.

    use super::*;
    use std::collections::BTreeSet;

    /// Collects the `kind_name()` of one placeholder value per `NodeKind`
    /// variant listed below.
    ///
    /// Every variant is written out with its complete field list, so removing
    /// a variant or changing its fields stops this function from compiling.
    /// A newly added variant still has to be appended to the list by hand.
    fn all_kind_names_from_variants() -> BTreeSet<&'static str> {
        let loc = SourceLocation { start: 0, end: 0 };
        // Child nodes carry no meaning here; an `Undef` node is the filler.
        let boxed = || Box::new(Node::new(NodeKind::Undef, loc));
        let empty = String::new;

        let samples: Vec<NodeKind> = vec![
            NodeKind::Program { statements: Vec::new() },
            NodeKind::ExpressionStatement { expression: boxed() },
            NodeKind::VariableDeclaration {
                declarator: empty(),
                variable: boxed(),
                attributes: Vec::new(),
                initializer: None,
            },
            NodeKind::VariableListDeclaration {
                declarator: empty(),
                variables: Vec::new(),
                attributes: Vec::new(),
                initializer: None,
            },
            NodeKind::Variable { sigil: empty(), name: empty() },
            NodeKind::VariableWithAttributes { variable: boxed(), attributes: Vec::new() },
            NodeKind::Assignment { lhs: boxed(), rhs: boxed(), op: empty() },
            NodeKind::Binary { op: empty(), left: boxed(), right: boxed() },
            NodeKind::Ternary { condition: boxed(), then_expr: boxed(), else_expr: boxed() },
            NodeKind::Unary { op: empty(), operand: boxed() },
            NodeKind::Diamond,
            NodeKind::Ellipsis,
            NodeKind::Undef,
            NodeKind::Readline { filehandle: None },
            NodeKind::Glob { pattern: empty() },
            NodeKind::Typeglob { name: empty() },
            NodeKind::Number { value: empty() },
            NodeKind::String { value: empty(), interpolated: false },
            NodeKind::Heredoc {
                delimiter: empty(),
                content: empty(),
                interpolated: false,
                indented: false,
                command: false,
                body_span: None,
            },
            NodeKind::ArrayLiteral { elements: Vec::new() },
            NodeKind::HashLiteral { pairs: Vec::new() },
            NodeKind::Block { statements: Vec::new() },
            NodeKind::Eval { block: boxed() },
            NodeKind::Do { block: boxed() },
            NodeKind::Defer { block: boxed() },
            NodeKind::Try { body: boxed(), catch_blocks: Vec::new(), finally_block: None },
            NodeKind::If {
                condition: boxed(),
                then_branch: boxed(),
                elsif_branches: Vec::new(),
                else_branch: None,
                keyword: None,
            },
            NodeKind::LabeledStatement { label: empty(), statement: boxed() },
            NodeKind::While {
                condition: boxed(),
                body: boxed(),
                continue_block: None,
                keyword: None,
            },
            NodeKind::Tie { variable: boxed(), package: boxed(), args: Vec::new() },
            NodeKind::Untie { variable: boxed() },
            NodeKind::For {
                init: None,
                condition: None,
                update: None,
                body: boxed(),
                continue_block: None,
            },
            NodeKind::Foreach {
                variable: boxed(),
                list: boxed(),
                body: boxed(),
                continue_block: None,
            },
            NodeKind::Given { expr: boxed(), body: boxed() },
            NodeKind::When { condition: boxed(), body: boxed() },
            NodeKind::Default { body: boxed() },
            NodeKind::StatementModifier {
                statement: boxed(),
                modifier: empty(),
                condition: boxed(),
            },
            NodeKind::Subroutine {
                name: None,
                name_span: None,
                prototype: None,
                signature: None,
                attributes: Vec::new(),
                body: boxed(),
            },
            NodeKind::Prototype { content: empty() },
            NodeKind::Signature { parameters: Vec::new() },
            NodeKind::MandatoryParameter { variable: boxed() },
            NodeKind::OptionalParameter { variable: boxed(), default_value: boxed() },
            NodeKind::SlurpyParameter { variable: boxed() },
            NodeKind::NamedParameter { variable: boxed() },
            NodeKind::Method {
                name: empty(),
                signature: None,
                attributes: Vec::new(),
                body: boxed(),
            },
            NodeKind::Return { value: None },
            NodeKind::LoopControl { op: empty(), label: None },
            NodeKind::Goto { target: boxed() },
            NodeKind::MethodCall { object: boxed(), method: empty(), args: Vec::new() },
            NodeKind::FunctionCall { name: empty(), args: Vec::new() },
            NodeKind::IndirectCall { method: empty(), object: boxed(), args: Vec::new() },
            NodeKind::Regex {
                pattern: empty(),
                replacement: None,
                modifiers: empty(),
                has_embedded_code: false,
            },
            NodeKind::Match {
                expr: boxed(),
                pattern: empty(),
                modifiers: empty(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Substitution {
                expr: boxed(),
                pattern: empty(),
                replacement: empty(),
                modifiers: empty(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Transliteration {
                expr: boxed(),
                search: empty(),
                replace: empty(),
                modifiers: empty(),
                negated: false,
            },
            NodeKind::Package { name: empty(), name_span: loc, block: None },
            NodeKind::Use { module: empty(), args: Vec::new(), has_filter_risk: false },
            NodeKind::No { module: empty(), args: Vec::new(), has_filter_risk: false },
            NodeKind::PhaseBlock { phase: empty(), phase_span: None, block: boxed() },
            NodeKind::DataSection { marker: empty(), body: None },
            NodeKind::Class { name: empty(), parents: Vec::new(), body: boxed() },
            NodeKind::Format { name: empty(), body: empty() },
            NodeKind::Identifier { name: empty() },
            NodeKind::Error {
                message: empty(),
                expected: Vec::new(),
                found: None,
                partial: None,
            },
            NodeKind::MissingExpression,
            NodeKind::MissingStatement,
            NodeKind::MissingIdentifier,
            NodeKind::MissingBlock,
            NodeKind::UnknownRest,
        ];

        let mut names = BTreeSet::new();
        for sample in &samples {
            names.insert(sample.kind_name());
        }
        names
    }

    /// `ALL_KIND_NAMES` must be duplicate-free and contain exactly the names
    /// that `kind_name()` yields for the variants built above.
    #[test]
    fn all_kind_names_is_consistent_with_kind_name() {
        let declared = NodeKind::ALL_KIND_NAMES;
        let from_const = declared.iter().copied().collect::<BTreeSet<&str>>();
        let from_enum = all_kind_names_from_variants();

        // A set drops repeated entries, so a length mismatch means duplicates.
        assert_eq!(declared.len(), from_const.len(), "ALL_KIND_NAMES contains duplicates");

        // Names missing on either side, reported separately for a readable failure.
        let only_in_enum: Vec<_> = from_enum.difference(&from_const).collect();
        let only_in_const: Vec<_> = from_const.difference(&from_enum).collect();
        let in_sync = only_in_enum.is_empty() && only_in_const.is_empty();

        assert!(
            in_sync,
            "ALL_KIND_NAMES is out of sync with NodeKind variants:\n  \
             in enum but not in ALL_KIND_NAMES: {only_in_enum:?}\n  \
             in ALL_KIND_NAMES but not in enum: {only_in_const:?}"
        );
    }

    /// `RECOVERY_KIND_NAMES` must be duplicate-free and every entry must also
    /// appear in `ALL_KIND_NAMES`.
    #[test]
    fn recovery_kind_names_is_subset_of_all() {
        let declared = NodeKind::RECOVERY_KIND_NAMES;
        let all = NodeKind::ALL_KIND_NAMES.iter().copied().collect::<BTreeSet<&str>>();
        let recovery = declared.iter().copied().collect::<BTreeSet<&str>>();

        // A set drops repeated entries, so a length mismatch means duplicates.
        assert_eq!(declared.len(), recovery.len(), "RECOVERY_KIND_NAMES contains duplicates");

        let not_in_all: Vec<_> = recovery.difference(&all).collect();
        let is_subset = not_in_all.is_empty();
        assert!(
            is_subset,
            "RECOVERY_KIND_NAMES contains entries not in ALL_KIND_NAMES: {not_in_all:?}"
        );
    }
}