Skip to main content

perl_ast/
ast.rs

1//! Abstract Syntax Tree definitions for Perl within the parsing and LSP workflow.
2//!
3//! This module defines the comprehensive AST node types that represent parsed Perl code
4//! during the Parse → Index → Navigate → Complete → Analyze stages. The design is optimized
5//! for both direct use in Rust analysis and for generating tree-sitter compatible
6//! S-expressions during large workspace processing operations.
7//!
8//! # LSP Workflow Integration
9//!
10//! The AST structures support Perl tooling workflows by:
11//! - **Parse**: Produced by the parser as the canonical syntax tree
12//! - **Index**: Traversed to build symbol and reference tables
13//! - **Navigate**: Provides locations for definition and reference lookups
14//! - **Complete**: Supplies context for completion, hover, and signature help
15//! - **Analyze**: Feeds semantic analysis, diagnostics, and refactoring
16//!
17//! # Performance Characteristics
18//!
19//! AST structures are optimized for large codebases with:
20//! - Memory-efficient node representation using `Box<Node>` for recursive structures
21//! - Fast pattern matching via enum variants for common Perl constructs
22//! - Location tracking for precise error reporting in large files
23//! - Cheap cloning for parallel analysis tasks
24//!
25//! # Usage Examples
26//!
27//! ## Basic AST Construction
28//!
29//! ```rust
30//! use perl_ast::{Node, NodeKind, SourceLocation};
31//!
32//! // Create a simple variable declaration node
33//! let location = SourceLocation { start: 0, end: 10 };
34//! let node = Node::new(
35//!     NodeKind::VariableDeclaration {
36//!         declarator: "my".to_string(),
37//!         variable: Box::new(Node::new(
38//!             NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
39//!             location,
40//!         )),
41//!         attributes: vec![],
42//!         initializer: None,
43//!     },
44//!     location,
45//! );
46//! assert_eq!(node.kind.kind_name(), "VariableDeclaration");
47//! ```
48//!
49//! ## Tree-sitter S-expression Generation
50//!
51//! ```rust
52//! use perl_ast::{Node, NodeKind, SourceLocation};
53//!
54//! let loc = SourceLocation { start: 0, end: 2 };
55//! let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
56//! let program = Node::new(NodeKind::Program { statements: vec![num] }, loc);
57//!
58//! let sexp = program.to_sexp();
59//! assert!(sexp.starts_with("(source_file"));
60//! ```
61//!
62//! ## AST Traversal and Analysis
63//!
64//! ```rust
65//! use perl_ast::{Node, NodeKind, SourceLocation};
66//!
67//! fn count_variables(node: &Node) -> usize {
68//!     let mut count = 0;
69//!     match &node.kind {
70//!         NodeKind::Variable { .. } => count += 1,
71//!         NodeKind::Program { statements } => {
72//!             for stmt in statements {
73//!                 count += count_variables(stmt);
74//!             }
75//!         }
76//!         _ => {} // Handle other node types as needed
77//!     }
78//!     count
79//! }
80//!
81//! let loc = SourceLocation { start: 0, end: 5 };
82//! let var = Node::new(
83//!     NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
84//!     loc,
85//! );
86//! let program = Node::new(NodeKind::Program { statements: vec![var] }, loc);
87//! assert_eq!(count_variables(&program), 1);
88//! ```
89//!
90//! ## Parsing Integration
91//!
92//! In practice the AST is produced by the parser rather than built by hand
93//! (requires `perl-parser-core`):
94//!
95//! ```rust,ignore
96//! use perl_parser_core::Parser;
97//! use perl_ast::NodeKind;
98//!
99//! let mut parser = Parser::new("my $x = 42;");
100//! let ast = parser.parse().expect("should parse");
101//! assert!(matches!(ast.kind, NodeKind::Program { .. }));
102//! ```
103
104// Re-export SourceLocation from perl-position-tracking for unified span handling
105pub use perl_position_tracking::SourceLocation;
106// Re-export Token and TokenKind from perl-token for AST error nodes
107pub use perl_token::{Token, TokenKind};
108use std::fmt;
109
110/// Core AST node representing any Perl language construct within parsing workflows.
111///
112/// This is the fundamental building block for representing parsed Perl code. Each node
113/// contains both the semantic information (kind) and positional information (location)
114/// necessary for comprehensive script analysis.
115///
116/// # LSP Workflow Role
117///
118/// Nodes flow through tooling stages:
119/// - **Parse**: Created by the parser as it builds the syntax tree
120/// - **Index**: Visited to build symbol and reference tables
121/// - **Navigate**: Used to resolve definitions, references, and call hierarchy
122/// - **Complete**: Provides contextual information for completion and hover
123/// - **Analyze**: Drives semantic analysis and diagnostics
124///
125/// # Memory Optimization
126///
127/// The structure is designed for efficient memory usage during large-scale parsing:
128/// - `SourceLocation` uses compact position encoding for large files
129/// - `NodeKind` enum variants minimize memory overhead for common constructs
130/// - Clone operations are optimized for shared analysis workflows
131///
132/// # Examples
133///
134/// Construct a variable declaration node manually:
135///
136/// ```
137/// use perl_ast::{Node, NodeKind, SourceLocation};
138///
139/// let loc = SourceLocation { start: 0, end: 11 };
140/// let var = Node::new(
141///     NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
142///     loc,
143/// );
144/// let decl = Node::new(
145///     NodeKind::VariableDeclaration {
146///         declarator: "my".to_string(),
147///         variable: Box::new(var),
148///         attributes: vec![],
149///         initializer: None,
150///     },
151///     loc,
152/// );
153/// assert_eq!(decl.kind.kind_name(), "VariableDeclaration");
154/// ```
155///
156/// Typically you obtain nodes from the parser rather than constructing them by hand:
157///
158/// ```ignore
159/// use perl_parser::Parser;
160///
161/// let mut parser = Parser::new("my $x = 42;");
162/// let ast = parser.parse()?;
163/// println!("AST: {}", ast.to_sexp());
164/// ```
165#[derive(Debug, Clone, PartialEq)]
166pub struct Node {
167    /// The specific type and semantic content of this AST node
168    pub kind: NodeKind,
169    /// Source position information for error reporting and code navigation
170    pub location: SourceLocation,
171}
172
173impl Node {
174    /// Create a new AST node with the given kind and source location.
175    ///
176    /// # Examples
177    ///
178    /// ```
179    /// use perl_ast::{Node, NodeKind, SourceLocation};
180    ///
181    /// let node = Node::new(
182    ///     NodeKind::Number { value: "42".to_string() },
183    ///     SourceLocation { start: 0, end: 2 },
184    /// );
185    /// assert_eq!(node.kind.kind_name(), "Number");
186    /// assert_eq!(node.location.start, 0);
187    /// ```
188    pub fn new(kind: NodeKind, location: SourceLocation) -> Self {
189        Node { kind, location }
190    }
191
192    /// Convert the AST to a tree-sitter compatible S-expression.
193    ///
194    /// Produces a parenthesized representation compatible with tree-sitter's
195    /// S-expression format, useful for debugging and snapshot testing.
196    ///
197    /// # Examples
198    ///
199    /// ```
200    /// use perl_ast::{Node, NodeKind, SourceLocation};
201    ///
202    /// let loc = SourceLocation { start: 0, end: 2 };
203    /// let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
204    /// let program = Node::new(
205    ///     NodeKind::Program { statements: vec![num] },
206    ///     loc,
207    /// );
208    /// let sexp = program.to_sexp();
209    /// assert!(sexp.starts_with("(source_file"));
210    /// ```
211    pub fn to_sexp(&self) -> String {
212        match &self.kind {
213            NodeKind::Program { statements } => {
214                let stmts =
215                    statements.iter().map(|s| s.to_sexp_inner()).collect::<Vec<_>>().join(" ");
216                format!("(source_file {})", stmts)
217            }
218
219            NodeKind::ExpressionStatement { expression } => {
220                format!("(expression_statement {})", expression.to_sexp())
221            }
222
223            NodeKind::VariableDeclaration { declarator, variable, attributes, initializer } => {
224                let attrs_str = if attributes.is_empty() {
225                    String::new()
226                } else {
227                    format!(" (attributes {})", attributes.join(" "))
228                };
229                if let Some(init) = initializer {
230                    format!(
231                        "({}_declaration {}{}{})",
232                        declarator,
233                        variable.to_sexp(),
234                        attrs_str,
235                        init.to_sexp()
236                    )
237                } else {
238                    format!("({}_declaration {}{})", declarator, variable.to_sexp(), attrs_str)
239                }
240            }
241
242            NodeKind::VariableListDeclaration {
243                declarator,
244                variables,
245                attributes,
246                initializer,
247            } => {
248                let vars = variables.iter().map(|v| v.to_sexp()).collect::<Vec<_>>().join(" ");
249                let attrs_str = if attributes.is_empty() {
250                    String::new()
251                } else {
252                    format!(" (attributes {})", attributes.join(" "))
253                };
254                if let Some(init) = initializer {
255                    format!(
256                        "({}_declaration ({}){}{})",
257                        declarator,
258                        vars,
259                        attrs_str,
260                        init.to_sexp()
261                    )
262                } else {
263                    format!("({}_declaration ({}){})", declarator, vars, attrs_str)
264                }
265            }
266
267            NodeKind::Variable { sigil, name } => {
268                // Format expected by bless parsing tests: (variable $ name)
269                format!("(variable {} {})", sigil, name)
270            }
271
272            NodeKind::VariableWithAttributes { variable, attributes } => {
273                let attrs = attributes.join(" ");
274                format!("({} (attributes {}))", variable.to_sexp(), attrs)
275            }
276
277            NodeKind::Assignment { lhs, rhs, op } => {
278                format!(
279                    "(assignment_{} {} {})",
280                    op.replace("=", "assign"),
281                    lhs.to_sexp(),
282                    rhs.to_sexp()
283                )
284            }
285
286            NodeKind::Binary { op, left, right } => {
287                // Tree-sitter format: (binary_op left right)
288                let op_name = format_binary_operator(op);
289                format!("({} {} {})", op_name, left.to_sexp(), right.to_sexp())
290            }
291
292            NodeKind::Ternary { condition, then_expr, else_expr } => {
293                format!(
294                    "(ternary {} {} {})",
295                    condition.to_sexp(),
296                    then_expr.to_sexp(),
297                    else_expr.to_sexp()
298                )
299            }
300
301            NodeKind::Unary { op, operand } => {
302                // Tree-sitter format: (unary_op operand)
303                let op_name = format_unary_operator(op);
304                format!("({} {})", op_name, operand.to_sexp())
305            }
306
307            NodeKind::Diamond => "(diamond)".to_string(),
308
309            NodeKind::Ellipsis => "(ellipsis)".to_string(),
310
311            NodeKind::Undef => "(undef)".to_string(),
312
313            NodeKind::Readline { filehandle } => {
314                if let Some(fh) = filehandle {
315                    format!("(readline {})", fh)
316                } else {
317                    "(readline)".to_string()
318                }
319            }
320
321            NodeKind::Glob { pattern } => {
322                format!("(glob {})", pattern)
323            }
324            NodeKind::Typeglob { name } => {
325                format!("(typeglob {})", name)
326            }
327
328            NodeKind::Number { value } => {
329                // Format expected by bless parsing tests: (number value)
330                format!("(number {})", value)
331            }
332
333            NodeKind::String { value, interpolated } => {
334                // Escape quotes in string value to prevent S-expression parsing issues
335                let escaped_value = value.replace('\\', "\\\\").replace('"', "\\\"");
336
337                // Format based on interpolation status
338                if *interpolated {
339                    format!("(string_interpolated \"{}\")", escaped_value)
340                } else {
341                    format!("(string \"{}\")", escaped_value)
342                }
343            }
344
345            NodeKind::Heredoc { delimiter, content, interpolated, indented, command, .. } => {
346                let type_str = if *command {
347                    "heredoc_command"
348                } else if *indented {
349                    if *interpolated { "heredoc_indented_interpolated" } else { "heredoc_indented" }
350                } else if *interpolated {
351                    "heredoc_interpolated"
352                } else {
353                    "heredoc"
354                };
355                format!("({} {:?} {:?})", type_str, delimiter, content)
356            }
357
358            NodeKind::ArrayLiteral { elements } => {
359                let elems = elements.iter().map(|e| e.to_sexp()).collect::<Vec<_>>().join(" ");
360                format!("(array {})", elems)
361            }
362
363            NodeKind::HashLiteral { pairs } => {
364                let kvs = pairs
365                    .iter()
366                    .map(|(k, v)| format!("({} {})", k.to_sexp(), v.to_sexp()))
367                    .collect::<Vec<_>>()
368                    .join(" ");
369                format!("(hash {})", kvs)
370            }
371
372            NodeKind::Block { statements } => {
373                let stmts = statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ");
374                format!("(block {})", stmts)
375            }
376
377            NodeKind::Eval { block } => {
378                format!("(eval {})", block.to_sexp())
379            }
380
381            NodeKind::Do { block } => {
382                format!("(do {})", block.to_sexp())
383            }
384
385            NodeKind::Defer { block } => {
386                format!("(defer {})", block.to_sexp())
387            }
388
389            NodeKind::Try { body, catch_blocks, finally_block } => {
390                let mut parts = vec![format!("(try {})", body.to_sexp())];
391
392                for (var, block) in catch_blocks {
393                    if let Some(v) = var {
394                        parts.push(format!("(catch {} {})", v, block.to_sexp()));
395                    } else {
396                        parts.push(format!("(catch {})", block.to_sexp()));
397                    }
398                }
399
400                if let Some(finally) = finally_block {
401                    parts.push(format!("(finally {})", finally.to_sexp()));
402                }
403
404                parts.join(" ")
405            }
406
407            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
408                let mut parts =
409                    vec![format!("(if {} {})", condition.to_sexp(), then_branch.to_sexp())];
410
411                for (cond, block) in elsif_branches {
412                    parts.push(format!("(elsif {} {})", cond.to_sexp(), block.to_sexp()));
413                }
414
415                if let Some(else_block) = else_branch {
416                    parts.push(format!("(else {})", else_block.to_sexp()));
417                }
418
419                parts.join(" ")
420            }
421
422            NodeKind::LabeledStatement { label, statement } => {
423                format!("(labeled_statement {} {})", label, statement.to_sexp())
424            }
425
426            NodeKind::While { condition, body, continue_block } => {
427                let mut s = format!("(while {} {})", condition.to_sexp(), body.to_sexp());
428                if let Some(cont) = continue_block {
429                    s.push_str(&format!(" (continue {})", cont.to_sexp()));
430                }
431                s
432            }
433            NodeKind::Tie { variable, package, args } => {
434                let mut s = format!("(tie {} {}", variable.to_sexp(), package.to_sexp());
435                for arg in args {
436                    s.push_str(&format!(" {}", arg.to_sexp()));
437                }
438                s.push(')');
439                s
440            }
441            NodeKind::Untie { variable } => {
442                format!("(untie {})", variable.to_sexp())
443            }
444            NodeKind::For { init, condition, update, body, continue_block } => {
445                let init_str =
446                    init.as_ref().map(|i| i.to_sexp()).unwrap_or_else(|| "()".to_string());
447                let cond_str =
448                    condition.as_ref().map(|c| c.to_sexp()).unwrap_or_else(|| "()".to_string());
449                let update_str =
450                    update.as_ref().map(|u| u.to_sexp()).unwrap_or_else(|| "()".to_string());
451                let mut result =
452                    format!("(for {} {} {} {})", init_str, cond_str, update_str, body.to_sexp());
453                if let Some(cont) = continue_block {
454                    result.push_str(&format!(" (continue {})", cont.to_sexp()));
455                }
456                result
457            }
458
459            NodeKind::Foreach { variable, list, body, continue_block } => {
460                let cont = if let Some(cb) = continue_block {
461                    format!(" {}", cb.to_sexp())
462                } else {
463                    String::new()
464                };
465                format!(
466                    "(foreach {} {} {}{})",
467                    variable.to_sexp(),
468                    list.to_sexp(),
469                    body.to_sexp(),
470                    cont
471                )
472            }
473
474            NodeKind::Given { expr, body } => {
475                format!("(given {} {})", expr.to_sexp(), body.to_sexp())
476            }
477
478            NodeKind::When { condition, body } => {
479                format!("(when {} {})", condition.to_sexp(), body.to_sexp())
480            }
481
482            NodeKind::Default { body } => {
483                format!("(default {})", body.to_sexp())
484            }
485
486            NodeKind::StatementModifier { statement, modifier, condition } => {
487                format!(
488                    "(statement_modifier_{} {} {})",
489                    modifier,
490                    statement.to_sexp(),
491                    condition.to_sexp()
492                )
493            }
494
495            NodeKind::Subroutine { name, prototype, signature, attributes, body, name_span: _ } => {
496                if let Some(sub_name) = name {
497                    // Named subroutine - bless test expected format: (sub name () block)
498                    let mut parts = vec![sub_name.clone()];
499
500                    // Add attributes if present (before prototype/signature)
501                    if !attributes.is_empty() {
502                        for attr in attributes {
503                            parts.push(format!(":{}", attr));
504                        }
505                    }
506
507                    // Add prototype/signature - use () for empty prototype
508                    if let Some(proto) = prototype {
509                        parts.push(format!("({})", proto.to_sexp()));
510                    } else if signature.is_some() {
511                        // If there's a signature but no prototype, still show ()
512                        parts.push("()".to_string());
513                    } else {
514                        parts.push("()".to_string());
515                    }
516
517                    // Add body
518                    parts.push(body.to_sexp());
519
520                    // Format: (sub name [attrs...] ()(block ...)) - space between name and (), no space between () and block
521                    if parts.len() >= 3 && parts[parts.len() - 2] == "()" {
522                        let name_and_attrs = parts[0..parts.len() - 2].join(" ");
523                        let proto = &parts[parts.len() - 2];
524                        let body = &parts[parts.len() - 1];
525                        format!("(sub {} {}{})", name_and_attrs, proto, body)
526                    } else {
527                        format!("(sub {})", parts.join(" "))
528                    }
529                } else {
530                    // Anonymous subroutine - tree-sitter format
531                    let mut parts = Vec::new();
532
533                    // Add attributes if present
534                    if !attributes.is_empty() {
535                        let attrs: Vec<String> = attributes
536                            .iter()
537                            .map(|_attr| "(attribute (attribute_name))".to_string())
538                            .collect();
539                        parts.push(format!("(attrlist {})", attrs.join("")));
540                    }
541
542                    // Add prototype if present
543                    if let Some(proto) = prototype {
544                        parts.push(proto.to_sexp());
545                    }
546
547                    // Add signature if present
548                    if let Some(sig) = signature {
549                        parts.push(sig.to_sexp());
550                    }
551
552                    // Add body
553                    parts.push(body.to_sexp());
554
555                    format!("(anonymous_subroutine_expression {})", parts.join(""))
556                }
557            }
558
559            NodeKind::Prototype { content: _ } => "(prototype)".to_string(),
560
561            NodeKind::Signature { parameters } => {
562                let params = parameters.iter().map(|p| p.to_sexp()).collect::<Vec<_>>().join(" ");
563                format!("(signature {})", params)
564            }
565
566            NodeKind::MandatoryParameter { variable } => {
567                format!("(mandatory_parameter {})", variable.to_sexp())
568            }
569
570            NodeKind::OptionalParameter { variable, default_value } => {
571                format!("(optional_parameter {} {})", variable.to_sexp(), default_value.to_sexp())
572            }
573
574            NodeKind::SlurpyParameter { variable } => {
575                format!("(slurpy_parameter {})", variable.to_sexp())
576            }
577
578            NodeKind::NamedParameter { variable } => {
579                format!("(named_parameter {})", variable.to_sexp())
580            }
581
582            NodeKind::Method { name: _, signature, attributes, body } => {
583                let block_contents = match &body.kind {
584                    NodeKind::Block { statements } => {
585                        statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ")
586                    }
587                    _ => body.to_sexp(),
588                };
589
590                let mut parts = vec!["(bareword)".to_string()];
591
592                // Add signature if present
593                if let Some(sig) = signature {
594                    parts.push(sig.to_sexp());
595                }
596
597                // Add attributes if present
598                if !attributes.is_empty() {
599                    let attrs: Vec<String> = attributes
600                        .iter()
601                        .map(|_attr| "(attribute (attribute_name))".to_string())
602                        .collect();
603                    parts.push(format!("(attrlist {})", attrs.join("")));
604                }
605
606                parts.push(format!("(block {})", block_contents));
607                format!("(method_declaration_statement {})", parts.join(" "))
608            }
609
610            NodeKind::Return { value } => {
611                if let Some(val) = value {
612                    format!("(return {})", val.to_sexp())
613                } else {
614                    "(return)".to_string()
615                }
616            }
617
618            NodeKind::LoopControl { op, label } => {
619                if let Some(l) = label {
620                    format!("({} {})", op, l)
621                } else {
622                    format!("({})", op)
623                }
624            }
625
626            NodeKind::Goto { target } => {
627                format!("(goto {})", target.to_sexp())
628            }
629
630            NodeKind::MethodCall { object, method, args } => {
631                let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
632                format!("(method_call {} {} ({}))", object.to_sexp(), method, args_str)
633            }
634
635            NodeKind::FunctionCall { name, args } => {
636                // Special handling for functions that should use call format in tree-sitter tests
637                if matches!(
638                    name.as_str(),
639                    "bless"
640                        | "shift"
641                        | "unshift"
642                        | "open"
643                        | "die"
644                        | "warn"
645                        | "print"
646                        | "printf"
647                        | "say"
648                        | "push"
649                        | "pop"
650                        | "map"
651                        | "sort"
652                        | "grep"
653                        | "keys"
654                        | "values"
655                        | "each"
656                        | "defined"
657                        | "scalar"
658                        | "ref"
659                ) {
660                    let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
661                    if args.is_empty() {
662                        format!("(call {} ())", name)
663                    } else {
664                        format!("(call {} ({}))", name, args_str)
665                    }
666                } else {
667                    // Tree-sitter format varies by context
668                    let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
669                    if args.is_empty() {
670                        "(function_call_expression (function))".to_string()
671                    } else {
672                        format!("(ambiguous_function_call_expression (function) {})", args_str)
673                    }
674                }
675            }
676
677            NodeKind::IndirectCall { method, object, args } => {
678                let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
679                format!("(indirect_call {} {} ({}))", method, object.to_sexp(), args_str)
680            }
681
682            NodeKind::Regex { pattern, replacement, modifiers, has_embedded_code } => {
683                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
684                format!("(regex {:?} {:?} {:?}{})", pattern, replacement, modifiers, risk_marker)
685            }
686
687            NodeKind::Match { expr, pattern, modifiers, has_embedded_code, negated } => {
688                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
689                let op = if *negated { "not_match" } else { "match" };
690                format!(
691                    "({} {} (regex {:?} {:?}{}))",
692                    op,
693                    expr.to_sexp(),
694                    pattern,
695                    modifiers,
696                    risk_marker
697                )
698            }
699
700            NodeKind::Substitution {
701                expr,
702                pattern,
703                replacement,
704                modifiers,
705                has_embedded_code,
706                negated,
707            } => {
708                let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
709                let neg_marker = if *negated { " (negated)" } else { "" };
710                format!(
711                    "(substitution {} {:?} {:?} {:?}{}{})",
712                    expr.to_sexp(),
713                    pattern,
714                    replacement,
715                    modifiers,
716                    risk_marker,
717                    neg_marker
718                )
719            }
720
721            NodeKind::Transliteration { expr, search, replace, modifiers, negated } => {
722                let neg_marker = if *negated { " (negated)" } else { "" };
723                format!(
724                    "(transliteration {} {:?} {:?} {:?}{})",
725                    expr.to_sexp(),
726                    search,
727                    replace,
728                    modifiers,
729                    neg_marker
730                )
731            }
732
733            NodeKind::Package { name, block, name_span: _ } => {
734                if let Some(blk) = block {
735                    format!("(package {} {})", name, blk.to_sexp())
736                } else {
737                    format!("(package {})", name)
738                }
739            }
740
741            NodeKind::Use { module, args, has_filter_risk } => {
742                let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
743                if args.is_empty() {
744                    format!("(use {}{})", module, risk_marker)
745                } else {
746                    let args_str = args.join(" ");
747                    format!("(use {} ({}){})", module, args_str, risk_marker)
748                }
749            }
750
751            NodeKind::No { module, args, has_filter_risk } => {
752                let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
753                if args.is_empty() {
754                    format!("(no {}{})", module, risk_marker)
755                } else {
756                    let args_str = args.join(" ");
757                    format!("(no {} ({}){})", module, args_str, risk_marker)
758                }
759            }
760
761            NodeKind::PhaseBlock { phase, phase_span: _, block } => {
762                format!("({} {})", phase, block.to_sexp())
763            }
764
765            NodeKind::DataSection { marker, body } => {
766                if let Some(body_text) = body {
767                    format!("(data_section {} \"{}\")", marker, body_text.escape_default())
768                } else {
769                    format!("(data_section {})", marker)
770                }
771            }
772
773            NodeKind::Class { name, parents, body } => {
774                if parents.is_empty() {
775                    format!("(class {} {})", name, body.to_sexp())
776                } else {
777                    format!("(class {} :isa({}) {})", name, parents.join(","), body.to_sexp())
778                }
779            }
780
781            NodeKind::Format { name, body } => {
782                format!("(format {} {:?})", name, body)
783            }
784
785            NodeKind::Identifier { name } => {
786                // Format expected by tests: (identifier name)
787                format!("(identifier {})", name)
788            }
789
790            NodeKind::Error { message, partial, .. } => {
791                if let Some(node) = partial {
792                    format!("(ERROR \"{}\" {})", message.escape_default(), node.to_sexp())
793                } else {
794                    format!("(ERROR \"{}\")", message.escape_default())
795                }
796            }
797            NodeKind::MissingExpression => "(missing_expression)".to_string(),
798            NodeKind::MissingStatement => "(missing_statement)".to_string(),
799            NodeKind::MissingIdentifier => "(missing_identifier)".to_string(),
800            NodeKind::MissingBlock => "(missing_block)".to_string(),
801            NodeKind::UnknownRest => "(UNKNOWN_REST)".to_string(),
802        }
803    }
804
805    /// Convert the AST to S-expression format that unwraps expression statements in programs
806    pub fn to_sexp_inner(&self) -> String {
807        match &self.kind {
808            NodeKind::ExpressionStatement { expression } => {
809                // Check if this is an anonymous subroutine - if so, keep it wrapped
810                match &expression.kind {
811                    NodeKind::Subroutine { name, .. } if name.is_none() => {
812                        // Anonymous subroutine should remain wrapped in expression statement
813                        self.to_sexp()
814                    }
815                    _ => {
816                        // In the inner format, other expression statements are unwrapped
817                        expression.to_sexp()
818                    }
819                }
820            }
821            _ => {
822                // For all other node types, use regular to_sexp
823                self.to_sexp()
824            }
825        }
826    }
827
828    /// Call a function on every direct child node of this node.
829    ///
830    /// This enables depth-first traversal for operations like heredoc content attachment.
831    /// The closure receives a mutable reference to each child node.
832    #[inline]
833    pub fn for_each_child_mut<F: FnMut(&mut Node)>(&mut self, mut f: F) {
834        match &mut self.kind {
835            NodeKind::Tie { variable, package, args } => {
836                f(variable);
837                f(package);
838                for arg in args {
839                    f(arg);
840                }
841            }
842            NodeKind::Untie { variable } => f(variable),
843
844            // Root program node
845            NodeKind::Program { statements } => {
846                for stmt in statements {
847                    f(stmt);
848                }
849            }
850
851            // Statement wrappers
852            NodeKind::ExpressionStatement { expression } => f(expression),
853
854            // Variable declarations
855            NodeKind::VariableDeclaration { variable, initializer, .. } => {
856                f(variable);
857                if let Some(init) = initializer {
858                    f(init);
859                }
860            }
861            NodeKind::VariableListDeclaration { variables, initializer, .. } => {
862                for var in variables {
863                    f(var);
864                }
865                if let Some(init) = initializer {
866                    f(init);
867                }
868            }
869            NodeKind::VariableWithAttributes { variable, .. } => f(variable),
870
871            // Binary operations
872            NodeKind::Binary { left, right, .. } => {
873                f(left);
874                f(right);
875            }
876            NodeKind::Ternary { condition, then_expr, else_expr } => {
877                f(condition);
878                f(then_expr);
879                f(else_expr);
880            }
881            NodeKind::Unary { operand, .. } => f(operand),
882            NodeKind::Assignment { lhs, rhs, .. } => {
883                f(lhs);
884                f(rhs);
885            }
886
887            // Control flow
888            NodeKind::Block { statements } => {
889                for stmt in statements {
890                    f(stmt);
891                }
892            }
893            NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
894                f(condition);
895                f(then_branch);
896                for (elsif_cond, elsif_body) in elsif_branches {
897                    f(elsif_cond);
898                    f(elsif_body);
899                }
900                if let Some(else_body) = else_branch {
901                    f(else_body);
902                }
903            }
904            NodeKind::While { condition, body, continue_block, .. } => {
905                f(condition);
906                f(body);
907                if let Some(cont) = continue_block {
908                    f(cont);
909                }
910            }
911            NodeKind::For { init, condition, update, body, continue_block, .. } => {
912                if let Some(i) = init {
913                    f(i);
914                }
915                if let Some(c) = condition {
916                    f(c);
917                }
918                if let Some(u) = update {
919                    f(u);
920                }
921                f(body);
922                if let Some(cont) = continue_block {
923                    f(cont);
924                }
925            }
926            NodeKind::Foreach { variable, list, body, continue_block } => {
927                f(variable);
928                f(list);
929                f(body);
930                if let Some(cb) = continue_block {
931                    f(cb);
932                }
933            }
934            NodeKind::Given { expr, body } => {
935                f(expr);
936                f(body);
937            }
938            NodeKind::When { condition, body } => {
939                f(condition);
940                f(body);
941            }
942            NodeKind::Default { body } => f(body),
943            NodeKind::StatementModifier { statement, condition, .. } => {
944                f(statement);
945                f(condition);
946            }
947            NodeKind::LabeledStatement { statement, .. } => f(statement),
948
949            // Eval and Do blocks
950            NodeKind::Eval { block } => f(block),
951            NodeKind::Do { block } => f(block),
952            NodeKind::Defer { block } => f(block),
953            NodeKind::Try { body, catch_blocks, finally_block } => {
954                f(body);
955                for (_, catch_body) in catch_blocks {
956                    f(catch_body);
957                }
958                if let Some(finally) = finally_block {
959                    f(finally);
960                }
961            }
962
963            // Function calls
964            NodeKind::FunctionCall { args, .. } => {
965                for arg in args {
966                    f(arg);
967                }
968            }
969            NodeKind::MethodCall { object, args, .. } => {
970                f(object);
971                for arg in args {
972                    f(arg);
973                }
974            }
975            NodeKind::IndirectCall { object, args, .. } => {
976                f(object);
977                for arg in args {
978                    f(arg);
979                }
980            }
981
982            // Functions
983            NodeKind::Subroutine { prototype, signature, body, .. } => {
984                if let Some(proto) = prototype {
985                    f(proto);
986                }
987                if let Some(sig) = signature {
988                    f(sig);
989                }
990                f(body);
991            }
992            NodeKind::Method { signature, body, .. } => {
993                if let Some(sig) = signature {
994                    f(sig);
995                }
996                f(body);
997            }
998            NodeKind::Return { value } => {
999                if let Some(v) = value {
1000                    f(v);
1001                }
1002            }
1003            NodeKind::Goto { target } => f(target),
1004            NodeKind::Signature { parameters } => {
1005                for param in parameters {
1006                    f(param);
1007                }
1008            }
1009            NodeKind::MandatoryParameter { variable } => f(variable),
1010            NodeKind::OptionalParameter { variable, default_value } => {
1011                f(variable);
1012                f(default_value);
1013            }
1014            NodeKind::SlurpyParameter { variable } => f(variable),
1015            NodeKind::NamedParameter { variable } => f(variable),
1016
1017            // Pattern matching
1018            NodeKind::Match { expr, .. } => f(expr),
1019            NodeKind::Substitution { expr, .. } => f(expr),
1020            NodeKind::Transliteration { expr, .. } => f(expr),
1021
1022            // Containers
1023            NodeKind::ArrayLiteral { elements } => {
1024                for elem in elements {
1025                    f(elem);
1026                }
1027            }
1028            NodeKind::HashLiteral { pairs } => {
1029                for (key, value) in pairs {
1030                    f(key);
1031                    f(value);
1032                }
1033            }
1034
1035            // Package system
1036            NodeKind::Package { block, .. } => {
1037                if let Some(b) = block {
1038                    f(b);
1039                }
1040            }
1041            NodeKind::PhaseBlock { block, .. } => f(block),
1042            NodeKind::Class { body, .. } => f(body),
1043
1044            // Error node might have a partial valid tree
1045            NodeKind::Error { partial, .. } => {
1046                if let Some(node) = partial {
1047                    f(node);
1048                }
1049            }
1050
1051            // Leaf nodes (no children to traverse)
1052            NodeKind::Variable { .. }
1053            | NodeKind::Identifier { .. }
1054            | NodeKind::Number { .. }
1055            | NodeKind::String { .. }
1056            | NodeKind::Heredoc { .. }
1057            | NodeKind::Regex { .. }
1058            | NodeKind::Readline { .. }
1059            | NodeKind::Glob { .. }
1060            | NodeKind::Typeglob { .. }
1061            | NodeKind::Diamond
1062            | NodeKind::Ellipsis
1063            | NodeKind::Undef
1064            | NodeKind::Use { .. }
1065            | NodeKind::No { .. }
1066            | NodeKind::Prototype { .. }
1067            | NodeKind::DataSection { .. }
1068            | NodeKind::Format { .. }
1069            | NodeKind::LoopControl { .. }
1070            | NodeKind::MissingExpression
1071            | NodeKind::MissingStatement
1072            | NodeKind::MissingIdentifier
1073            | NodeKind::MissingBlock
1074            | NodeKind::UnknownRest => {}
1075        }
1076    }
1077
1078    /// Call a function on every direct child node of this node (immutable version).
1079    ///
1080    /// This enables depth-first traversal for read-only operations like AST analysis.
1081    /// The closure receives an immutable reference to each child node.
1082    #[inline]
1083    pub fn for_each_child<'a, F: FnMut(&'a Node)>(&'a self, mut f: F) {
1084        match &self.kind {
1085            NodeKind::Tie { variable, package, args } => {
1086                f(variable);
1087                f(package);
1088                for arg in args {
1089                    f(arg);
1090                }
1091            }
1092            NodeKind::Untie { variable } => f(variable),
1093
1094            // Root program node
1095            NodeKind::Program { statements } => {
1096                for stmt in statements {
1097                    f(stmt);
1098                }
1099            }
1100
1101            // Statement wrappers
1102            NodeKind::ExpressionStatement { expression } => f(expression),
1103
1104            // Variable declarations
1105            NodeKind::VariableDeclaration { variable, initializer, .. } => {
1106                f(variable);
1107                if let Some(init) = initializer {
1108                    f(init);
1109                }
1110            }
1111            NodeKind::VariableListDeclaration { variables, initializer, .. } => {
1112                for var in variables {
1113                    f(var);
1114                }
1115                if let Some(init) = initializer {
1116                    f(init);
1117                }
1118            }
1119            NodeKind::VariableWithAttributes { variable, .. } => f(variable),
1120
1121            // Binary operations
1122            NodeKind::Binary { left, right, .. } => {
1123                f(left);
1124                f(right);
1125            }
1126            NodeKind::Ternary { condition, then_expr, else_expr } => {
1127                f(condition);
1128                f(then_expr);
1129                f(else_expr);
1130            }
1131            NodeKind::Unary { operand, .. } => f(operand),
1132            NodeKind::Assignment { lhs, rhs, .. } => {
1133                f(lhs);
1134                f(rhs);
1135            }
1136
1137            // Control flow
1138            NodeKind::Block { statements } => {
1139                for stmt in statements {
1140                    f(stmt);
1141                }
1142            }
1143            NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
1144                f(condition);
1145                f(then_branch);
1146                for (elsif_cond, elsif_body) in elsif_branches {
1147                    f(elsif_cond);
1148                    f(elsif_body);
1149                }
1150                if let Some(else_body) = else_branch {
1151                    f(else_body);
1152                }
1153            }
1154            NodeKind::While { condition, body, continue_block, .. } => {
1155                f(condition);
1156                f(body);
1157                if let Some(cont) = continue_block {
1158                    f(cont);
1159                }
1160            }
1161            NodeKind::For { init, condition, update, body, continue_block, .. } => {
1162                if let Some(i) = init {
1163                    f(i);
1164                }
1165                if let Some(c) = condition {
1166                    f(c);
1167                }
1168                if let Some(u) = update {
1169                    f(u);
1170                }
1171                f(body);
1172                if let Some(cont) = continue_block {
1173                    f(cont);
1174                }
1175            }
1176            NodeKind::Foreach { variable, list, body, continue_block } => {
1177                f(variable);
1178                f(list);
1179                f(body);
1180                if let Some(cb) = continue_block {
1181                    f(cb);
1182                }
1183            }
1184            NodeKind::Given { expr, body } => {
1185                f(expr);
1186                f(body);
1187            }
1188            NodeKind::When { condition, body } => {
1189                f(condition);
1190                f(body);
1191            }
1192            NodeKind::Default { body } => f(body),
1193            NodeKind::StatementModifier { statement, condition, .. } => {
1194                f(statement);
1195                f(condition);
1196            }
1197            NodeKind::LabeledStatement { statement, .. } => f(statement),
1198
1199            // Eval and Do blocks
1200            NodeKind::Eval { block } => f(block),
1201            NodeKind::Do { block } => f(block),
1202            NodeKind::Defer { block } => f(block),
1203            NodeKind::Try { body, catch_blocks, finally_block } => {
1204                f(body);
1205                for (_, catch_body) in catch_blocks {
1206                    f(catch_body);
1207                }
1208                if let Some(finally) = finally_block {
1209                    f(finally);
1210                }
1211            }
1212
1213            // Function calls
1214            NodeKind::FunctionCall { args, .. } => {
1215                for arg in args {
1216                    f(arg);
1217                }
1218            }
1219            NodeKind::MethodCall { object, args, .. } => {
1220                f(object);
1221                for arg in args {
1222                    f(arg);
1223                }
1224            }
1225            NodeKind::IndirectCall { object, args, .. } => {
1226                f(object);
1227                for arg in args {
1228                    f(arg);
1229                }
1230            }
1231
1232            // Functions
1233            NodeKind::Subroutine { prototype, signature, body, .. } => {
1234                if let Some(proto) = prototype {
1235                    f(proto);
1236                }
1237                if let Some(sig) = signature {
1238                    f(sig);
1239                }
1240                f(body);
1241            }
1242            NodeKind::Method { signature, body, .. } => {
1243                if let Some(sig) = signature {
1244                    f(sig);
1245                }
1246                f(body);
1247            }
1248            NodeKind::Return { value } => {
1249                if let Some(v) = value {
1250                    f(v);
1251                }
1252            }
1253            NodeKind::Goto { target } => f(target),
1254            NodeKind::Signature { parameters } => {
1255                for param in parameters {
1256                    f(param);
1257                }
1258            }
1259            NodeKind::MandatoryParameter { variable } => f(variable),
1260            NodeKind::OptionalParameter { variable, default_value } => {
1261                f(variable);
1262                f(default_value);
1263            }
1264            NodeKind::SlurpyParameter { variable } => f(variable),
1265            NodeKind::NamedParameter { variable } => f(variable),
1266
1267            // Pattern matching
1268            NodeKind::Match { expr, .. } => f(expr),
1269            NodeKind::Substitution { expr, .. } => f(expr),
1270            NodeKind::Transliteration { expr, .. } => f(expr),
1271
1272            // Containers
1273            NodeKind::ArrayLiteral { elements } => {
1274                for elem in elements {
1275                    f(elem);
1276                }
1277            }
1278            NodeKind::HashLiteral { pairs } => {
1279                for (key, value) in pairs {
1280                    f(key);
1281                    f(value);
1282                }
1283            }
1284
1285            // Package system
1286            NodeKind::Package { block, .. } => {
1287                if let Some(b) = block {
1288                    f(b);
1289                }
1290            }
1291            NodeKind::PhaseBlock { block, .. } => f(block),
1292            NodeKind::Class { body, .. } => f(body),
1293
1294            // Error node might have a partial valid tree
1295            NodeKind::Error { partial, .. } => {
1296                if let Some(node) = partial {
1297                    f(node);
1298                }
1299            }
1300
1301            // Leaf nodes (no children to traverse)
1302            NodeKind::Variable { .. }
1303            | NodeKind::Identifier { .. }
1304            | NodeKind::Number { .. }
1305            | NodeKind::String { .. }
1306            | NodeKind::Heredoc { .. }
1307            | NodeKind::Regex { .. }
1308            | NodeKind::Readline { .. }
1309            | NodeKind::Glob { .. }
1310            | NodeKind::Typeglob { .. }
1311            | NodeKind::Diamond
1312            | NodeKind::Ellipsis
1313            | NodeKind::Undef
1314            | NodeKind::Use { .. }
1315            | NodeKind::No { .. }
1316            | NodeKind::Prototype { .. }
1317            | NodeKind::DataSection { .. }
1318            | NodeKind::Format { .. }
1319            | NodeKind::LoopControl { .. }
1320            | NodeKind::MissingExpression
1321            | NodeKind::MissingStatement
1322            | NodeKind::MissingIdentifier
1323            | NodeKind::MissingBlock
1324            | NodeKind::UnknownRest => {}
1325        }
1326    }
1327
1328    /// Count the total number of nodes in this subtree (inclusive).
1329    ///
1330    /// # Examples
1331    ///
1332    /// ```
1333    /// use perl_ast::{Node, NodeKind, SourceLocation};
1334    ///
1335    /// let loc = SourceLocation { start: 0, end: 1 };
1336    /// let leaf = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1337    /// assert_eq!(leaf.count_nodes(), 1);
1338    ///
1339    /// let program = Node::new(
1340    ///     NodeKind::Program { statements: vec![leaf] },
1341    ///     loc,
1342    /// );
1343    /// assert_eq!(program.count_nodes(), 2);
1344    /// ```
1345    pub fn count_nodes(&self) -> usize {
1346        let mut count = 1;
1347        self.for_each_child(|child| {
1348            count += child.count_nodes();
1349        });
1350        count
1351    }
1352
1353    /// Collect direct child nodes into a vector for convenience APIs.
1354    ///
1355    /// # Examples
1356    ///
1357    /// ```
1358    /// use perl_ast::{Node, NodeKind, SourceLocation};
1359    ///
1360    /// let loc = SourceLocation { start: 0, end: 1 };
1361    /// let stmt = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1362    /// let program = Node::new(
1363    ///     NodeKind::Program { statements: vec![stmt] },
1364    ///     loc,
1365    /// );
1366    /// assert_eq!(program.children().len(), 1);
1367    /// ```
1368    #[inline]
1369    pub fn children(&self) -> Vec<&Node> {
1370        let mut children = Vec::new();
1371        self.for_each_child(|child| children.push(child));
1372        children
1373    }
1374
1375    /// Count direct child nodes without allocating an intermediate vector.
1376    ///
1377    /// This is more efficient than `children().len()` when callers only need
1378    /// cardinality.
1379    #[inline]
1380    pub fn child_count(&self) -> usize {
1381        let mut count = 0;
1382        self.for_each_child(|_| count += 1);
1383        count
1384    }
1385
1386    /// Get the first direct child node, if any.
1387    ///
1388    /// Optimized to avoid allocating the children vector.
1389    #[inline]
1390    pub fn first_child(&self) -> Option<&Node> {
1391        let mut result = None;
1392        self.for_each_child(|child| {
1393            if result.is_none() {
1394                result = Some(child);
1395            }
1396        });
1397        result
1398    }
1399
1400    /// Returns `true` when this node's source span contains `offset`.
1401    ///
1402    /// The start position is inclusive and the end position is exclusive.
1403    #[inline]
1404    pub fn contains_offset(&self, offset: usize) -> bool {
1405        self.location.start <= offset && offset < self.location.end
1406    }
1407
1408    /// Find the most specific node whose source span contains `offset`.
1409    ///
1410    /// Returns `None` when `offset` is outside this node. Otherwise, returns this
1411    /// node or the deepest descendant whose span contains the offset. This is useful
1412    /// for LSP features that need to map a cursor byte offset to the smallest AST
1413    /// construct at that position.
1414    ///
1415    /// The same half-open span semantics as [`Node::contains_offset`] apply: start
1416    /// positions are inclusive and end positions are exclusive.
1417    ///
1418    /// # Examples
1419    ///
1420    /// ```
1421    /// use perl_ast::{Node, NodeKind, SourceLocation};
1422    ///
1423    /// let left = Node::new(
1424    ///     NodeKind::Identifier { name: "left".to_string() },
1425    ///     SourceLocation { start: 0, end: 4 },
1426    /// );
1427    /// let right = Node::new(
1428    ///     NodeKind::Number { value: "1".to_string() },
1429    ///     SourceLocation { start: 7, end: 8 },
1430    /// );
1431    /// let expr = Node::new(
1432    ///     NodeKind::Binary {
1433    ///         op: "+".to_string(),
1434    ///         left: Box::new(left),
1435    ///         right: Box::new(right),
1436    ///     },
1437    ///     SourceLocation { start: 0, end: 8 },
1438    /// );
1439    ///
1440    /// assert_eq!(
1441    ///     expr.find_deepest_containing_offset(7).map(|node| node.kind.kind_name()),
1442    ///     Some("Number"),
1443    /// );
1444    /// assert_eq!(expr.find_deepest_containing_offset(8), None);
1445    /// ```
1446    #[inline]
1447    pub fn find_deepest_containing_offset(&self, offset: usize) -> Option<&Node> {
1448        if !self.contains_offset(offset) {
1449            return None;
1450        }
1451
1452        let mut result = self;
1453        self.for_each_child(|child| {
1454            if let Some(descendant) = child.find_deepest_containing_offset(offset) {
1455                result = descendant;
1456            }
1457        });
1458        Some(result)
1459    }
1460
1461    /// Returns the byte length of this node's source span.
1462    ///
1463    /// Uses saturating subtraction so malformed spans never underflow.
1464    #[inline]
1465    pub fn span_len(&self) -> usize {
1466        self.location.end.saturating_sub(self.location.start)
1467    }
1468
1469    /// Get the last direct child node, if any.
1470    ///
1471    /// Optimized to avoid allocating the children vector.
1472    ///
1473    /// # Examples
1474    ///
1475    /// ```
1476    /// use perl_ast::{Node, NodeKind, SourceLocation};
1477    ///
1478    /// let loc = SourceLocation { start: 0, end: 1 };
1479    /// let first = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1480    /// let second = Node::new(NodeKind::Number { value: "2".to_string() }, loc);
1481    /// let program = Node::new(
1482    ///     NodeKind::Program { statements: vec![first, second] },
1483    ///     loc,
1484    /// );
1485    ///
1486    /// assert_eq!(program.last_child().map(|n| n.kind.kind_name()), Some("Number"));
1487    /// assert_eq!(Node::new(NodeKind::Block { statements: vec![] }, loc).last_child(), None);
1488    /// ```
1489    #[inline]
1490    pub fn last_child(&self) -> Option<&Node> {
1491        let mut result = None;
1492        self.for_each_child(|child| {
1493            result = Some(child);
1494        });
1495        result
1496    }
1497}
1498
1499/// Comprehensive enumeration of all Perl language constructs supported by the parser.
1500///
1501/// This enum represents every possible AST node type that can be parsed from Perl code
1502/// during the Parse → Index → Navigate → Complete → Analyze workflow. Each variant captures
1503/// the semantic meaning and structural relationships needed for complete script analysis
1504/// and transformation.
1505///
1506/// # LSP Workflow Integration
1507///
1508/// Node kinds are processed differently across workflow stages:
1509/// - **Parse**: All variants are produced by the parser
1510/// - **Index**: Symbol-bearing variants feed workspace indexing
1511/// - **Navigate**: Call and reference variants support navigation features
1512/// - **Complete**: Expression variants provide completion context
1513/// - **Analyze**: Semantic variants drive diagnostics and refactoring
1514///
1515/// # Examples
1516///
1517/// Pattern-match on node kinds to extract semantic information:
1518///
1519/// ```
1520/// use perl_ast::{Node, NodeKind, SourceLocation};
1521///
1522/// let loc = SourceLocation { start: 0, end: 5 };
1523/// let node = Node::new(
1524///     NodeKind::Variable { sigil: "$".to_string(), name: "foo".to_string() },
1525///     loc,
1526/// );
1527///
1528/// assert!(matches!(
1529///     &node.kind,
1530///     NodeKind::Variable { sigil, name } if sigil == "$" && name == "foo"
1531/// ));
1532/// ```
1533///
1534/// Use [`kind_name()`](NodeKind::kind_name) for debugging and diagnostics:
1535///
1536/// ```
1537/// use perl_ast::NodeKind;
1538///
1539/// let kind = NodeKind::Number { value: "99".to_string() };
1540/// assert_eq!(kind.kind_name(), "Number");
1541///
1542/// let kind = NodeKind::Variable { sigil: "@".to_string(), name: "list".to_string() };
1543/// assert_eq!(kind.kind_name(), "Variable");
1544/// ```
1545///
1546/// # Performance Considerations
1547///
1548/// The enum design optimizes for large codebases:
1549/// - Box pointers minimize stack usage for recursive structures
1550/// - Vector storage enables efficient bulk operations on child nodes
1551/// - Clone operations optimized for concurrent analysis workflows
1552/// - Pattern matching performance tuned for common Perl constructs
1553#[derive(Debug, Clone, PartialEq)]
1554pub enum NodeKind {
    /// Top-level program containing all statements in a Perl script
1556    ///
1557    /// This is the root node for any parsed Perl script content, containing all
1558    /// top-level statements found during the Parse stage of LSP workflow.
1559    Program {
1560        /// All top-level statements in the Perl script
1561        statements: Vec<Node>,
1562    },
1563
1564    /// Statement wrapper for expressions that appear at statement level
1565    ///
1566    /// Used during Analyze stage to distinguish between expressions used as
1567    /// statements versus expressions within other contexts during Perl parsing.
1568    ExpressionStatement {
1569        /// The expression being used as a statement
1570        expression: Box<Node>,
1571    },
1572
1573    /// Variable declaration with scope declarator in Perl script processing
1574    ///
1575    /// Represents declarations like `my $var`, `our $global`, `local $dynamic`, etc.
1576    /// Critical for Analyze stage symbol table construction during Perl parsing.
1577    VariableDeclaration {
1578        /// Scope declarator: "my", "our", "local", "state"
1579        declarator: String,
1580        /// The variable being declared
1581        variable: Box<Node>,
1582        /// Variable attributes (e.g., ":shared", ":locked")
1583        attributes: Vec<String>,
1584        /// Optional initializer expression
1585        initializer: Option<Box<Node>>,
1586    },
1587
1588    /// Multiple variable declaration in a single statement
1589    ///
1590    /// Handles constructs like `my ($x, $y) = @values` common in Perl script processing.
1591    /// Supports efficient bulk variable analysis during Navigate stage operations.
1592    VariableListDeclaration {
1593        /// Scope declarator for all variables in the list
1594        declarator: String,
1595        /// All variables being declared in the list
1596        variables: Vec<Node>,
1597        /// Attributes applied to the variable list
1598        attributes: Vec<String>,
1599        /// Optional initializer for the entire variable list
1600        initializer: Option<Box<Node>>,
1601    },
1602
1603    /// Perl variable reference (scalar, array, hash, etc.) in Perl parsing workflow
1604    Variable {
1605        /// Variable sigil indicating type: $, @, %, &, *
1606        sigil: String, // $, @, %, &, *
1607        /// Variable name without sigil
1608        name: String,
1609    },
1610
1611    /// Variable with additional attributes for enhanced LSP workflow
1612    VariableWithAttributes {
1613        /// The base variable node
1614        variable: Box<Node>,
1615        /// List of attribute names applied to the variable
1616        attributes: Vec<String>,
1617    },
1618
1619    /// Assignment operation for LSP data processing workflows
1620    Assignment {
1621        /// Left-hand side of assignment
1622        lhs: Box<Node>,
1623        /// Right-hand side of assignment
1624        rhs: Box<Node>,
1625        /// Assignment operator: =, +=, -=, etc.
1626        op: String, // =, +=, -=, etc.
1627    },
1628
1629    // Expressions
1630    /// Binary operation for Perl parsing workflow calculations
1631    Binary {
1632        /// Binary operator
1633        op: String,
1634        /// Left operand
1635        left: Box<Node>,
1636        /// Right operand
1637        right: Box<Node>,
1638    },
1639
1640    /// Ternary conditional expression for Perl parsing workflow logic
1641    Ternary {
1642        /// Condition to evaluate
1643        condition: Box<Node>,
1644        /// Expression when condition is true
1645        then_expr: Box<Node>,
1646        /// Expression when condition is false
1647        else_expr: Box<Node>,
1648    },
1649
1650    /// Unary operation for Perl parsing workflow
1651    Unary {
1652        /// Unary operator
1653        op: String,
1654        /// Operand to apply operator to
1655        operand: Box<Node>,
1656    },
1657
1658    // I/O operations
1659    /// Diamond operator for file input in Perl parsing workflow
1660    Diamond, // <>
1661
1662    /// Ellipsis operator for Perl parsing workflow
1663    Ellipsis, // ...
1664
1665    /// Undef value for Perl parsing workflow
1666    Undef, // undef
1667
1668    /// Readline operation for LSP file processing
1669    Readline {
1670        /// Optional filehandle: `<STDIN>`, `<$fh>`, etc.
1671        filehandle: Option<String>, // <STDIN>, <$fh>, etc.
1672    },
1673
1674    /// Glob pattern for LSP workspace file matching
1675    Glob {
1676        /// Pattern string for file matching
1677        pattern: String, // <*.txt>
1678    },
1679
1680    /// Typeglob expression: `*foo` or `*main::bar`
1681    ///
1682    /// Provides access to all symbol table entries for a given name.
1683    Typeglob {
1684        /// Name of the symbol (including package qualification)
1685        name: String,
1686    },
1687
1688    /// Numeric literal in Perl code (integer, float, hex, octal, binary)
1689    ///
1690    /// Represents all numeric literal forms: `42`, `3.14`, `0x1A`, `0o755`, `0b1010`.
1691    Number {
1692        /// String representation preserving original format
1693        value: String,
1694    },
1695
1696    /// String literal with optional interpolation
1697    ///
1698    /// Handles both single-quoted (`'literal'`) and double-quoted (`"$interpolated"`) strings.
1699    String {
1700        /// String content (after quote processing)
1701        value: String,
1702        /// Whether the string supports variable interpolation
1703        interpolated: bool,
1704    },
1705
1706    /// Heredoc string literal for multi-line content
1707    ///
1708    /// Supports all heredoc forms: `<<EOF`, `<<'EOF'`, `<<"EOF"`, `<<~EOF` (indented).
1709    Heredoc {
1710        /// Delimiter marking heredoc boundaries
1711        delimiter: String,
1712        /// Content between delimiters
1713        content: String,
1714        /// Whether content supports variable interpolation
1715        interpolated: bool,
1716        /// Whether leading whitespace is stripped (<<~ form)
1717        indented: bool,
1718        /// Whether this is a command execution heredoc (<<`EOF`)
1719        command: bool,
1720        /// Body span for breakpoint detection (populated by drain_pending_heredocs)
1721        body_span: Option<SourceLocation>,
1722    },
1723
1724    /// Array literal expression: `(1, 2, 3)` or `[1, 2, 3]`
1725    ArrayLiteral {
1726        /// Elements in the array
1727        elements: Vec<Node>,
1728    },
1729
1730    /// Hash literal expression: `(key => 'value')` or `{key => 'value'}`
1731    HashLiteral {
1732        /// Key-value pairs in the hash
1733        pairs: Vec<(Node, Node)>,
1734    },
1735
1736    /// Block of statements: `{ ... }`
1737    ///
1738    /// Used for control structures, subroutine bodies, and bare blocks.
1739    Block {
1740        /// Statements within the block
1741        statements: Vec<Node>,
1742    },
1743
1744    /// Eval block for exception handling: `eval { ... }`
1745    Eval {
1746        /// Block to evaluate with exception trapping
1747        block: Box<Node>,
1748    },
1749
1750    /// Do block for file inclusion or expression evaluation: `do { ... }` or `do "file"`
1751    Do {
1752        /// Block to execute or file expression
1753        block: Box<Node>,
1754    },
1755
1756    /// Defer block for deferred cleanup on scope exit (Perl 5.36+ experimental, stable in 5.40)
1757    Defer {
1758        /// Block to execute on scope exit
1759        block: Box<Node>,
1760    },
1761
1762    /// Try-catch-finally for modern exception handling (Syntax::Keyword::Try style)
1763    Try {
1764        /// Try block body
1765        body: Box<Node>,
1766        /// Catch blocks: (optional exception variable, handler block)
1767        catch_blocks: Vec<(Option<String>, Box<Node>)>,
1768        /// Optional finally block
1769        finally_block: Option<Box<Node>>,
1770    },
1771
1772    /// If-elsif-else conditional statement
1773    If {
1774        /// Condition expression
1775        condition: Box<Node>,
1776        /// Then branch block
1777        then_branch: Box<Node>,
1778        /// Elsif branches: (condition, block) pairs
1779        elsif_branches: Vec<(Box<Node>, Box<Node>)>,
1780        /// Optional else branch
1781        else_branch: Option<Box<Node>>,
1782    },
1783
1784    /// Statement with a label for loop control: `LABEL: while (...)`
1785    LabeledStatement {
1786        /// Label name (e.g., "OUTER", "LINE")
1787        label: String,
1788        /// Labeled statement (typically a loop)
1789        statement: Box<Node>,
1790    },
1791
1792    /// While loop: `while (condition) { ... }`
1793    While {
1794        /// Loop condition
1795        condition: Box<Node>,
1796        /// Loop body
1797        body: Box<Node>,
1798        /// Optional continue block
1799        continue_block: Option<Box<Node>>,
1800    },
1801
1802    /// Tie operation for binding variables to objects: `tie %hash, 'Package', @args`
1803    Tie {
1804        /// Variable being tied
1805        variable: Box<Node>,
1806        /// Class/package name to tie to
1807        package: Box<Node>,
1808        /// Arguments passed to TIE* method
1809        args: Vec<Node>,
1810    },
1811
1812    /// Untie operation for unbinding variables: `untie %hash`
1813    Untie {
1814        /// Variable being untied
1815        variable: Box<Node>,
1816    },
1817
1818    /// C-style for loop: `for (init; cond; update) { ... }`
1819    For {
1820        /// Initialization expression
1821        init: Option<Box<Node>>,
1822        /// Loop condition
1823        condition: Option<Box<Node>>,
1824        /// Update expression
1825        update: Option<Box<Node>>,
1826        /// Loop body
1827        body: Box<Node>,
1828        /// Optional continue block
1829        continue_block: Option<Box<Node>>,
1830    },
1831
1832    /// Foreach loop: `foreach my $item (@list) { ... }`
1833    Foreach {
1834        /// Iterator variable
1835        variable: Box<Node>,
1836        /// List to iterate
1837        list: Box<Node>,
1838        /// Loop body
1839        body: Box<Node>,
1840        /// Optional continue block
1841        continue_block: Option<Box<Node>>,
1842    },
1843
1844    /// Given statement for switch-like matching (Perl 5.10+)
1845    Given {
1846        /// Expression to match against
1847        expr: Box<Node>,
1848        /// Body containing when/default blocks
1849        body: Box<Node>,
1850    },
1851
1852    /// When clause in given/switch: `when ($pattern) { ... }`
1853    When {
1854        /// Pattern to match
1855        condition: Box<Node>,
1856        /// Handler block
1857        body: Box<Node>,
1858    },
1859
1860    /// Default clause in given/switch: `default { ... }`
1861    Default {
1862        /// Handler block for unmatched cases
1863        body: Box<Node>,
1864    },
1865
1866    /// Statement modifier syntax: `print "ok" if $condition`
1867    StatementModifier {
1868        /// Statement to conditionally execute
1869        statement: Box<Node>,
1870        /// Modifier keyword: if, unless, while, until, for, foreach
1871        modifier: String,
1872        /// Modifier condition
1873        condition: Box<Node>,
1874    },
1875
1876    // Functions
1877    /// Subroutine declaration (function) including name, prototype, signature and body.
1878    Subroutine {
1879        /// Name of the subroutine
1880        ///
1881        /// # Precise Navigation Support
1882        /// - Added name_span for exact LSP navigation
1883        /// - Enables precise go-to-definition and hover behavior
1884        /// - O(1) span lookup in workspace symbols
1885        ///
1886        /// ## Integration Points
1887        /// - Semantic token providers
1888        /// - Cross-reference generation
1889        /// - Symbol renaming
1890        name: Option<String>,
1891
1892        /// Source location span of the subroutine name
1893        ///
1894        /// ## Usage Notes
1895        /// - Always corresponds to the name field
1896        /// - Provides constant-time position information
1897        /// - Essential for precise editor interactions
1898        name_span: Option<SourceLocation>,
1899
1900        /// Optional prototype node (e.g. `($;@)`).
1901        prototype: Option<Box<Node>>,
1902        /// Optional signature node (Perl 5.20+ feature).
1903        signature: Option<Box<Node>>,
1904        /// Attributes attached to the subroutine (`:lvalue`, etc.).
1905        attributes: Vec<String>,
1906        /// The body block of the subroutine.
1907        body: Box<Node>,
1908    },
1909
1910    /// Subroutine prototype specification: `sub foo ($;@) { ... }`
1911    Prototype {
1912        /// Prototype string defining argument behavior
1913        content: String,
1914    },
1915
1916    /// Subroutine signature (Perl 5.20+): `sub foo ($x, $y = 0) { ... }`
1917    Signature {
1918        /// List of signature parameters
1919        parameters: Vec<Node>,
1920    },
1921
1922    /// Mandatory signature parameter: `$x` in `sub foo ($x) { }`
1923    MandatoryParameter {
1924        /// Variable being bound
1925        variable: Box<Node>,
1926    },
1927
1928    /// Optional signature parameter with default: `$y = 0` in `sub foo ($y = 0) { }`
1929    OptionalParameter {
1930        /// Variable being bound
1931        variable: Box<Node>,
1932        /// Default value expression
1933        default_value: Box<Node>,
1934    },
1935
1936    /// Slurpy parameter collecting remaining args: `@rest` or `%opts` in signature
1937    SlurpyParameter {
1938        /// Array or hash variable to receive remaining arguments
1939        variable: Box<Node>,
1940    },
1941
1942    /// Named parameter placeholder in signature (future Perl feature)
1943    NamedParameter {
1944        /// Variable for named parameter binding
1945        variable: Box<Node>,
1946    },
1947
1948    /// Method declaration (Perl 5.38+ with `use feature 'class'`)
1949    Method {
1950        /// Method name
1951        name: String,
1952        /// Optional signature
1953        signature: Option<Box<Node>>,
1954        /// Method attributes (e.g., `:lvalue`)
1955        attributes: Vec<String>,
1956        /// Method body
1957        body: Box<Node>,
1958    },
1959
1960    /// Return statement: `return;` or `return $value;`
1961    Return {
1962        /// Optional return value
1963        value: Option<Box<Node>>,
1964    },
1965
1966    /// Loop control statement: `next`, `last`, or `redo`
1967    LoopControl {
1968        /// Control keyword: "next", "last", or "redo"
1969        op: String,
1970        /// Optional label: `next LABEL`
1971        label: Option<String>,
1972    },
1973
1974    /// Goto statement: `goto LABEL`, `goto &sub`, or `goto $expr`
1975    Goto {
1976        /// The target of the goto (label identifier, sub reference, or expression)
1977        target: Box<Node>,
1978    },
1979
1980    /// Method call: `$obj->method(@args)` or `$obj->method`
1981    MethodCall {
1982        /// Object or class expression
1983        object: Box<Node>,
1984        /// Method name being called
1985        method: String,
1986        /// Method arguments
1987        args: Vec<Node>,
1988    },
1989
1990    /// Function call: `foo(@args)` or `foo()`
1991    FunctionCall {
1992        /// Function name (may be qualified: `Package::func`)
1993        name: String,
1994        /// Function arguments
1995        args: Vec<Node>,
1996    },
1997
1998    /// Indirect object call (legacy syntax): `new Class @args`
1999    IndirectCall {
2000        /// Method name
2001        method: String,
2002        /// Object or class
2003        object: Box<Node>,
2004        /// Arguments
2005        args: Vec<Node>,
2006    },
2007
2008    /// Regex literal: `/pattern/modifiers` or `qr/pattern/modifiers`
2009    Regex {
2010        /// Regular expression pattern
2011        pattern: String,
2012        /// Replacement string (for s/// when parsed as regex)
2013        replacement: Option<String>,
2014        /// Regex modifiers (i, m, s, x, g, etc.)
2015        modifiers: String,
2016        /// Whether the regex contains embedded code `(?{...})`
2017        has_embedded_code: bool,
2018    },
2019
2020    /// Match operation: `$str =~ /pattern/modifiers` or `$str !~ /pattern/modifiers`
2021    Match {
2022        /// Expression to match against
2023        expr: Box<Node>,
2024        /// Pattern to match
2025        pattern: String,
2026        /// Match modifiers
2027        modifiers: String,
2028        /// Whether the regex contains embedded code `(?{...})`
2029        has_embedded_code: bool,
2030        /// Whether the binding operator was `!~` (negated match)
2031        negated: bool,
2032    },
2033
2034    /// Substitution operation: `$str =~ s/pattern/replacement/modifiers`
2035    Substitution {
2036        /// Expression to substitute in
2037        expr: Box<Node>,
2038        /// Pattern to find
2039        pattern: String,
2040        /// Replacement string
2041        replacement: String,
2042        /// Substitution modifiers (g, e, r, etc.)
2043        modifiers: String,
2044        /// Whether the regex contains embedded code `(?{...})`
2045        has_embedded_code: bool,
2046        /// Whether the binding operator was `!~` (negated match)
2047        negated: bool,
2048    },
2049
2050    /// Transliteration operation: `$str =~ tr/search/replace/` or `y///`
2051    Transliteration {
2052        /// Expression to transliterate
2053        expr: Box<Node>,
2054        /// Characters to search for
2055        search: String,
2056        /// Replacement characters
2057        replace: String,
2058        /// Transliteration modifiers (c, d, s, r)
2059        modifiers: String,
2060        /// Whether the binding operator was `!~` (negated match)
2061        negated: bool,
2062    },
2063
2064    // Package system
2065    /// Package declaration (e.g. `package Foo;`) and optional inline block form.
2066    Package {
2067        /// Name of the package
2068        ///
2069        /// # Precise Navigation Support
2070        /// - Added name_span for exact LSP navigation
2071        /// - Enables precise go-to-definition and hover behavior
2072        /// - O(1) span lookup in workspace symbols
2073        ///
2074        /// ## Integration Points
2075        /// - Workspace indexing
2076        /// - Cross-module symbol resolution
2077        /// - Code action providers
2078        name: String,
2079
2080        /// Source location span of the package name
2081        ///
2082        /// ## Usage Notes
2083        /// - Always corresponds to the name field
2084        /// - Provides constant-time position information
2085        /// - Essential for precise editor interactions
2086        name_span: SourceLocation,
2087
2088        /// Optional inline block for `package Foo { ... }` declarations.
2089        block: Option<Box<Node>>,
2090    },
2091
2092    /// Use statement for module loading: `use Module qw(imports);`
2093    Use {
2094        /// Module name to load
2095        module: String,
2096        /// Import arguments (symbols to import)
2097        args: Vec<String>,
2098        /// Whether this module is a known source filter (security risk)
2099        has_filter_risk: bool,
2100    },
2101
2102    /// No statement for disabling features: `no strict;`
2103    No {
2104        /// Module/pragma name to disable
2105        module: String,
2106        /// Arguments for the no statement
2107        args: Vec<String>,
2108        /// Whether this module is a known source filter (security risk)
2109        has_filter_risk: bool,
2110    },
2111
2112    /// Phase block for compile/runtime hooks: `BEGIN`, `END`, `CHECK`, `INIT`, `UNITCHECK`
2113    PhaseBlock {
2114        /// Phase name: BEGIN, END, CHECK, INIT, UNITCHECK
2115        phase: String,
2116        /// Source location span of the phase block name for precise navigation
2117        phase_span: Option<SourceLocation>,
2118        /// Block to execute during the specified phase
2119        block: Box<Node>,
2120    },
2121
2122    /// Data section marker: `__DATA__` or `__END__`
2123    DataSection {
2124        /// Section marker (__DATA__ or __END__)
2125        marker: String,
2126        /// Content following the marker (if any)
2127        body: Option<String>,
2128    },
2129
2130    /// Class declaration (Perl 5.38+ with `use feature 'class'`)
2131    Class {
2132        /// Class name
2133        name: String,
2134        /// Parent class names from `:isa(Parent)` attributes
2135        parents: Vec<String>,
2136        /// Class body containing methods and attributes
2137        body: Box<Node>,
2138    },
2139
2140    /// Format declaration for legacy report generation
2141    Format {
2142        /// Format name (defaults to filehandle name)
2143        name: String,
2144        /// Format specification body
2145        body: String,
2146    },
2147
2148    /// Bare identifier (bareword or package-qualified name)
2149    Identifier {
2150        /// Identifier string
2151        name: String,
2152    },
2153
2154    /// Parse error placeholder with error message and recovery context
2155    Error {
2156        /// Error description
2157        message: String,
2158        /// Expected token types (if any)
2159        expected: Vec<TokenKind>,
2160        /// The token actually found (if any)
2161        found: Option<Token>,
2162        /// Partial AST node parsed before error (if any)
2163        partial: Option<Box<Node>>,
2164    },
2165
2166    /// Missing expression where one was expected
2167    MissingExpression,
2168    /// Missing statement where one was expected
2169    MissingStatement,
2170    /// Missing identifier where one was expected
2171    MissingIdentifier,
2172    /// Missing block where one was expected
2173    MissingBlock,
2174
2175    /// Lexer budget exceeded marker preserving partial parse results
2176    ///
2177    /// Used when recursion or token limits are hit to preserve already-parsed content.
2178    UnknownRest,
2179}
2180
2181impl NodeKind {
2182    /// Get the name of this `NodeKind` as a static string.
2183    ///
2184    /// Useful for diagnostics, logging, and human-readable AST dumps.
2185    ///
2186    /// # Examples
2187    ///
2188    /// ```
2189    /// use perl_ast::NodeKind;
2190    ///
2191    /// let kind = NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() };
2192    /// assert_eq!(kind.kind_name(), "Variable");
2193    ///
2194    /// let kind = NodeKind::Program { statements: vec![] };
2195    /// assert_eq!(kind.kind_name(), "Program");
2196    /// ```
2197    pub fn kind_name(&self) -> &'static str {
2198        match self {
2199            NodeKind::Program { .. } => "Program",
2200            NodeKind::ExpressionStatement { .. } => "ExpressionStatement",
2201            NodeKind::VariableDeclaration { .. } => "VariableDeclaration",
2202            NodeKind::VariableListDeclaration { .. } => "VariableListDeclaration",
2203            NodeKind::Variable { .. } => "Variable",
2204            NodeKind::VariableWithAttributes { .. } => "VariableWithAttributes",
2205            NodeKind::Assignment { .. } => "Assignment",
2206            NodeKind::Binary { .. } => "Binary",
2207            NodeKind::Ternary { .. } => "Ternary",
2208            NodeKind::Unary { .. } => "Unary",
2209            NodeKind::Diamond => "Diamond",
2210            NodeKind::Ellipsis => "Ellipsis",
2211            NodeKind::Undef => "Undef",
2212            NodeKind::Readline { .. } => "Readline",
2213            NodeKind::Glob { .. } => "Glob",
2214            NodeKind::Typeglob { .. } => "Typeglob",
2215            NodeKind::Number { .. } => "Number",
2216            NodeKind::String { .. } => "String",
2217            NodeKind::Heredoc { .. } => "Heredoc",
2218            NodeKind::ArrayLiteral { .. } => "ArrayLiteral",
2219            NodeKind::HashLiteral { .. } => "HashLiteral",
2220            NodeKind::Block { .. } => "Block",
2221            NodeKind::Eval { .. } => "Eval",
2222            NodeKind::Do { .. } => "Do",
2223            NodeKind::Defer { .. } => "Defer",
2224            NodeKind::Try { .. } => "Try",
2225            NodeKind::If { .. } => "If",
2226            NodeKind::LabeledStatement { .. } => "LabeledStatement",
2227            NodeKind::While { .. } => "While",
2228            NodeKind::Tie { .. } => "Tie",
2229            NodeKind::Untie { .. } => "Untie",
2230            NodeKind::For { .. } => "For",
2231            NodeKind::Foreach { .. } => "Foreach",
2232            NodeKind::Given { .. } => "Given",
2233            NodeKind::When { .. } => "When",
2234            NodeKind::Default { .. } => "Default",
2235            NodeKind::StatementModifier { .. } => "StatementModifier",
2236            NodeKind::Subroutine { .. } => "Subroutine",
2237            NodeKind::Prototype { .. } => "Prototype",
2238            NodeKind::Signature { .. } => "Signature",
2239            NodeKind::MandatoryParameter { .. } => "MandatoryParameter",
2240            NodeKind::OptionalParameter { .. } => "OptionalParameter",
2241            NodeKind::SlurpyParameter { .. } => "SlurpyParameter",
2242            NodeKind::NamedParameter { .. } => "NamedParameter",
2243            NodeKind::Method { .. } => "Method",
2244            NodeKind::Return { .. } => "Return",
2245            NodeKind::LoopControl { .. } => "LoopControl",
2246            NodeKind::Goto { .. } => "Goto",
2247            NodeKind::MethodCall { .. } => "MethodCall",
2248            NodeKind::FunctionCall { .. } => "FunctionCall",
2249            NodeKind::IndirectCall { .. } => "IndirectCall",
2250            NodeKind::Regex { .. } => "Regex",
2251            NodeKind::Match { .. } => "Match",
2252            NodeKind::Substitution { .. } => "Substitution",
2253            NodeKind::Transliteration { .. } => "Transliteration",
2254            NodeKind::Package { .. } => "Package",
2255            NodeKind::Use { .. } => "Use",
2256            NodeKind::No { .. } => "No",
2257            NodeKind::PhaseBlock { .. } => "PhaseBlock",
2258            NodeKind::DataSection { .. } => "DataSection",
2259            NodeKind::Class { .. } => "Class",
2260            NodeKind::Format { .. } => "Format",
2261            NodeKind::Identifier { .. } => "Identifier",
2262            NodeKind::Error { .. } => "Error",
2263            NodeKind::MissingExpression => "MissingExpression",
2264            NodeKind::MissingStatement => "MissingStatement",
2265            NodeKind::MissingIdentifier => "MissingIdentifier",
2266            NodeKind::MissingBlock => "MissingBlock",
2267            NodeKind::UnknownRest => "UnknownRest",
2268        }
2269    }
2270
2271    /// Canonical list of **all** `kind_name()` strings, in alphabetical order.
2272    ///
2273    /// Every consumer that needs the full set of NodeKind names should reference
2274    /// this constant instead of maintaining a hand-written copy.
2275    pub const ALL_KIND_NAMES: &[&'static str] = &[
2276        "ArrayLiteral",
2277        "Assignment",
2278        "Binary",
2279        "Block",
2280        "Class",
2281        "DataSection",
2282        "Default",
2283        "Defer",
2284        "Diamond",
2285        "Do",
2286        "Ellipsis",
2287        "Error",
2288        "Eval",
2289        "ExpressionStatement",
2290        "For",
2291        "Foreach",
2292        "Format",
2293        "FunctionCall",
2294        "Given",
2295        "Glob",
2296        "Goto",
2297        "HashLiteral",
2298        "Heredoc",
2299        "Identifier",
2300        "If",
2301        "IndirectCall",
2302        "LabeledStatement",
2303        "LoopControl",
2304        "MandatoryParameter",
2305        "Match",
2306        "Method",
2307        "MethodCall",
2308        "MissingBlock",
2309        "MissingExpression",
2310        "MissingIdentifier",
2311        "MissingStatement",
2312        "NamedParameter",
2313        "No",
2314        "Number",
2315        "OptionalParameter",
2316        "Package",
2317        "PhaseBlock",
2318        "Program",
2319        "Prototype",
2320        "Readline",
2321        "Regex",
2322        "Return",
2323        "Signature",
2324        "SlurpyParameter",
2325        "StatementModifier",
2326        "String",
2327        "Subroutine",
2328        "Substitution",
2329        "Ternary",
2330        "Tie",
2331        "Transliteration",
2332        "Try",
2333        "Typeglob",
2334        "Unary",
2335        "Undef",
2336        "UnknownRest",
2337        "Untie",
2338        "Use",
2339        "Variable",
2340        "VariableDeclaration",
2341        "VariableListDeclaration",
2342        "VariableWithAttributes",
2343        "When",
2344        "While",
2345    ];
2346
2347    /// Subset of `ALL_KIND_NAMES` that represent synthetic/recovery nodes.
2348    ///
2349    /// These kinds are only produced by `parse_with_recovery()` on malformed
2350    /// input and should not be expected in clean parses.
2351    pub const RECOVERY_KIND_NAMES: &[&'static str] = &[
2352        "Error",
2353        "MissingBlock",
2354        "MissingExpression",
2355        "MissingIdentifier",
2356        "MissingStatement",
2357        "UnknownRest",
2358    ];
2359}
2360
2361impl fmt::Display for NodeKind {
2362    /// Formats as the canonical `kind_name()` string.
2363    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2364        f.write_str(self.kind_name())
2365    }
2366}
2367
2368impl fmt::Display for Node {
2369    /// Formats as the tree-sitter compatible S-expression.
2370    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2371        f.write_str(&self.to_sexp())
2372    }
2373}
2374
/// Build the S-expression label for a unary operator.
///
/// Three operator spellings get a symbolic label:
///
/// - `!` and `not` both become `unary_not`
/// - `~` becomes `unary_complement`
/// - `\` becomes `unary_ref`
///
/// Every other operator is rendered as `unary_` followed by the operator text
/// with each space replaced by an underscore. That covers the arithmetic signs
/// (`+`, `-`), increment/decrement (`++`, `--`), the file tests (`-f`, `-d`,
/// `-e`, ...), postfix dereferences (`->@*`, `->%*`, `->$*`, `->&*`, `->**`),
/// `defined`, and any operator not otherwise recognized.
fn format_unary_operator(op: &str) -> String {
    match op {
        // Logical negation: both spellings share one label.
        "!" | "not" => String::from("unary_not"),
        // Bitwise complement.
        "~" => String::from("unary_complement"),
        // Reference-taking backslash.
        "\\" => String::from("unary_ref"),
        // Everything else keeps its own spelling; none of the known operators
        // contains a space, so the replacement only affects unknown ones.
        other => format!("unary_{}", other.replace(' ', "_")),
    }
}
2439
/// Build the S-expression label for a binary operator.
///
/// Two operator spellings get a symbolic label:
///
/// - `->{}` becomes `arrow_hash_deref`
/// - `->[]` becomes `arrow_array_deref`
///
/// Every other operator is rendered as `binary_` followed by the operator text
/// with each space replaced by an underscore. That covers arithmetic (`+`,
/// `-`, `*`, `/`, `%`, `**`), numeric and string comparison (`==`, `<=>`,
/// `eq`, `cmp`, ...), logical (`&&`, `||`, `and`, `or`, `xor`), bitwise (`&`,
/// `|`, `^`, `<<`, `>>`), binding (`=~`, `!~`), smart match (`~~`), repetition
/// (`x`), concatenation (`.`), ranges (`..`, `...`), `isa`, the assignment
/// family (`=`, `+=`, `//=`, ...), defined-or (`//`), `->`, subscripts (`{}`,
/// `[]`), and any operator not otherwise recognized.
fn format_binary_operator(op: &str) -> String {
    match op {
        // Arrow dereferences are the only operators given a symbolic name.
        "->{}" => String::from("arrow_hash_deref"),
        "->[]" => String::from("arrow_array_deref"),
        // Everything else keeps its own spelling; none of the known operators
        // contains a space, so the replacement only affects unknown ones.
        other => format!("binary_{}", other.replace(' ', "_")),
    }
}
2539
2540// SourceLocation is now provided by perl-position-tracking crate
2541// See the re-export at the top of this file
2542
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    /// Constructs one placeholder value for each `NodeKind` variant listed
    /// below and gathers the `kind_name()` of every one into an ordered set.
    ///
    /// Because each variant is spelled out with all of its fields, removing a
    /// variant or changing its fields makes this helper fail to compile.  A
    /// newly added variant is not detected by the compiler here; it has to be
    /// appended to the list by hand.
    fn all_kind_names_from_variants() -> BTreeSet<&'static str> {
        let loc = SourceLocation { start: 0, end: 0 };
        // Boxed placeholder child used wherever a variant needs a sub-node.
        let leaf = || Box::new(Node::new(NodeKind::Undef, loc));
        // Empty text used for every string-valued field.
        let blank = String::new;

        let variants = [
            NodeKind::Program { statements: Vec::new() },
            NodeKind::ExpressionStatement { expression: leaf() },
            NodeKind::VariableDeclaration {
                declarator: blank(),
                variable: leaf(),
                attributes: Vec::new(),
                initializer: None,
            },
            NodeKind::VariableListDeclaration {
                declarator: blank(),
                variables: Vec::new(),
                attributes: Vec::new(),
                initializer: None,
            },
            NodeKind::Variable { sigil: blank(), name: blank() },
            NodeKind::VariableWithAttributes { variable: leaf(), attributes: Vec::new() },
            NodeKind::Assignment { lhs: leaf(), rhs: leaf(), op: blank() },
            NodeKind::Binary { op: blank(), left: leaf(), right: leaf() },
            NodeKind::Ternary { condition: leaf(), then_expr: leaf(), else_expr: leaf() },
            NodeKind::Unary { op: blank(), operand: leaf() },
            NodeKind::Diamond,
            NodeKind::Ellipsis,
            NodeKind::Undef,
            NodeKind::Readline { filehandle: None },
            NodeKind::Glob { pattern: blank() },
            NodeKind::Typeglob { name: blank() },
            NodeKind::Number { value: blank() },
            NodeKind::String { value: blank(), interpolated: false },
            NodeKind::Heredoc {
                delimiter: blank(),
                content: blank(),
                interpolated: false,
                indented: false,
                command: false,
                body_span: None,
            },
            NodeKind::ArrayLiteral { elements: Vec::new() },
            NodeKind::HashLiteral { pairs: Vec::new() },
            NodeKind::Block { statements: Vec::new() },
            NodeKind::Eval { block: leaf() },
            NodeKind::Do { block: leaf() },
            NodeKind::Defer { block: leaf() },
            NodeKind::Try { body: leaf(), catch_blocks: Vec::new(), finally_block: None },
            NodeKind::If {
                condition: leaf(),
                then_branch: leaf(),
                elsif_branches: Vec::new(),
                else_branch: None,
            },
            NodeKind::LabeledStatement { label: blank(), statement: leaf() },
            NodeKind::While { condition: leaf(), body: leaf(), continue_block: None },
            NodeKind::Tie { variable: leaf(), package: leaf(), args: Vec::new() },
            NodeKind::Untie { variable: leaf() },
            NodeKind::For {
                init: None,
                condition: None,
                update: None,
                body: leaf(),
                continue_block: None,
            },
            NodeKind::Foreach {
                variable: leaf(),
                list: leaf(),
                body: leaf(),
                continue_block: None,
            },
            NodeKind::Given { expr: leaf(), body: leaf() },
            NodeKind::When { condition: leaf(), body: leaf() },
            NodeKind::Default { body: leaf() },
            NodeKind::StatementModifier {
                statement: leaf(),
                modifier: blank(),
                condition: leaf(),
            },
            NodeKind::Subroutine {
                name: None,
                name_span: None,
                prototype: None,
                signature: None,
                attributes: Vec::new(),
                body: leaf(),
            },
            NodeKind::Prototype { content: blank() },
            NodeKind::Signature { parameters: Vec::new() },
            NodeKind::MandatoryParameter { variable: leaf() },
            NodeKind::OptionalParameter { variable: leaf(), default_value: leaf() },
            NodeKind::SlurpyParameter { variable: leaf() },
            NodeKind::NamedParameter { variable: leaf() },
            NodeKind::Method {
                name: blank(),
                signature: None,
                attributes: Vec::new(),
                body: leaf(),
            },
            NodeKind::Return { value: None },
            NodeKind::LoopControl { op: blank(), label: None },
            NodeKind::Goto { target: leaf() },
            NodeKind::MethodCall { object: leaf(), method: blank(), args: Vec::new() },
            NodeKind::FunctionCall { name: blank(), args: Vec::new() },
            NodeKind::IndirectCall { method: blank(), object: leaf(), args: Vec::new() },
            NodeKind::Regex {
                pattern: blank(),
                replacement: None,
                modifiers: blank(),
                has_embedded_code: false,
            },
            NodeKind::Match {
                expr: leaf(),
                pattern: blank(),
                modifiers: blank(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Substitution {
                expr: leaf(),
                pattern: blank(),
                replacement: blank(),
                modifiers: blank(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Transliteration {
                expr: leaf(),
                search: blank(),
                replace: blank(),
                modifiers: blank(),
                negated: false,
            },
            NodeKind::Package { name: blank(), name_span: loc, block: None },
            NodeKind::Use { module: blank(), args: Vec::new(), has_filter_risk: false },
            NodeKind::No { module: blank(), args: Vec::new(), has_filter_risk: false },
            NodeKind::PhaseBlock { phase: blank(), phase_span: None, block: leaf() },
            NodeKind::DataSection { marker: blank(), body: None },
            NodeKind::Class { name: blank(), parents: Vec::new(), body: leaf() },
            NodeKind::Format { name: blank(), body: blank() },
            NodeKind::Identifier { name: blank() },
            NodeKind::Error {
                message: blank(),
                expected: Vec::new(),
                found: None,
                partial: None,
            },
            NodeKind::MissingExpression,
            NodeKind::MissingStatement,
            NodeKind::MissingIdentifier,
            NodeKind::MissingBlock,
            NodeKind::UnknownRest,
        ];

        let mut names = BTreeSet::new();
        for variant in &variants {
            names.insert(variant.kind_name());
        }
        names
    }

    /// `ALL_KIND_NAMES` must be duplicate-free and must contain exactly the
    /// names produced by `all_kind_names_from_variants`.
    #[test]
    fn all_kind_names_is_consistent_with_kind_name() {
        let from_enum = all_kind_names_from_variants();
        let from_const = NodeKind::ALL_KIND_NAMES.iter().copied().collect::<BTreeSet<&str>>();

        // A set drops repeated entries, so a length mismatch means duplicates.
        assert_eq!(
            NodeKind::ALL_KIND_NAMES.len(),
            from_const.len(),
            "ALL_KIND_NAMES contains duplicates"
        );

        // Names missing on either side, reported together in one failure.
        let only_in_enum = from_enum.difference(&from_const).collect::<Vec<_>>();
        let only_in_const = from_const.difference(&from_enum).collect::<Vec<_>>();
        let has_drift = !only_in_enum.is_empty() || !only_in_const.is_empty();

        assert!(
            !has_drift,
            "ALL_KIND_NAMES is out of sync with NodeKind variants:\n  \
             in enum but not in ALL_KIND_NAMES: {only_in_enum:?}\n  \
             in ALL_KIND_NAMES but not in enum: {only_in_const:?}"
        );
    }

    /// `RECOVERY_KIND_NAMES` must be duplicate-free and every entry must also
    /// appear in `ALL_KIND_NAMES`.
    #[test]
    fn recovery_kind_names_is_subset_of_all() {
        let all = NodeKind::ALL_KIND_NAMES.iter().copied().collect::<BTreeSet<&str>>();
        let recovery = NodeKind::RECOVERY_KIND_NAMES.iter().copied().collect::<BTreeSet<&str>>();

        // A set drops repeated entries, so a length mismatch means duplicates.
        assert_eq!(
            NodeKind::RECOVERY_KIND_NAMES.len(),
            recovery.len(),
            "RECOVERY_KIND_NAMES contains duplicates"
        );

        let not_in_all = recovery.difference(&all).collect::<Vec<_>>();
        assert!(
            not_in_all.is_empty(),
            "RECOVERY_KIND_NAMES contains entries not in ALL_KIND_NAMES: {not_in_all:?}"
        );
    }
}