perl_ast/ast.rs
1//! Abstract Syntax Tree definitions for Perl within the parsing and LSP workflow.
2//!
3//! This module defines the comprehensive AST node types that represent parsed Perl code
4//! during the Parse → Index → Navigate → Complete → Analyze stages. The design is optimized
5//! for both direct use in Rust analysis and for generating tree-sitter compatible
6//! S-expressions during large workspace processing operations.
7//!
8//! # LSP Workflow Integration
9//!
10//! The AST structures support Perl tooling workflows by:
11//! - **Parse**: Produced by the parser as the canonical syntax tree
12//! - **Index**: Traversed to build symbol and reference tables
13//! - **Navigate**: Provides locations for definition and reference lookups
14//! - **Complete**: Supplies context for completion, hover, and signature help
15//! - **Analyze**: Feeds semantic analysis, diagnostics, and refactoring
16//!
17//! # Performance Characteristics
18//!
19//! AST structures are optimized for large codebases with:
20//! - Memory-efficient node representation using `Box<Node>` for recursive structures
21//! - Fast pattern matching via enum variants for common Perl constructs
22//! - Location tracking for precise error reporting in large files
23//! - Cheap cloning for parallel analysis tasks
24//!
25//! # Usage Examples
26//!
27//! ## Basic AST Construction
28//!
29//! ```rust
30//! use perl_ast::{Node, NodeKind, SourceLocation};
31//!
32//! // Create a simple variable declaration node
33//! let location = SourceLocation { start: 0, end: 10 };
34//! let node = Node::new(
35//! NodeKind::VariableDeclaration {
36//! declarator: "my".to_string(),
37//! variable: Box::new(Node::new(
38//! NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
39//! location,
40//! )),
41//! attributes: vec![],
42//! initializer: None,
43//! },
44//! location,
45//! );
46//! assert_eq!(node.kind.kind_name(), "VariableDeclaration");
47//! ```
48//!
49//! ## Tree-sitter S-expression Generation
50//!
51//! ```rust
52//! use perl_ast::{Node, NodeKind, SourceLocation};
53//!
54//! let loc = SourceLocation { start: 0, end: 2 };
55//! let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
56//! let program = Node::new(NodeKind::Program { statements: vec![num] }, loc);
57//!
58//! let sexp = program.to_sexp();
59//! assert!(sexp.starts_with("(source_file"));
60//! ```
61//!
62//! ## AST Traversal and Analysis
63//!
64//! ```rust
65//! use perl_ast::{Node, NodeKind, SourceLocation};
66//!
67//! fn count_variables(node: &Node) -> usize {
68//! let mut count = 0;
69//! match &node.kind {
70//! NodeKind::Variable { .. } => count += 1,
71//! NodeKind::Program { statements } => {
72//! for stmt in statements {
73//! count += count_variables(stmt);
74//! }
75//! }
76//! _ => {} // Handle other node types as needed
77//! }
78//! count
79//! }
80//!
81//! let loc = SourceLocation { start: 0, end: 5 };
82//! let var = Node::new(
83//! NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
84//! loc,
85//! );
86//! let program = Node::new(NodeKind::Program { statements: vec![var] }, loc);
87//! assert_eq!(count_variables(&program), 1);
88//! ```
89//!
90//! ## Parsing Integration
91//!
92//! In practice the AST is produced by the parser rather than built by hand
93//! (requires `perl-parser-core`):
94//!
95//! ```rust,ignore
96//! use perl_parser_core::Parser;
97//! use perl_ast::NodeKind;
98//!
99//! let mut parser = Parser::new("my $x = 42;");
100//! let ast = parser.parse().expect("should parse");
101//! assert!(matches!(ast.kind, NodeKind::Program { .. }));
102//! ```
103
104// Re-export SourceLocation from perl-position-tracking for unified span handling
105pub use perl_position_tracking::SourceLocation;
106// Re-export Token and TokenKind from perl-token for AST error nodes
107pub use perl_token::{Token, TokenKind};
108
109/// Core AST node representing any Perl language construct within parsing workflows.
110///
111/// This is the fundamental building block for representing parsed Perl code. Each node
112/// contains both the semantic information (kind) and positional information (location)
113/// necessary for comprehensive script analysis.
114///
115/// # LSP Workflow Role
116///
117/// Nodes flow through tooling stages:
118/// - **Parse**: Created by the parser as it builds the syntax tree
119/// - **Index**: Visited to build symbol and reference tables
120/// - **Navigate**: Used to resolve definitions, references, and call hierarchy
121/// - **Complete**: Provides contextual information for completion and hover
122/// - **Analyze**: Drives semantic analysis and diagnostics
123///
124/// # Memory Optimization
125///
126/// The structure is designed for efficient memory usage during large-scale parsing:
127/// - `SourceLocation` uses compact position encoding for large files
128/// - `NodeKind` enum variants minimize memory overhead for common constructs
129/// - Clone operations are optimized for shared analysis workflows
130///
131/// # Examples
132///
133/// Construct a variable declaration node manually:
134///
135/// ```
136/// use perl_ast::{Node, NodeKind, SourceLocation};
137///
138/// let loc = SourceLocation { start: 0, end: 11 };
139/// let var = Node::new(
140/// NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
141/// loc,
142/// );
143/// let decl = Node::new(
144/// NodeKind::VariableDeclaration {
145/// declarator: "my".to_string(),
146/// variable: Box::new(var),
147/// attributes: vec![],
148/// initializer: None,
149/// },
150/// loc,
151/// );
152/// assert_eq!(decl.kind.kind_name(), "VariableDeclaration");
153/// ```
154///
155/// Typically you obtain nodes from the parser rather than constructing them by hand:
156///
157/// ```ignore
158/// use perl_parser::Parser;
159///
160/// let mut parser = Parser::new("my $x = 42;");
161/// let ast = parser.parse()?;
162/// println!("AST: {}", ast.to_sexp());
163/// ```
164#[derive(Debug, Clone, PartialEq)]
165pub struct Node {
166 /// The specific type and semantic content of this AST node
167 pub kind: NodeKind,
168 /// Source position information for error reporting and code navigation
169 pub location: SourceLocation,
170}
171
172impl Node {
173 /// Create a new AST node with the given kind and source location.
174 ///
175 /// # Examples
176 ///
177 /// ```
178 /// use perl_ast::{Node, NodeKind, SourceLocation};
179 ///
180 /// let node = Node::new(
181 /// NodeKind::Number { value: "42".to_string() },
182 /// SourceLocation { start: 0, end: 2 },
183 /// );
184 /// assert_eq!(node.kind.kind_name(), "Number");
185 /// assert_eq!(node.location.start, 0);
186 /// ```
187 pub fn new(kind: NodeKind, location: SourceLocation) -> Self {
188 Node { kind, location }
189 }
190
191 /// Convert the AST to a tree-sitter compatible S-expression.
192 ///
193 /// Produces a parenthesized representation compatible with tree-sitter's
194 /// S-expression format, useful for debugging and snapshot testing.
195 ///
196 /// # Examples
197 ///
198 /// ```
199 /// use perl_ast::{Node, NodeKind, SourceLocation};
200 ///
201 /// let loc = SourceLocation { start: 0, end: 2 };
202 /// let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
203 /// let program = Node::new(
204 /// NodeKind::Program { statements: vec![num] },
205 /// loc,
206 /// );
207 /// let sexp = program.to_sexp();
208 /// assert!(sexp.starts_with("(source_file"));
209 /// ```
210 pub fn to_sexp(&self) -> String {
211 match &self.kind {
212 NodeKind::Program { statements } => {
213 let stmts =
214 statements.iter().map(|s| s.to_sexp_inner()).collect::<Vec<_>>().join(" ");
215 format!("(source_file {})", stmts)
216 }
217
218 NodeKind::ExpressionStatement { expression } => {
219 format!("(expression_statement {})", expression.to_sexp())
220 }
221
222 NodeKind::VariableDeclaration { declarator, variable, attributes, initializer } => {
223 let attrs_str = if attributes.is_empty() {
224 String::new()
225 } else {
226 format!(" (attributes {})", attributes.join(" "))
227 };
228 if let Some(init) = initializer {
229 format!(
230 "({}_declaration {}{}{})",
231 declarator,
232 variable.to_sexp(),
233 attrs_str,
234 init.to_sexp()
235 )
236 } else {
237 format!("({}_declaration {}{})", declarator, variable.to_sexp(), attrs_str)
238 }
239 }
240
241 NodeKind::VariableListDeclaration {
242 declarator,
243 variables,
244 attributes,
245 initializer,
246 } => {
247 let vars = variables.iter().map(|v| v.to_sexp()).collect::<Vec<_>>().join(" ");
248 let attrs_str = if attributes.is_empty() {
249 String::new()
250 } else {
251 format!(" (attributes {})", attributes.join(" "))
252 };
253 if let Some(init) = initializer {
254 format!(
255 "({}_declaration ({}){}{})",
256 declarator,
257 vars,
258 attrs_str,
259 init.to_sexp()
260 )
261 } else {
262 format!("({}_declaration ({}){})", declarator, vars, attrs_str)
263 }
264 }
265
266 NodeKind::Variable { sigil, name } => {
267 // Format expected by bless parsing tests: (variable $ name)
268 format!("(variable {} {})", sigil, name)
269 }
270
271 NodeKind::VariableWithAttributes { variable, attributes } => {
272 let attrs = attributes.join(" ");
273 format!("({} (attributes {}))", variable.to_sexp(), attrs)
274 }
275
276 NodeKind::Assignment { lhs, rhs, op } => {
277 format!(
278 "(assignment_{} {} {})",
279 op.replace("=", "assign"),
280 lhs.to_sexp(),
281 rhs.to_sexp()
282 )
283 }
284
285 NodeKind::Binary { op, left, right } => {
286 // Tree-sitter format: (binary_op left right)
287 let op_name = format_binary_operator(op);
288 format!("({} {} {})", op_name, left.to_sexp(), right.to_sexp())
289 }
290
291 NodeKind::Ternary { condition, then_expr, else_expr } => {
292 format!(
293 "(ternary {} {} {})",
294 condition.to_sexp(),
295 then_expr.to_sexp(),
296 else_expr.to_sexp()
297 )
298 }
299
300 NodeKind::Unary { op, operand } => {
301 // Tree-sitter format: (unary_op operand)
302 let op_name = format_unary_operator(op);
303 format!("({} {})", op_name, operand.to_sexp())
304 }
305
306 NodeKind::Diamond => "(diamond)".to_string(),
307
308 NodeKind::Ellipsis => "(ellipsis)".to_string(),
309
310 NodeKind::Undef => "(undef)".to_string(),
311
312 NodeKind::Readline { filehandle } => {
313 if let Some(fh) = filehandle {
314 format!("(readline {})", fh)
315 } else {
316 "(readline)".to_string()
317 }
318 }
319
320 NodeKind::Glob { pattern } => {
321 format!("(glob {})", pattern)
322 }
323 NodeKind::Typeglob { name } => {
324 format!("(typeglob {})", name)
325 }
326
327 NodeKind::Number { value } => {
328 // Format expected by bless parsing tests: (number value)
329 format!("(number {})", value)
330 }
331
332 NodeKind::String { value, interpolated } => {
333 // Escape quotes in string value to prevent S-expression parsing issues
334 let escaped_value = value.replace('\\', "\\\\").replace('"', "\\\"");
335
336 // Format based on interpolation status
337 if *interpolated {
338 format!("(string_interpolated \"{}\")", escaped_value)
339 } else {
340 format!("(string \"{}\")", escaped_value)
341 }
342 }
343
344 NodeKind::Heredoc { delimiter, content, interpolated, indented, command, .. } => {
345 let type_str = if *command {
346 "heredoc_command"
347 } else if *indented {
348 if *interpolated { "heredoc_indented_interpolated" } else { "heredoc_indented" }
349 } else if *interpolated {
350 "heredoc_interpolated"
351 } else {
352 "heredoc"
353 };
354 format!("({} {:?} {:?})", type_str, delimiter, content)
355 }
356
357 NodeKind::ArrayLiteral { elements } => {
358 let elems = elements.iter().map(|e| e.to_sexp()).collect::<Vec<_>>().join(" ");
359 format!("(array {})", elems)
360 }
361
362 NodeKind::HashLiteral { pairs } => {
363 let kvs = pairs
364 .iter()
365 .map(|(k, v)| format!("({} {})", k.to_sexp(), v.to_sexp()))
366 .collect::<Vec<_>>()
367 .join(" ");
368 format!("(hash {})", kvs)
369 }
370
371 NodeKind::Block { statements } => {
372 let stmts = statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ");
373 format!("(block {})", stmts)
374 }
375
376 NodeKind::Eval { block } => {
377 format!("(eval {})", block.to_sexp())
378 }
379
380 NodeKind::Do { block } => {
381 format!("(do {})", block.to_sexp())
382 }
383
384 NodeKind::Try { body, catch_blocks, finally_block } => {
385 let mut parts = vec![format!("(try {})", body.to_sexp())];
386
387 for (var, block) in catch_blocks {
388 if let Some(v) = var {
389 parts.push(format!("(catch {} {})", v, block.to_sexp()));
390 } else {
391 parts.push(format!("(catch {})", block.to_sexp()));
392 }
393 }
394
395 if let Some(finally) = finally_block {
396 parts.push(format!("(finally {})", finally.to_sexp()));
397 }
398
399 parts.join(" ")
400 }
401
402 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
403 let mut parts =
404 vec![format!("(if {} {})", condition.to_sexp(), then_branch.to_sexp())];
405
406 for (cond, block) in elsif_branches {
407 parts.push(format!("(elsif {} {})", cond.to_sexp(), block.to_sexp()));
408 }
409
410 if let Some(else_block) = else_branch {
411 parts.push(format!("(else {})", else_block.to_sexp()));
412 }
413
414 parts.join(" ")
415 }
416
417 NodeKind::LabeledStatement { label, statement } => {
418 format!("(labeled_statement {} {})", label, statement.to_sexp())
419 }
420
421 NodeKind::While { condition, body, continue_block } => {
422 let mut s = format!("(while {} {})", condition.to_sexp(), body.to_sexp());
423 if let Some(cont) = continue_block {
424 s.push_str(&format!(" (continue {})", cont.to_sexp()));
425 }
426 s
427 }
428 NodeKind::Tie { variable, package, args } => {
429 let mut s = format!("(tie {} {}", variable.to_sexp(), package.to_sexp());
430 for arg in args {
431 s.push_str(&format!(" {}", arg.to_sexp()));
432 }
433 s.push(')');
434 s
435 }
436 NodeKind::Untie { variable } => {
437 format!("(untie {})", variable.to_sexp())
438 }
439 NodeKind::For { init, condition, update, body, continue_block } => {
440 let init_str =
441 init.as_ref().map(|i| i.to_sexp()).unwrap_or_else(|| "()".to_string());
442 let cond_str =
443 condition.as_ref().map(|c| c.to_sexp()).unwrap_or_else(|| "()".to_string());
444 let update_str =
445 update.as_ref().map(|u| u.to_sexp()).unwrap_or_else(|| "()".to_string());
446 let mut result =
447 format!("(for {} {} {} {})", init_str, cond_str, update_str, body.to_sexp());
448 if let Some(cont) = continue_block {
449 result.push_str(&format!(" (continue {})", cont.to_sexp()));
450 }
451 result
452 }
453
454 NodeKind::Foreach { variable, list, body, continue_block } => {
455 let cont = if let Some(cb) = continue_block {
456 format!(" {}", cb.to_sexp())
457 } else {
458 String::new()
459 };
460 format!(
461 "(foreach {} {} {}{})",
462 variable.to_sexp(),
463 list.to_sexp(),
464 body.to_sexp(),
465 cont
466 )
467 }
468
469 NodeKind::Given { expr, body } => {
470 format!("(given {} {})", expr.to_sexp(), body.to_sexp())
471 }
472
473 NodeKind::When { condition, body } => {
474 format!("(when {} {})", condition.to_sexp(), body.to_sexp())
475 }
476
477 NodeKind::Default { body } => {
478 format!("(default {})", body.to_sexp())
479 }
480
481 NodeKind::StatementModifier { statement, modifier, condition } => {
482 format!(
483 "(statement_modifier_{} {} {})",
484 modifier,
485 statement.to_sexp(),
486 condition.to_sexp()
487 )
488 }
489
490 NodeKind::Subroutine { name, prototype, signature, attributes, body, name_span: _ } => {
491 if let Some(sub_name) = name {
492 // Named subroutine - bless test expected format: (sub name () block)
493 let mut parts = vec![sub_name.clone()];
494
495 // Add attributes if present (before prototype/signature)
496 if !attributes.is_empty() {
497 for attr in attributes {
498 parts.push(format!(":{}", attr));
499 }
500 }
501
502 // Add prototype/signature - use () for empty prototype
503 if let Some(proto) = prototype {
504 parts.push(format!("({})", proto.to_sexp()));
505 } else if signature.is_some() {
506 // If there's a signature but no prototype, still show ()
507 parts.push("()".to_string());
508 } else {
509 parts.push("()".to_string());
510 }
511
512 // Add body
513 parts.push(body.to_sexp());
514
515 // Format: (sub name [attrs...] ()(block ...)) - space between name and (), no space between () and block
516 if parts.len() >= 3 && parts[parts.len() - 2] == "()" {
517 let name_and_attrs = parts[0..parts.len() - 2].join(" ");
518 let proto = &parts[parts.len() - 2];
519 let body = &parts[parts.len() - 1];
520 format!("(sub {} {}{})", name_and_attrs, proto, body)
521 } else {
522 format!("(sub {})", parts.join(" "))
523 }
524 } else {
525 // Anonymous subroutine - tree-sitter format
526 let mut parts = Vec::new();
527
528 // Add attributes if present
529 if !attributes.is_empty() {
530 let attrs: Vec<String> = attributes
531 .iter()
532 .map(|_attr| "(attribute (attribute_name))".to_string())
533 .collect();
534 parts.push(format!("(attrlist {})", attrs.join("")));
535 }
536
537 // Add prototype if present
538 if let Some(proto) = prototype {
539 parts.push(proto.to_sexp());
540 }
541
542 // Add signature if present
543 if let Some(sig) = signature {
544 parts.push(sig.to_sexp());
545 }
546
547 // Add body
548 parts.push(body.to_sexp());
549
550 format!("(anonymous_subroutine_expression {})", parts.join(""))
551 }
552 }
553
554 NodeKind::Prototype { content: _ } => "(prototype)".to_string(),
555
556 NodeKind::Signature { parameters } => {
557 let params = parameters.iter().map(|p| p.to_sexp()).collect::<Vec<_>>().join(" ");
558 format!("(signature {})", params)
559 }
560
561 NodeKind::MandatoryParameter { variable } => {
562 format!("(mandatory_parameter {})", variable.to_sexp())
563 }
564
565 NodeKind::OptionalParameter { variable, default_value } => {
566 format!("(optional_parameter {} {})", variable.to_sexp(), default_value.to_sexp())
567 }
568
569 NodeKind::SlurpyParameter { variable } => {
570 format!("(slurpy_parameter {})", variable.to_sexp())
571 }
572
573 NodeKind::NamedParameter { variable } => {
574 format!("(named_parameter {})", variable.to_sexp())
575 }
576
577 NodeKind::Method { name: _, signature, attributes, body } => {
578 let block_contents = match &body.kind {
579 NodeKind::Block { statements } => {
580 statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ")
581 }
582 _ => body.to_sexp(),
583 };
584
585 let mut parts = vec!["(bareword)".to_string()];
586
587 // Add signature if present
588 if let Some(sig) = signature {
589 parts.push(sig.to_sexp());
590 }
591
592 // Add attributes if present
593 if !attributes.is_empty() {
594 let attrs: Vec<String> = attributes
595 .iter()
596 .map(|_attr| "(attribute (attribute_name))".to_string())
597 .collect();
598 parts.push(format!("(attrlist {})", attrs.join("")));
599 }
600
601 parts.push(format!("(block {})", block_contents));
602 format!("(method_declaration_statement {})", parts.join(" "))
603 }
604
605 NodeKind::Return { value } => {
606 if let Some(val) = value {
607 format!("(return {})", val.to_sexp())
608 } else {
609 "(return)".to_string()
610 }
611 }
612
613 NodeKind::LoopControl { op, label } => {
614 if let Some(l) = label {
615 format!("({} {})", op, l)
616 } else {
617 format!("({})", op)
618 }
619 }
620
621 NodeKind::Goto { target } => {
622 format!("(goto {})", target.to_sexp())
623 }
624
625 NodeKind::MethodCall { object, method, args } => {
626 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
627 format!("(method_call {} {} ({}))", object.to_sexp(), method, args_str)
628 }
629
630 NodeKind::FunctionCall { name, args } => {
631 // Special handling for functions that should use call format in tree-sitter tests
632 if matches!(
633 name.as_str(),
634 "bless"
635 | "shift"
636 | "unshift"
637 | "open"
638 | "die"
639 | "warn"
640 | "print"
641 | "printf"
642 | "say"
643 | "push"
644 | "pop"
645 | "map"
646 | "sort"
647 | "grep"
648 | "keys"
649 | "values"
650 | "each"
651 | "defined"
652 | "scalar"
653 | "ref"
654 ) {
655 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
656 if args.is_empty() {
657 format!("(call {} ())", name)
658 } else {
659 format!("(call {} ({}))", name, args_str)
660 }
661 } else {
662 // Tree-sitter format varies by context
663 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
664 if args.is_empty() {
665 "(function_call_expression (function))".to_string()
666 } else {
667 format!("(ambiguous_function_call_expression (function) {})", args_str)
668 }
669 }
670 }
671
672 NodeKind::IndirectCall { method, object, args } => {
673 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
674 format!("(indirect_call {} {} ({}))", method, object.to_sexp(), args_str)
675 }
676
677 NodeKind::Regex { pattern, replacement, modifiers, has_embedded_code } => {
678 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
679 format!("(regex {:?} {:?} {:?}{})", pattern, replacement, modifiers, risk_marker)
680 }
681
682 NodeKind::Match { expr, pattern, modifiers, has_embedded_code, negated } => {
683 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
684 let op = if *negated { "not_match" } else { "match" };
685 format!(
686 "({} {} (regex {:?} {:?}{}))",
687 op,
688 expr.to_sexp(),
689 pattern,
690 modifiers,
691 risk_marker
692 )
693 }
694
695 NodeKind::Substitution {
696 expr,
697 pattern,
698 replacement,
699 modifiers,
700 has_embedded_code,
701 negated,
702 } => {
703 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
704 let neg_marker = if *negated { " (negated)" } else { "" };
705 format!(
706 "(substitution {} {:?} {:?} {:?}{}{})",
707 expr.to_sexp(),
708 pattern,
709 replacement,
710 modifiers,
711 risk_marker,
712 neg_marker
713 )
714 }
715
716 NodeKind::Transliteration { expr, search, replace, modifiers, negated } => {
717 let neg_marker = if *negated { " (negated)" } else { "" };
718 format!(
719 "(transliteration {} {:?} {:?} {:?}{})",
720 expr.to_sexp(),
721 search,
722 replace,
723 modifiers,
724 neg_marker
725 )
726 }
727
728 NodeKind::Package { name, block, name_span: _ } => {
729 if let Some(blk) = block {
730 format!("(package {} {})", name, blk.to_sexp())
731 } else {
732 format!("(package {})", name)
733 }
734 }
735
736 NodeKind::Use { module, args, has_filter_risk } => {
737 let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
738 if args.is_empty() {
739 format!("(use {}{})", module, risk_marker)
740 } else {
741 let args_str = args.join(" ");
742 format!("(use {} ({}){})", module, args_str, risk_marker)
743 }
744 }
745
746 NodeKind::No { module, args, has_filter_risk } => {
747 let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
748 if args.is_empty() {
749 format!("(no {}{})", module, risk_marker)
750 } else {
751 let args_str = args.join(" ");
752 format!("(no {} ({}){})", module, args_str, risk_marker)
753 }
754 }
755
756 NodeKind::PhaseBlock { phase, phase_span: _, block } => {
757 format!("({} {})", phase, block.to_sexp())
758 }
759
760 NodeKind::DataSection { marker, body } => {
761 if let Some(body_text) = body {
762 format!("(data_section {} \"{}\")", marker, body_text.escape_default())
763 } else {
764 format!("(data_section {})", marker)
765 }
766 }
767
768 NodeKind::Class { name, body } => {
769 format!("(class {} {})", name, body.to_sexp())
770 }
771
772 NodeKind::Format { name, body } => {
773 format!("(format {} {:?})", name, body)
774 }
775
776 NodeKind::Identifier { name } => {
777 // Format expected by tests: (identifier name)
778 format!("(identifier {})", name)
779 }
780
781 NodeKind::Error { message, partial, .. } => {
782 if let Some(node) = partial {
783 format!("(ERROR \"{}\" {})", message.escape_default(), node.to_sexp())
784 } else {
785 format!("(ERROR \"{}\")", message.escape_default())
786 }
787 }
788 NodeKind::MissingExpression => "(missing_expression)".to_string(),
789 NodeKind::MissingStatement => "(missing_statement)".to_string(),
790 NodeKind::MissingIdentifier => "(missing_identifier)".to_string(),
791 NodeKind::MissingBlock => "(missing_block)".to_string(),
792 NodeKind::UnknownRest => "(UNKNOWN_REST)".to_string(),
793 }
794 }
795
796 /// Convert the AST to S-expression format that unwraps expression statements in programs
797 pub fn to_sexp_inner(&self) -> String {
798 match &self.kind {
799 NodeKind::ExpressionStatement { expression } => {
800 // Check if this is an anonymous subroutine - if so, keep it wrapped
801 match &expression.kind {
802 NodeKind::Subroutine { name, .. } if name.is_none() => {
803 // Anonymous subroutine should remain wrapped in expression statement
804 self.to_sexp()
805 }
806 _ => {
807 // In the inner format, other expression statements are unwrapped
808 expression.to_sexp()
809 }
810 }
811 }
812 _ => {
813 // For all other node types, use regular to_sexp
814 self.to_sexp()
815 }
816 }
817 }
818
819 /// Call a function on every direct child node of this node.
820 ///
821 /// This enables depth-first traversal for operations like heredoc content attachment.
822 /// The closure receives a mutable reference to each child node.
823 #[inline]
824 pub fn for_each_child_mut<F: FnMut(&mut Node)>(&mut self, mut f: F) {
825 match &mut self.kind {
826 NodeKind::Tie { variable, package, args } => {
827 f(variable);
828 f(package);
829 for arg in args {
830 f(arg);
831 }
832 }
833 NodeKind::Untie { variable } => f(variable),
834
835 // Root program node
836 NodeKind::Program { statements } => {
837 for stmt in statements {
838 f(stmt);
839 }
840 }
841
842 // Statement wrappers
843 NodeKind::ExpressionStatement { expression } => f(expression),
844
845 // Variable declarations
846 NodeKind::VariableDeclaration { variable, initializer, .. } => {
847 f(variable);
848 if let Some(init) = initializer {
849 f(init);
850 }
851 }
852 NodeKind::VariableListDeclaration { variables, initializer, .. } => {
853 for var in variables {
854 f(var);
855 }
856 if let Some(init) = initializer {
857 f(init);
858 }
859 }
860 NodeKind::VariableWithAttributes { variable, .. } => f(variable),
861
862 // Binary operations
863 NodeKind::Binary { left, right, .. } => {
864 f(left);
865 f(right);
866 }
867 NodeKind::Ternary { condition, then_expr, else_expr } => {
868 f(condition);
869 f(then_expr);
870 f(else_expr);
871 }
872 NodeKind::Unary { operand, .. } => f(operand),
873 NodeKind::Assignment { lhs, rhs, .. } => {
874 f(lhs);
875 f(rhs);
876 }
877
878 // Control flow
879 NodeKind::Block { statements } => {
880 for stmt in statements {
881 f(stmt);
882 }
883 }
884 NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
885 f(condition);
886 f(then_branch);
887 for (elsif_cond, elsif_body) in elsif_branches {
888 f(elsif_cond);
889 f(elsif_body);
890 }
891 if let Some(else_body) = else_branch {
892 f(else_body);
893 }
894 }
895 NodeKind::While { condition, body, continue_block, .. } => {
896 f(condition);
897 f(body);
898 if let Some(cont) = continue_block {
899 f(cont);
900 }
901 }
902 NodeKind::For { init, condition, update, body, continue_block, .. } => {
903 if let Some(i) = init {
904 f(i);
905 }
906 if let Some(c) = condition {
907 f(c);
908 }
909 if let Some(u) = update {
910 f(u);
911 }
912 f(body);
913 if let Some(cont) = continue_block {
914 f(cont);
915 }
916 }
917 NodeKind::Foreach { variable, list, body, continue_block } => {
918 f(variable);
919 f(list);
920 f(body);
921 if let Some(cb) = continue_block {
922 f(cb);
923 }
924 }
925 NodeKind::Given { expr, body } => {
926 f(expr);
927 f(body);
928 }
929 NodeKind::When { condition, body } => {
930 f(condition);
931 f(body);
932 }
933 NodeKind::Default { body } => f(body),
934 NodeKind::StatementModifier { statement, condition, .. } => {
935 f(statement);
936 f(condition);
937 }
938 NodeKind::LabeledStatement { statement, .. } => f(statement),
939
940 // Eval and Do blocks
941 NodeKind::Eval { block } => f(block),
942 NodeKind::Do { block } => f(block),
943 NodeKind::Try { body, catch_blocks, finally_block } => {
944 f(body);
945 for (_, catch_body) in catch_blocks {
946 f(catch_body);
947 }
948 if let Some(finally) = finally_block {
949 f(finally);
950 }
951 }
952
953 // Function calls
954 NodeKind::FunctionCall { args, .. } => {
955 for arg in args {
956 f(arg);
957 }
958 }
959 NodeKind::MethodCall { object, args, .. } => {
960 f(object);
961 for arg in args {
962 f(arg);
963 }
964 }
965 NodeKind::IndirectCall { object, args, .. } => {
966 f(object);
967 for arg in args {
968 f(arg);
969 }
970 }
971
972 // Functions
973 NodeKind::Subroutine { prototype, signature, body, .. } => {
974 if let Some(proto) = prototype {
975 f(proto);
976 }
977 if let Some(sig) = signature {
978 f(sig);
979 }
980 f(body);
981 }
982 NodeKind::Method { signature, body, .. } => {
983 if let Some(sig) = signature {
984 f(sig);
985 }
986 f(body);
987 }
988 NodeKind::Return { value } => {
989 if let Some(v) = value {
990 f(v);
991 }
992 }
993 NodeKind::Goto { target } => f(target),
994 NodeKind::Signature { parameters } => {
995 for param in parameters {
996 f(param);
997 }
998 }
999 NodeKind::MandatoryParameter { variable } => f(variable),
1000 NodeKind::OptionalParameter { variable, default_value } => {
1001 f(variable);
1002 f(default_value);
1003 }
1004 NodeKind::SlurpyParameter { variable } => f(variable),
1005 NodeKind::NamedParameter { variable } => f(variable),
1006
1007 // Pattern matching
1008 NodeKind::Match { expr, .. } => f(expr),
1009 NodeKind::Substitution { expr, .. } => f(expr),
1010 NodeKind::Transliteration { expr, .. } => f(expr),
1011
1012 // Containers
1013 NodeKind::ArrayLiteral { elements } => {
1014 for elem in elements {
1015 f(elem);
1016 }
1017 }
1018 NodeKind::HashLiteral { pairs } => {
1019 for (key, value) in pairs {
1020 f(key);
1021 f(value);
1022 }
1023 }
1024
1025 // Package system
1026 NodeKind::Package { block, .. } => {
1027 if let Some(b) = block {
1028 f(b);
1029 }
1030 }
1031 NodeKind::PhaseBlock { block, .. } => f(block),
1032 NodeKind::Class { body, .. } => f(body),
1033
1034 // Error node might have a partial valid tree
1035 NodeKind::Error { partial, .. } => {
1036 if let Some(node) = partial {
1037 f(node);
1038 }
1039 }
1040
1041 // Leaf nodes (no children to traverse)
1042 NodeKind::Variable { .. }
1043 | NodeKind::Identifier { .. }
1044 | NodeKind::Number { .. }
1045 | NodeKind::String { .. }
1046 | NodeKind::Heredoc { .. }
1047 | NodeKind::Regex { .. }
1048 | NodeKind::Readline { .. }
1049 | NodeKind::Glob { .. }
1050 | NodeKind::Typeglob { .. }
1051 | NodeKind::Diamond
1052 | NodeKind::Ellipsis
1053 | NodeKind::Undef
1054 | NodeKind::Use { .. }
1055 | NodeKind::No { .. }
1056 | NodeKind::Prototype { .. }
1057 | NodeKind::DataSection { .. }
1058 | NodeKind::Format { .. }
1059 | NodeKind::LoopControl { .. }
1060 | NodeKind::MissingExpression
1061 | NodeKind::MissingStatement
1062 | NodeKind::MissingIdentifier
1063 | NodeKind::MissingBlock
1064 | NodeKind::UnknownRest => {}
1065 }
1066 }
1067
1068 /// Call a function on every direct child node of this node (immutable version).
1069 ///
1070 /// This enables depth-first traversal for read-only operations like AST analysis.
1071 /// The closure receives an immutable reference to each child node.
1072 #[inline]
1073 pub fn for_each_child<'a, F: FnMut(&'a Node)>(&'a self, mut f: F) {
1074 match &self.kind {
1075 NodeKind::Tie { variable, package, args } => {
1076 f(variable);
1077 f(package);
1078 for arg in args {
1079 f(arg);
1080 }
1081 }
1082 NodeKind::Untie { variable } => f(variable),
1083
1084 // Root program node
1085 NodeKind::Program { statements } => {
1086 for stmt in statements {
1087 f(stmt);
1088 }
1089 }
1090
1091 // Statement wrappers
1092 NodeKind::ExpressionStatement { expression } => f(expression),
1093
1094 // Variable declarations
1095 NodeKind::VariableDeclaration { variable, initializer, .. } => {
1096 f(variable);
1097 if let Some(init) = initializer {
1098 f(init);
1099 }
1100 }
1101 NodeKind::VariableListDeclaration { variables, initializer, .. } => {
1102 for var in variables {
1103 f(var);
1104 }
1105 if let Some(init) = initializer {
1106 f(init);
1107 }
1108 }
1109 NodeKind::VariableWithAttributes { variable, .. } => f(variable),
1110
1111 // Binary operations
1112 NodeKind::Binary { left, right, .. } => {
1113 f(left);
1114 f(right);
1115 }
1116 NodeKind::Ternary { condition, then_expr, else_expr } => {
1117 f(condition);
1118 f(then_expr);
1119 f(else_expr);
1120 }
1121 NodeKind::Unary { operand, .. } => f(operand),
1122 NodeKind::Assignment { lhs, rhs, .. } => {
1123 f(lhs);
1124 f(rhs);
1125 }
1126
1127 // Control flow
1128 NodeKind::Block { statements } => {
1129 for stmt in statements {
1130 f(stmt);
1131 }
1132 }
1133 NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
1134 f(condition);
1135 f(then_branch);
1136 for (elsif_cond, elsif_body) in elsif_branches {
1137 f(elsif_cond);
1138 f(elsif_body);
1139 }
1140 if let Some(else_body) = else_branch {
1141 f(else_body);
1142 }
1143 }
1144 NodeKind::While { condition, body, continue_block, .. } => {
1145 f(condition);
1146 f(body);
1147 if let Some(cont) = continue_block {
1148 f(cont);
1149 }
1150 }
1151 NodeKind::For { init, condition, update, body, continue_block, .. } => {
1152 if let Some(i) = init {
1153 f(i);
1154 }
1155 if let Some(c) = condition {
1156 f(c);
1157 }
1158 if let Some(u) = update {
1159 f(u);
1160 }
1161 f(body);
1162 if let Some(cont) = continue_block {
1163 f(cont);
1164 }
1165 }
1166 NodeKind::Foreach { variable, list, body, continue_block } => {
1167 f(variable);
1168 f(list);
1169 f(body);
1170 if let Some(cb) = continue_block {
1171 f(cb);
1172 }
1173 }
1174 NodeKind::Given { expr, body } => {
1175 f(expr);
1176 f(body);
1177 }
1178 NodeKind::When { condition, body } => {
1179 f(condition);
1180 f(body);
1181 }
1182 NodeKind::Default { body } => f(body),
1183 NodeKind::StatementModifier { statement, condition, .. } => {
1184 f(statement);
1185 f(condition);
1186 }
1187 NodeKind::LabeledStatement { statement, .. } => f(statement),
1188
1189 // Eval and Do blocks
1190 NodeKind::Eval { block } => f(block),
1191 NodeKind::Do { block } => f(block),
1192 NodeKind::Try { body, catch_blocks, finally_block } => {
1193 f(body);
1194 for (_, catch_body) in catch_blocks {
1195 f(catch_body);
1196 }
1197 if let Some(finally) = finally_block {
1198 f(finally);
1199 }
1200 }
1201
1202 // Function calls
1203 NodeKind::FunctionCall { args, .. } => {
1204 for arg in args {
1205 f(arg);
1206 }
1207 }
1208 NodeKind::MethodCall { object, args, .. } => {
1209 f(object);
1210 for arg in args {
1211 f(arg);
1212 }
1213 }
1214 NodeKind::IndirectCall { object, args, .. } => {
1215 f(object);
1216 for arg in args {
1217 f(arg);
1218 }
1219 }
1220
1221 // Functions
1222 NodeKind::Subroutine { prototype, signature, body, .. } => {
1223 if let Some(proto) = prototype {
1224 f(proto);
1225 }
1226 if let Some(sig) = signature {
1227 f(sig);
1228 }
1229 f(body);
1230 }
1231 NodeKind::Method { signature, body, .. } => {
1232 if let Some(sig) = signature {
1233 f(sig);
1234 }
1235 f(body);
1236 }
1237 NodeKind::Return { value } => {
1238 if let Some(v) = value {
1239 f(v);
1240 }
1241 }
1242 NodeKind::Goto { target } => f(target),
1243 NodeKind::Signature { parameters } => {
1244 for param in parameters {
1245 f(param);
1246 }
1247 }
1248 NodeKind::MandatoryParameter { variable } => f(variable),
1249 NodeKind::OptionalParameter { variable, default_value } => {
1250 f(variable);
1251 f(default_value);
1252 }
1253 NodeKind::SlurpyParameter { variable } => f(variable),
1254 NodeKind::NamedParameter { variable } => f(variable),
1255
1256 // Pattern matching
1257 NodeKind::Match { expr, .. } => f(expr),
1258 NodeKind::Substitution { expr, .. } => f(expr),
1259 NodeKind::Transliteration { expr, .. } => f(expr),
1260
1261 // Containers
1262 NodeKind::ArrayLiteral { elements } => {
1263 for elem in elements {
1264 f(elem);
1265 }
1266 }
1267 NodeKind::HashLiteral { pairs } => {
1268 for (key, value) in pairs {
1269 f(key);
1270 f(value);
1271 }
1272 }
1273
1274 // Package system
1275 NodeKind::Package { block, .. } => {
1276 if let Some(b) = block {
1277 f(b);
1278 }
1279 }
1280 NodeKind::PhaseBlock { block, .. } => f(block),
1281 NodeKind::Class { body, .. } => f(body),
1282
1283 // Error node might have a partial valid tree
1284 NodeKind::Error { partial, .. } => {
1285 if let Some(node) = partial {
1286 f(node);
1287 }
1288 }
1289
1290 // Leaf nodes (no children to traverse)
1291 NodeKind::Variable { .. }
1292 | NodeKind::Identifier { .. }
1293 | NodeKind::Number { .. }
1294 | NodeKind::String { .. }
1295 | NodeKind::Heredoc { .. }
1296 | NodeKind::Regex { .. }
1297 | NodeKind::Readline { .. }
1298 | NodeKind::Glob { .. }
1299 | NodeKind::Typeglob { .. }
1300 | NodeKind::Diamond
1301 | NodeKind::Ellipsis
1302 | NodeKind::Undef
1303 | NodeKind::Use { .. }
1304 | NodeKind::No { .. }
1305 | NodeKind::Prototype { .. }
1306 | NodeKind::DataSection { .. }
1307 | NodeKind::Format { .. }
1308 | NodeKind::LoopControl { .. }
1309 | NodeKind::MissingExpression
1310 | NodeKind::MissingStatement
1311 | NodeKind::MissingIdentifier
1312 | NodeKind::MissingBlock
1313 | NodeKind::UnknownRest => {}
1314 }
1315 }
1316
1317 /// Count the total number of nodes in this subtree (inclusive).
1318 ///
1319 /// # Examples
1320 ///
1321 /// ```
1322 /// use perl_ast::{Node, NodeKind, SourceLocation};
1323 ///
1324 /// let loc = SourceLocation { start: 0, end: 1 };
1325 /// let leaf = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1326 /// assert_eq!(leaf.count_nodes(), 1);
1327 ///
1328 /// let program = Node::new(
1329 /// NodeKind::Program { statements: vec![leaf] },
1330 /// loc,
1331 /// );
1332 /// assert_eq!(program.count_nodes(), 2);
1333 /// ```
1334 pub fn count_nodes(&self) -> usize {
1335 let mut count = 1;
1336 self.for_each_child(|child| {
1337 count += child.count_nodes();
1338 });
1339 count
1340 }
1341
1342 /// Collect direct child nodes into a vector for convenience APIs.
1343 ///
1344 /// # Examples
1345 ///
1346 /// ```
1347 /// use perl_ast::{Node, NodeKind, SourceLocation};
1348 ///
1349 /// let loc = SourceLocation { start: 0, end: 1 };
1350 /// let stmt = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1351 /// let program = Node::new(
1352 /// NodeKind::Program { statements: vec![stmt] },
1353 /// loc,
1354 /// );
1355 /// assert_eq!(program.children().len(), 1);
1356 /// ```
1357 #[inline]
1358 pub fn children(&self) -> Vec<&Node> {
1359 let mut children = Vec::new();
1360 self.for_each_child(|child| children.push(child));
1361 children
1362 }
1363
1364 /// Get the first direct child node, if any.
1365 ///
1366 /// Optimized to avoid allocating the children vector.
1367 #[inline]
1368 pub fn first_child(&self) -> Option<&Node> {
1369 let mut result = None;
1370 self.for_each_child(|child| {
1371 if result.is_none() {
1372 result = Some(child);
1373 }
1374 });
1375 result
1376 }
1377}
1378
1379/// Comprehensive enumeration of all Perl language constructs supported by the parser.
1380///
1381/// This enum represents every possible AST node type that can be parsed from Perl code
1382/// during the Parse → Index → Navigate → Complete → Analyze workflow. Each variant captures
1383/// the semantic meaning and structural relationships needed for complete script analysis
1384/// and transformation.
1385///
1386/// # LSP Workflow Integration
1387///
1388/// Node kinds are processed differently across workflow stages:
1389/// - **Parse**: All variants are produced by the parser
1390/// - **Index**: Symbol-bearing variants feed workspace indexing
1391/// - **Navigate**: Call and reference variants support navigation features
1392/// - **Complete**: Expression variants provide completion context
1393/// - **Analyze**: Semantic variants drive diagnostics and refactoring
1394///
1395/// # Examples
1396///
1397/// Pattern-match on node kinds to extract semantic information:
1398///
1399/// ```
1400/// use perl_ast::{Node, NodeKind, SourceLocation};
1401///
1402/// let loc = SourceLocation { start: 0, end: 5 };
1403/// let node = Node::new(
1404/// NodeKind::Variable { sigil: "$".to_string(), name: "foo".to_string() },
1405/// loc,
1406/// );
1407///
1408/// match &node.kind {
1409/// NodeKind::Variable { sigil, name } => {
1410/// assert_eq!(sigil, "$");
1411/// assert_eq!(name, "foo");
1412/// }
1413/// _ => panic!("expected Variable"),
1414/// }
1415/// ```
1416///
1417/// Use [`kind_name()`](NodeKind::kind_name) for debugging and diagnostics:
1418///
1419/// ```
1420/// use perl_ast::NodeKind;
1421///
1422/// let kind = NodeKind::Number { value: "99".to_string() };
1423/// assert_eq!(kind.kind_name(), "Number");
1424///
1425/// let kind = NodeKind::Variable { sigil: "@".to_string(), name: "list".to_string() };
1426/// assert_eq!(kind.kind_name(), "Variable");
1427/// ```
1428///
1429/// # Performance Considerations
1430///
1431/// The enum design optimizes for large codebases:
1432/// - Box pointers minimize stack usage for recursive structures
1433/// - Vector storage enables efficient bulk operations on child nodes
1434/// - Clone operations optimized for concurrent analysis workflows
1435/// - Pattern matching performance tuned for common Perl constructs
1436#[derive(Debug, Clone, PartialEq)]
1437pub enum NodeKind {
1438 /// Top-level program containing all statements in an Perl script
1439 ///
1440 /// This is the root node for any parsed Perl script content, containing all
1441 /// top-level statements found during the Parse stage of LSP workflow.
1442 Program {
1443 /// All top-level statements in the Perl script
1444 statements: Vec<Node>,
1445 },
1446
1447 /// Statement wrapper for expressions that appear at statement level
1448 ///
1449 /// Used during Analyze stage to distinguish between expressions used as
1450 /// statements versus expressions within other contexts during Perl parsing.
1451 ExpressionStatement {
1452 /// The expression being used as a statement
1453 expression: Box<Node>,
1454 },
1455
1456 /// Variable declaration with scope declarator in Perl script processing
1457 ///
1458 /// Represents declarations like `my $var`, `our $global`, `local $dynamic`, etc.
1459 /// Critical for Analyze stage symbol table construction during Perl parsing.
1460 VariableDeclaration {
1461 /// Scope declarator: "my", "our", "local", "state"
1462 declarator: String,
1463 /// The variable being declared
1464 variable: Box<Node>,
1465 /// Variable attributes (e.g., ":shared", ":locked")
1466 attributes: Vec<String>,
1467 /// Optional initializer expression
1468 initializer: Option<Box<Node>>,
1469 },
1470
1471 /// Multiple variable declaration in a single statement
1472 ///
1473 /// Handles constructs like `my ($x, $y) = @values` common in Perl script processing.
1474 /// Supports efficient bulk variable analysis during Navigate stage operations.
1475 VariableListDeclaration {
1476 /// Scope declarator for all variables in the list
1477 declarator: String,
1478 /// All variables being declared in the list
1479 variables: Vec<Node>,
1480 /// Attributes applied to the variable list
1481 attributes: Vec<String>,
1482 /// Optional initializer for the entire variable list
1483 initializer: Option<Box<Node>>,
1484 },
1485
1486 /// Perl variable reference (scalar, array, hash, etc.) in Perl parsing workflow
1487 Variable {
1488 /// Variable sigil indicating type: $, @, %, &, *
1489 sigil: String, // $, @, %, &, *
1490 /// Variable name without sigil
1491 name: String,
1492 },
1493
1494 /// Variable with additional attributes for enhanced LSP workflow
1495 VariableWithAttributes {
1496 /// The base variable node
1497 variable: Box<Node>,
1498 /// List of attribute names applied to the variable
1499 attributes: Vec<String>,
1500 },
1501
1502 /// Assignment operation for LSP data processing workflows
1503 Assignment {
1504 /// Left-hand side of assignment
1505 lhs: Box<Node>,
1506 /// Right-hand side of assignment
1507 rhs: Box<Node>,
1508 /// Assignment operator: =, +=, -=, etc.
1509 op: String, // =, +=, -=, etc.
1510 },
1511
1512 // Expressions
1513 /// Binary operation for Perl parsing workflow calculations
1514 Binary {
1515 /// Binary operator
1516 op: String,
1517 /// Left operand
1518 left: Box<Node>,
1519 /// Right operand
1520 right: Box<Node>,
1521 },
1522
1523 /// Ternary conditional expression for Perl parsing workflow logic
1524 Ternary {
1525 /// Condition to evaluate
1526 condition: Box<Node>,
1527 /// Expression when condition is true
1528 then_expr: Box<Node>,
1529 /// Expression when condition is false
1530 else_expr: Box<Node>,
1531 },
1532
1533 /// Unary operation for Perl parsing workflow
1534 Unary {
1535 /// Unary operator
1536 op: String,
1537 /// Operand to apply operator to
1538 operand: Box<Node>,
1539 },
1540
1541 // I/O operations
1542 /// Diamond operator for file input in Perl parsing workflow
1543 Diamond, // <>
1544
1545 /// Ellipsis operator for Perl parsing workflow
1546 Ellipsis, // ...
1547
1548 /// Undef value for Perl parsing workflow
1549 Undef, // undef
1550
1551 /// Readline operation for LSP file processing
1552 Readline {
1553 /// Optional filehandle: `<STDIN>`, `<$fh>`, etc.
1554 filehandle: Option<String>, // <STDIN>, <$fh>, etc.
1555 },
1556
1557 /// Glob pattern for LSP workspace file matching
1558 Glob {
1559 /// Pattern string for file matching
1560 pattern: String, // <*.txt>
1561 },
1562
1563 /// Typeglob expression: `*foo` or `*main::bar`
1564 ///
1565 /// Provides access to all symbol table entries for a given name.
1566 Typeglob {
1567 /// Name of the symbol (including package qualification)
1568 name: String,
1569 },
1570
1571 /// Numeric literal in Perl code (integer, float, hex, octal, binary)
1572 ///
1573 /// Represents all numeric literal forms: `42`, `3.14`, `0x1A`, `0o755`, `0b1010`.
1574 Number {
1575 /// String representation preserving original format
1576 value: String,
1577 },
1578
1579 /// String literal with optional interpolation
1580 ///
1581 /// Handles both single-quoted (`'literal'`) and double-quoted (`"$interpolated"`) strings.
1582 String {
1583 /// String content (after quote processing)
1584 value: String,
1585 /// Whether the string supports variable interpolation
1586 interpolated: bool,
1587 },
1588
1589 /// Heredoc string literal for multi-line content
1590 ///
1591 /// Supports all heredoc forms: `<<EOF`, `<<'EOF'`, `<<"EOF"`, `<<~EOF` (indented).
1592 Heredoc {
1593 /// Delimiter marking heredoc boundaries
1594 delimiter: String,
1595 /// Content between delimiters
1596 content: String,
1597 /// Whether content supports variable interpolation
1598 interpolated: bool,
1599 /// Whether leading whitespace is stripped (<<~ form)
1600 indented: bool,
1601 /// Whether this is a command execution heredoc (<<`EOF`)
1602 command: bool,
1603 /// Body span for breakpoint detection (populated by drain_pending_heredocs)
1604 body_span: Option<SourceLocation>,
1605 },
1606
1607 /// Array literal expression: `(1, 2, 3)` or `[1, 2, 3]`
1608 ArrayLiteral {
1609 /// Elements in the array
1610 elements: Vec<Node>,
1611 },
1612
1613 /// Hash literal expression: `(key => 'value')` or `{key => 'value'}`
1614 HashLiteral {
1615 /// Key-value pairs in the hash
1616 pairs: Vec<(Node, Node)>,
1617 },
1618
1619 /// Block of statements: `{ ... }`
1620 ///
1621 /// Used for control structures, subroutine bodies, and bare blocks.
1622 Block {
1623 /// Statements within the block
1624 statements: Vec<Node>,
1625 },
1626
1627 /// Eval block for exception handling: `eval { ... }`
1628 Eval {
1629 /// Block to evaluate with exception trapping
1630 block: Box<Node>,
1631 },
1632
1633 /// Do block for file inclusion or expression evaluation: `do { ... }` or `do "file"`
1634 Do {
1635 /// Block to execute or file expression
1636 block: Box<Node>,
1637 },
1638
1639 /// Try-catch-finally for modern exception handling (Syntax::Keyword::Try style)
1640 Try {
1641 /// Try block body
1642 body: Box<Node>,
1643 /// Catch blocks: (optional exception variable, handler block)
1644 catch_blocks: Vec<(Option<String>, Box<Node>)>,
1645 /// Optional finally block
1646 finally_block: Option<Box<Node>>,
1647 },
1648
1649 /// If-elsif-else conditional statement
1650 If {
1651 /// Condition expression
1652 condition: Box<Node>,
1653 /// Then branch block
1654 then_branch: Box<Node>,
1655 /// Elsif branches: (condition, block) pairs
1656 elsif_branches: Vec<(Box<Node>, Box<Node>)>,
1657 /// Optional else branch
1658 else_branch: Option<Box<Node>>,
1659 },
1660
1661 /// Statement with a label for loop control: `LABEL: while (...)`
1662 LabeledStatement {
1663 /// Label name (e.g., "OUTER", "LINE")
1664 label: String,
1665 /// Labeled statement (typically a loop)
1666 statement: Box<Node>,
1667 },
1668
1669 /// While loop: `while (condition) { ... }`
1670 While {
1671 /// Loop condition
1672 condition: Box<Node>,
1673 /// Loop body
1674 body: Box<Node>,
1675 /// Optional continue block
1676 continue_block: Option<Box<Node>>,
1677 },
1678
1679 /// Tie operation for binding variables to objects: `tie %hash, 'Package', @args`
1680 Tie {
1681 /// Variable being tied
1682 variable: Box<Node>,
1683 /// Class/package name to tie to
1684 package: Box<Node>,
1685 /// Arguments passed to TIE* method
1686 args: Vec<Node>,
1687 },
1688
1689 /// Untie operation for unbinding variables: `untie %hash`
1690 Untie {
1691 /// Variable being untied
1692 variable: Box<Node>,
1693 },
1694
1695 /// C-style for loop: `for (init; cond; update) { ... }`
1696 For {
1697 /// Initialization expression
1698 init: Option<Box<Node>>,
1699 /// Loop condition
1700 condition: Option<Box<Node>>,
1701 /// Update expression
1702 update: Option<Box<Node>>,
1703 /// Loop body
1704 body: Box<Node>,
1705 /// Optional continue block
1706 continue_block: Option<Box<Node>>,
1707 },
1708
1709 /// Foreach loop: `foreach my $item (@list) { ... }`
1710 Foreach {
1711 /// Iterator variable
1712 variable: Box<Node>,
1713 /// List to iterate
1714 list: Box<Node>,
1715 /// Loop body
1716 body: Box<Node>,
1717 /// Optional continue block
1718 continue_block: Option<Box<Node>>,
1719 },
1720
1721 /// Given statement for switch-like matching (Perl 5.10+)
1722 Given {
1723 /// Expression to match against
1724 expr: Box<Node>,
1725 /// Body containing when/default blocks
1726 body: Box<Node>,
1727 },
1728
1729 /// When clause in given/switch: `when ($pattern) { ... }`
1730 When {
1731 /// Pattern to match
1732 condition: Box<Node>,
1733 /// Handler block
1734 body: Box<Node>,
1735 },
1736
1737 /// Default clause in given/switch: `default { ... }`
1738 Default {
1739 /// Handler block for unmatched cases
1740 body: Box<Node>,
1741 },
1742
1743 /// Statement modifier syntax: `print "ok" if $condition`
1744 StatementModifier {
1745 /// Statement to conditionally execute
1746 statement: Box<Node>,
1747 /// Modifier keyword: if, unless, while, until, for, foreach
1748 modifier: String,
1749 /// Modifier condition
1750 condition: Box<Node>,
1751 },
1752
1753 // Functions
1754 /// Subroutine declaration (function) including name, prototype, signature and body.
1755 Subroutine {
1756 /// Name of the subroutine
1757 ///
1758 /// # Precise Navigation Support
1759 /// - Added name_span for exact LSP navigation
1760 /// - Enables precise go-to-definition and hover behavior
1761 /// - O(1) span lookup in workspace symbols
1762 ///
1763 /// ## Integration Points
1764 /// - Semantic token providers
1765 /// - Cross-reference generation
1766 /// - Symbol renaming
1767 name: Option<String>,
1768
1769 /// Source location span of the subroutine name
1770 ///
1771 /// ## Usage Notes
1772 /// - Always corresponds to the name field
1773 /// - Provides constant-time position information
1774 /// - Essential for precise editor interactions
1775 name_span: Option<SourceLocation>,
1776
1777 /// Optional prototype node (e.g. `($;@)`).
1778 prototype: Option<Box<Node>>,
1779 /// Optional signature node (Perl 5.20+ feature).
1780 signature: Option<Box<Node>>,
1781 /// Attributes attached to the subroutine (`:lvalue`, etc.).
1782 attributes: Vec<String>,
1783 /// The body block of the subroutine.
1784 body: Box<Node>,
1785 },
1786
1787 /// Subroutine prototype specification: `sub foo ($;@) { ... }`
1788 Prototype {
1789 /// Prototype string defining argument behavior
1790 content: String,
1791 },
1792
1793 /// Subroutine signature (Perl 5.20+): `sub foo ($x, $y = 0) { ... }`
1794 Signature {
1795 /// List of signature parameters
1796 parameters: Vec<Node>,
1797 },
1798
1799 /// Mandatory signature parameter: `$x` in `sub foo ($x) { }`
1800 MandatoryParameter {
1801 /// Variable being bound
1802 variable: Box<Node>,
1803 },
1804
1805 /// Optional signature parameter with default: `$y = 0` in `sub foo ($y = 0) { }`
1806 OptionalParameter {
1807 /// Variable being bound
1808 variable: Box<Node>,
1809 /// Default value expression
1810 default_value: Box<Node>,
1811 },
1812
1813 /// Slurpy parameter collecting remaining args: `@rest` or `%opts` in signature
1814 SlurpyParameter {
1815 /// Array or hash variable to receive remaining arguments
1816 variable: Box<Node>,
1817 },
1818
1819 /// Named parameter placeholder in signature (future Perl feature)
1820 NamedParameter {
1821 /// Variable for named parameter binding
1822 variable: Box<Node>,
1823 },
1824
1825 /// Method declaration (Perl 5.38+ with `use feature 'class'`)
1826 Method {
1827 /// Method name
1828 name: String,
1829 /// Optional signature
1830 signature: Option<Box<Node>>,
1831 /// Method attributes (e.g., `:lvalue`)
1832 attributes: Vec<String>,
1833 /// Method body
1834 body: Box<Node>,
1835 },
1836
1837 /// Return statement: `return;` or `return $value;`
1838 Return {
1839 /// Optional return value
1840 value: Option<Box<Node>>,
1841 },
1842
1843 /// Loop control statement: `next`, `last`, or `redo`
1844 LoopControl {
1845 /// Control keyword: "next", "last", or "redo"
1846 op: String,
1847 /// Optional label: `next LABEL`
1848 label: Option<String>,
1849 },
1850
1851 /// Goto statement: `goto LABEL`, `goto &sub`, or `goto $expr`
1852 Goto {
1853 /// The target of the goto (label identifier, sub reference, or expression)
1854 target: Box<Node>,
1855 },
1856
1857 /// Method call: `$obj->method(@args)` or `$obj->method`
1858 MethodCall {
1859 /// Object or class expression
1860 object: Box<Node>,
1861 /// Method name being called
1862 method: String,
1863 /// Method arguments
1864 args: Vec<Node>,
1865 },
1866
1867 /// Function call: `foo(@args)` or `foo()`
1868 FunctionCall {
1869 /// Function name (may be qualified: `Package::func`)
1870 name: String,
1871 /// Function arguments
1872 args: Vec<Node>,
1873 },
1874
1875 /// Indirect object call (legacy syntax): `new Class @args`
1876 IndirectCall {
1877 /// Method name
1878 method: String,
1879 /// Object or class
1880 object: Box<Node>,
1881 /// Arguments
1882 args: Vec<Node>,
1883 },
1884
1885 /// Regex literal: `/pattern/modifiers` or `qr/pattern/modifiers`
1886 Regex {
1887 /// Regular expression pattern
1888 pattern: String,
1889 /// Replacement string (for s/// when parsed as regex)
1890 replacement: Option<String>,
1891 /// Regex modifiers (i, m, s, x, g, etc.)
1892 modifiers: String,
1893 /// Whether the regex contains embedded code `(?{...})`
1894 has_embedded_code: bool,
1895 },
1896
1897 /// Match operation: `$str =~ /pattern/modifiers` or `$str !~ /pattern/modifiers`
1898 Match {
1899 /// Expression to match against
1900 expr: Box<Node>,
1901 /// Pattern to match
1902 pattern: String,
1903 /// Match modifiers
1904 modifiers: String,
1905 /// Whether the regex contains embedded code `(?{...})`
1906 has_embedded_code: bool,
1907 /// Whether the binding operator was `!~` (negated match)
1908 negated: bool,
1909 },
1910
1911 /// Substitution operation: `$str =~ s/pattern/replacement/modifiers`
1912 Substitution {
1913 /// Expression to substitute in
1914 expr: Box<Node>,
1915 /// Pattern to find
1916 pattern: String,
1917 /// Replacement string
1918 replacement: String,
1919 /// Substitution modifiers (g, e, r, etc.)
1920 modifiers: String,
1921 /// Whether the regex contains embedded code `(?{...})`
1922 has_embedded_code: bool,
1923 /// Whether the binding operator was `!~` (negated match)
1924 negated: bool,
1925 },
1926
1927 /// Transliteration operation: `$str =~ tr/search/replace/` or `y///`
1928 Transliteration {
1929 /// Expression to transliterate
1930 expr: Box<Node>,
1931 /// Characters to search for
1932 search: String,
1933 /// Replacement characters
1934 replace: String,
1935 /// Transliteration modifiers (c, d, s, r)
1936 modifiers: String,
1937 /// Whether the binding operator was `!~` (negated match)
1938 negated: bool,
1939 },
1940
1941 // Package system
1942 /// Package declaration (e.g. `package Foo;`) and optional inline block form.
1943 Package {
1944 /// Name of the package
1945 ///
1946 /// # Precise Navigation Support
1947 /// - Added name_span for exact LSP navigation
1948 /// - Enables precise go-to-definition and hover behavior
1949 /// - O(1) span lookup in workspace symbols
1950 ///
1951 /// ## Integration Points
1952 /// - Workspace indexing
1953 /// - Cross-module symbol resolution
1954 /// - Code action providers
1955 name: String,
1956
1957 /// Source location span of the package name
1958 ///
1959 /// ## Usage Notes
1960 /// - Always corresponds to the name field
1961 /// - Provides constant-time position information
1962 /// - Essential for precise editor interactions
1963 name_span: SourceLocation,
1964
1965 /// Optional inline block for `package Foo { ... }` declarations.
1966 block: Option<Box<Node>>,
1967 },
1968
1969 /// Use statement for module loading: `use Module qw(imports);`
1970 Use {
1971 /// Module name to load
1972 module: String,
1973 /// Import arguments (symbols to import)
1974 args: Vec<String>,
1975 /// Whether this module is a known source filter (security risk)
1976 has_filter_risk: bool,
1977 },
1978
1979 /// No statement for disabling features: `no strict;`
1980 No {
1981 /// Module/pragma name to disable
1982 module: String,
1983 /// Arguments for the no statement
1984 args: Vec<String>,
1985 /// Whether this module is a known source filter (security risk)
1986 has_filter_risk: bool,
1987 },
1988
1989 /// Phase block for compile/runtime hooks: `BEGIN`, `END`, `CHECK`, `INIT`, `UNITCHECK`
1990 PhaseBlock {
1991 /// Phase name: BEGIN, END, CHECK, INIT, UNITCHECK
1992 phase: String,
1993 /// Source location span of the phase block name for precise navigation
1994 phase_span: Option<SourceLocation>,
1995 /// Block to execute during the specified phase
1996 block: Box<Node>,
1997 },
1998
1999 /// Data section marker: `__DATA__` or `__END__`
2000 DataSection {
2001 /// Section marker (__DATA__ or __END__)
2002 marker: String,
2003 /// Content following the marker (if any)
2004 body: Option<String>,
2005 },
2006
2007 /// Class declaration (Perl 5.38+ with `use feature 'class'`)
2008 Class {
2009 /// Class name
2010 name: String,
2011 /// Class body containing methods and attributes
2012 body: Box<Node>,
2013 },
2014
2015 /// Format declaration for legacy report generation
2016 Format {
2017 /// Format name (defaults to filehandle name)
2018 name: String,
2019 /// Format specification body
2020 body: String,
2021 },
2022
2023 /// Bare identifier (bareword or package-qualified name)
2024 Identifier {
2025 /// Identifier string
2026 name: String,
2027 },
2028
2029 /// Parse error placeholder with error message and recovery context
2030 Error {
2031 /// Error description
2032 message: String,
2033 /// Expected token types (if any)
2034 expected: Vec<TokenKind>,
2035 /// The token actually found (if any)
2036 found: Option<Token>,
2037 /// Partial AST node parsed before error (if any)
2038 partial: Option<Box<Node>>,
2039 },
2040
2041 /// Missing expression where one was expected
2042 MissingExpression,
2043 /// Missing statement where one was expected
2044 MissingStatement,
2045 /// Missing identifier where one was expected
2046 MissingIdentifier,
2047 /// Missing block where one was expected
2048 MissingBlock,
2049
2050 /// Lexer budget exceeded marker preserving partial parse results
2051 ///
2052 /// Used when recursion or token limits are hit to preserve already-parsed content.
2053 UnknownRest,
2054}
2055
2056impl NodeKind {
2057 /// Get the name of this `NodeKind` as a static string.
2058 ///
2059 /// Useful for diagnostics, logging, and human-readable AST dumps.
2060 ///
2061 /// # Examples
2062 ///
2063 /// ```
2064 /// use perl_ast::NodeKind;
2065 ///
2066 /// let kind = NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() };
2067 /// assert_eq!(kind.kind_name(), "Variable");
2068 ///
2069 /// let kind = NodeKind::Program { statements: vec![] };
2070 /// assert_eq!(kind.kind_name(), "Program");
2071 /// ```
2072 pub fn kind_name(&self) -> &'static str {
2073 match self {
2074 NodeKind::Program { .. } => "Program",
2075 NodeKind::ExpressionStatement { .. } => "ExpressionStatement",
2076 NodeKind::VariableDeclaration { .. } => "VariableDeclaration",
2077 NodeKind::VariableListDeclaration { .. } => "VariableListDeclaration",
2078 NodeKind::Variable { .. } => "Variable",
2079 NodeKind::VariableWithAttributes { .. } => "VariableWithAttributes",
2080 NodeKind::Assignment { .. } => "Assignment",
2081 NodeKind::Binary { .. } => "Binary",
2082 NodeKind::Ternary { .. } => "Ternary",
2083 NodeKind::Unary { .. } => "Unary",
2084 NodeKind::Diamond => "Diamond",
2085 NodeKind::Ellipsis => "Ellipsis",
2086 NodeKind::Undef => "Undef",
2087 NodeKind::Readline { .. } => "Readline",
2088 NodeKind::Glob { .. } => "Glob",
2089 NodeKind::Typeglob { .. } => "Typeglob",
2090 NodeKind::Number { .. } => "Number",
2091 NodeKind::String { .. } => "String",
2092 NodeKind::Heredoc { .. } => "Heredoc",
2093 NodeKind::ArrayLiteral { .. } => "ArrayLiteral",
2094 NodeKind::HashLiteral { .. } => "HashLiteral",
2095 NodeKind::Block { .. } => "Block",
2096 NodeKind::Eval { .. } => "Eval",
2097 NodeKind::Do { .. } => "Do",
2098 NodeKind::Try { .. } => "Try",
2099 NodeKind::If { .. } => "If",
2100 NodeKind::LabeledStatement { .. } => "LabeledStatement",
2101 NodeKind::While { .. } => "While",
2102 NodeKind::Tie { .. } => "Tie",
2103 NodeKind::Untie { .. } => "Untie",
2104 NodeKind::For { .. } => "For",
2105 NodeKind::Foreach { .. } => "Foreach",
2106 NodeKind::Given { .. } => "Given",
2107 NodeKind::When { .. } => "When",
2108 NodeKind::Default { .. } => "Default",
2109 NodeKind::StatementModifier { .. } => "StatementModifier",
2110 NodeKind::Subroutine { .. } => "Subroutine",
2111 NodeKind::Prototype { .. } => "Prototype",
2112 NodeKind::Signature { .. } => "Signature",
2113 NodeKind::MandatoryParameter { .. } => "MandatoryParameter",
2114 NodeKind::OptionalParameter { .. } => "OptionalParameter",
2115 NodeKind::SlurpyParameter { .. } => "SlurpyParameter",
2116 NodeKind::NamedParameter { .. } => "NamedParameter",
2117 NodeKind::Method { .. } => "Method",
2118 NodeKind::Return { .. } => "Return",
2119 NodeKind::LoopControl { .. } => "LoopControl",
2120 NodeKind::Goto { .. } => "Goto",
2121 NodeKind::MethodCall { .. } => "MethodCall",
2122 NodeKind::FunctionCall { .. } => "FunctionCall",
2123 NodeKind::IndirectCall { .. } => "IndirectCall",
2124 NodeKind::Regex { .. } => "Regex",
2125 NodeKind::Match { .. } => "Match",
2126 NodeKind::Substitution { .. } => "Substitution",
2127 NodeKind::Transliteration { .. } => "Transliteration",
2128 NodeKind::Package { .. } => "Package",
2129 NodeKind::Use { .. } => "Use",
2130 NodeKind::No { .. } => "No",
2131 NodeKind::PhaseBlock { .. } => "PhaseBlock",
2132 NodeKind::DataSection { .. } => "DataSection",
2133 NodeKind::Class { .. } => "Class",
2134 NodeKind::Format { .. } => "Format",
2135 NodeKind::Identifier { .. } => "Identifier",
2136 NodeKind::Error { .. } => "Error",
2137 NodeKind::MissingExpression => "MissingExpression",
2138 NodeKind::MissingStatement => "MissingStatement",
2139 NodeKind::MissingIdentifier => "MissingIdentifier",
2140 NodeKind::MissingBlock => "MissingBlock",
2141 NodeKind::UnknownRest => "UnknownRest",
2142 }
2143 }
2144
2145 /// Canonical list of **all** `kind_name()` strings, in alphabetical order.
2146 ///
2147 /// Every consumer that needs the full set of NodeKind names should reference
2148 /// this constant instead of maintaining a hand-written copy.
2149 pub const ALL_KIND_NAMES: &[&'static str] = &[
2150 "ArrayLiteral",
2151 "Assignment",
2152 "Binary",
2153 "Block",
2154 "Class",
2155 "DataSection",
2156 "Default",
2157 "Diamond",
2158 "Do",
2159 "Ellipsis",
2160 "Error",
2161 "Eval",
2162 "ExpressionStatement",
2163 "For",
2164 "Foreach",
2165 "Format",
2166 "FunctionCall",
2167 "Given",
2168 "Glob",
2169 "Goto",
2170 "HashLiteral",
2171 "Heredoc",
2172 "Identifier",
2173 "If",
2174 "IndirectCall",
2175 "LabeledStatement",
2176 "LoopControl",
2177 "MandatoryParameter",
2178 "Match",
2179 "Method",
2180 "MethodCall",
2181 "MissingBlock",
2182 "MissingExpression",
2183 "MissingIdentifier",
2184 "MissingStatement",
2185 "NamedParameter",
2186 "No",
2187 "Number",
2188 "OptionalParameter",
2189 "Package",
2190 "PhaseBlock",
2191 "Program",
2192 "Prototype",
2193 "Readline",
2194 "Regex",
2195 "Return",
2196 "Signature",
2197 "SlurpyParameter",
2198 "StatementModifier",
2199 "String",
2200 "Subroutine",
2201 "Substitution",
2202 "Ternary",
2203 "Tie",
2204 "Transliteration",
2205 "Try",
2206 "Typeglob",
2207 "Unary",
2208 "Undef",
2209 "UnknownRest",
2210 "Untie",
2211 "Use",
2212 "Variable",
2213 "VariableDeclaration",
2214 "VariableListDeclaration",
2215 "VariableWithAttributes",
2216 "When",
2217 "While",
2218 ];
2219
2220 /// Subset of `ALL_KIND_NAMES` that represent synthetic/recovery nodes.
2221 ///
2222 /// These kinds are only produced by `parse_with_recovery()` on malformed
2223 /// input and should not be expected in clean parses.
2224 pub const RECOVERY_KIND_NAMES: &[&'static str] = &[
2225 "Error",
2226 "MissingBlock",
2227 "MissingExpression",
2228 "MissingIdentifier",
2229 "MissingStatement",
2230 "UnknownRest",
2231 ];
2232}
2233
2234/// Format unary operator for S-expression output
2235fn format_unary_operator(op: &str) -> String {
2236 match op {
2237 // Arithmetic unary operators
2238 "+" => "unary_+".to_string(),
2239 "-" => "unary_-".to_string(),
2240
2241 // Logical unary operators
2242 "!" => "unary_not".to_string(),
2243 "not" => "unary_not".to_string(),
2244
2245 // Bitwise complement
2246 "~" => "unary_complement".to_string(),
2247
2248 // Reference operator
2249 "\\" => "unary_ref".to_string(),
2250
2251 // Postfix operators
2252 "++" => "unary_++".to_string(),
2253 "--" => "unary_--".to_string(),
2254
2255 // File test operators
2256 "-f" => "unary_-f".to_string(),
2257 "-d" => "unary_-d".to_string(),
2258 "-e" => "unary_-e".to_string(),
2259 "-r" => "unary_-r".to_string(),
2260 "-w" => "unary_-w".to_string(),
2261 "-x" => "unary_-x".to_string(),
2262 "-o" => "unary_-o".to_string(),
2263 "-R" => "unary_-R".to_string(),
2264 "-W" => "unary_-W".to_string(),
2265 "-X" => "unary_-X".to_string(),
2266 "-O" => "unary_-O".to_string(),
2267 "-s" => "unary_-s".to_string(),
2268 "-p" => "unary_-p".to_string(),
2269 "-S" => "unary_-S".to_string(),
2270 "-b" => "unary_-b".to_string(),
2271 "-c" => "unary_-c".to_string(),
2272 "-t" => "unary_-t".to_string(),
2273 "-u" => "unary_-u".to_string(),
2274 "-g" => "unary_-g".to_string(),
2275 "-k" => "unary_-k".to_string(),
2276 "-T" => "unary_-T".to_string(),
2277 "-B" => "unary_-B".to_string(),
2278 "-M" => "unary_-M".to_string(),
2279 "-A" => "unary_-A".to_string(),
2280 "-C" => "unary_-C".to_string(),
2281 "-l" => "unary_-l".to_string(),
2282 "-z" => "unary_-z".to_string(),
2283
2284 // Postfix dereferencing
2285 "->@*" => "unary_->@*".to_string(),
2286 "->%*" => "unary_->%*".to_string(),
2287 "->$*" => "unary_->$*".to_string(),
2288 "->&*" => "unary_->&*".to_string(),
2289 "->**" => "unary_->**".to_string(),
2290
2291 // Defined operator
2292 "defined" => "unary_defined".to_string(),
2293
2294 // Default case for unknown operators
2295 _ => format!("unary_{}", op.replace(' ', "_")),
2296 }
2297}
2298
2299/// Format binary operator for S-expression output
2300fn format_binary_operator(op: &str) -> String {
2301 match op {
2302 // Arithmetic operators
2303 "+" => "binary_+".to_string(),
2304 "-" => "binary_-".to_string(),
2305 "*" => "binary_*".to_string(),
2306 "/" => "binary_/".to_string(),
2307 "%" => "binary_%".to_string(),
2308 "**" => "binary_**".to_string(),
2309
2310 // Comparison operators
2311 "==" => "binary_==".to_string(),
2312 "!=" => "binary_!=".to_string(),
2313 "<" => "binary_<".to_string(),
2314 ">" => "binary_>".to_string(),
2315 "<=" => "binary_<=".to_string(),
2316 ">=" => "binary_>=".to_string(),
2317 "<=>" => "binary_<=>".to_string(),
2318
2319 // String comparison
2320 "eq" => "binary_eq".to_string(),
2321 "ne" => "binary_ne".to_string(),
2322 "lt" => "binary_lt".to_string(),
2323 "le" => "binary_le".to_string(),
2324 "gt" => "binary_gt".to_string(),
2325 "ge" => "binary_ge".to_string(),
2326 "cmp" => "binary_cmp".to_string(),
2327
2328 // Logical operators
2329 "&&" => "binary_&&".to_string(),
2330 "||" => "binary_||".to_string(),
2331 "and" => "binary_and".to_string(),
2332 "or" => "binary_or".to_string(),
2333 "xor" => "binary_xor".to_string(),
2334
2335 // Bitwise operators
2336 "&" => "binary_&".to_string(),
2337 "|" => "binary_|".to_string(),
2338 "^" => "binary_^".to_string(),
2339 "<<" => "binary_<<".to_string(),
2340 ">>" => "binary_>>".to_string(),
2341
2342 // Pattern matching
2343 "=~" => "binary_=~".to_string(),
2344 "!~" => "binary_!~".to_string(),
2345
2346 // Smart match
2347 "~~" => "binary_~~".to_string(),
2348
2349 // String repetition
2350 "x" => "binary_x".to_string(),
2351
2352 // Concatenation
2353 "." => "binary_.".to_string(),
2354
2355 // Range operators
2356 ".." => "binary_..".to_string(),
2357 "..." => "binary_...".to_string(),
2358
2359 // Type checking
2360 "isa" => "binary_isa".to_string(),
2361
2362 // Assignment operators
2363 "=" => "binary_=".to_string(),
2364 "+=" => "binary_+=".to_string(),
2365 "-=" => "binary_-=".to_string(),
2366 "*=" => "binary_*=".to_string(),
2367 "/=" => "binary_/=".to_string(),
2368 "%=" => "binary_%=".to_string(),
2369 "**=" => "binary_**=".to_string(),
2370 ".=" => "binary_.=".to_string(),
2371 "&=" => "binary_&=".to_string(),
2372 "|=" => "binary_|=".to_string(),
2373 "^=" => "binary_^=".to_string(),
2374 "<<=" => "binary_<<=".to_string(),
2375 ">>=" => "binary_>>=".to_string(),
2376 "&&=" => "binary_&&=".to_string(),
2377 "||=" => "binary_||=".to_string(),
2378 "//=" => "binary_//=".to_string(),
2379
2380 // Defined-or operator
2381 "//" => "binary_//".to_string(),
2382
2383 // Method calls and dereferencing
2384 "->" => "binary_->".to_string(),
2385
2386 // Hash/array access
2387 "{}" => "binary_{}".to_string(),
2388 "[]" => "binary_[]".to_string(),
2389
2390 // Arrow hash/array dereference
2391 "->{}" => "arrow_hash_deref".to_string(),
2392 "->[]" => "arrow_array_deref".to_string(),
2393
2394 // Default case for unknown operators
2395 _ => format!("binary_{}", op.replace(' ', "_")),
2396 }
2397}
2398
2399// SourceLocation is now provided by perl-position-tracking crate
2400// See the re-export at the top of this file
2401
2402#[cfg(test)]
2403mod tests {
2404 use super::*;
2405 use std::collections::BTreeSet;
2406
2407 /// Build a dummy instance for every `NodeKind` variant and return its
2408 /// `kind_name()`. This ensures the compiler forces us to update here
2409 /// whenever a variant is added/removed.
2410 fn all_kind_names_from_variants() -> BTreeSet<&'static str> {
2411 let loc = SourceLocation { start: 0, end: 0 };
2412 let dummy_node = || Node::new(NodeKind::Undef, loc);
2413
2414 let variants: Vec<NodeKind> = vec![
2415 NodeKind::Program { statements: vec![] },
2416 NodeKind::ExpressionStatement { expression: Box::new(dummy_node()) },
2417 NodeKind::VariableDeclaration {
2418 declarator: String::new(),
2419 variable: Box::new(dummy_node()),
2420 attributes: vec![],
2421 initializer: None,
2422 },
2423 NodeKind::VariableListDeclaration {
2424 declarator: String::new(),
2425 variables: vec![],
2426 attributes: vec![],
2427 initializer: None,
2428 },
2429 NodeKind::Variable { sigil: String::new(), name: String::new() },
2430 NodeKind::VariableWithAttributes {
2431 variable: Box::new(dummy_node()),
2432 attributes: vec![],
2433 },
2434 NodeKind::Assignment {
2435 lhs: Box::new(dummy_node()),
2436 rhs: Box::new(dummy_node()),
2437 op: String::new(),
2438 },
2439 NodeKind::Binary {
2440 op: String::new(),
2441 left: Box::new(dummy_node()),
2442 right: Box::new(dummy_node()),
2443 },
2444 NodeKind::Ternary {
2445 condition: Box::new(dummy_node()),
2446 then_expr: Box::new(dummy_node()),
2447 else_expr: Box::new(dummy_node()),
2448 },
2449 NodeKind::Unary { op: String::new(), operand: Box::new(dummy_node()) },
2450 NodeKind::Diamond,
2451 NodeKind::Ellipsis,
2452 NodeKind::Undef,
2453 NodeKind::Readline { filehandle: None },
2454 NodeKind::Glob { pattern: String::new() },
2455 NodeKind::Typeglob { name: String::new() },
2456 NodeKind::Number { value: String::new() },
2457 NodeKind::String { value: String::new(), interpolated: false },
2458 NodeKind::Heredoc {
2459 delimiter: String::new(),
2460 content: String::new(),
2461 interpolated: false,
2462 indented: false,
2463 command: false,
2464 body_span: None,
2465 },
2466 NodeKind::ArrayLiteral { elements: vec![] },
2467 NodeKind::HashLiteral { pairs: vec![] },
2468 NodeKind::Block { statements: vec![] },
2469 NodeKind::Eval { block: Box::new(dummy_node()) },
2470 NodeKind::Do { block: Box::new(dummy_node()) },
2471 NodeKind::Try {
2472 body: Box::new(dummy_node()),
2473 catch_blocks: vec![],
2474 finally_block: None,
2475 },
2476 NodeKind::If {
2477 condition: Box::new(dummy_node()),
2478 then_branch: Box::new(dummy_node()),
2479 elsif_branches: vec![],
2480 else_branch: None,
2481 },
2482 NodeKind::LabeledStatement { label: String::new(), statement: Box::new(dummy_node()) },
2483 NodeKind::While {
2484 condition: Box::new(dummy_node()),
2485 body: Box::new(dummy_node()),
2486 continue_block: None,
2487 },
2488 NodeKind::Tie {
2489 variable: Box::new(dummy_node()),
2490 package: Box::new(dummy_node()),
2491 args: vec![],
2492 },
2493 NodeKind::Untie { variable: Box::new(dummy_node()) },
2494 NodeKind::For {
2495 init: None,
2496 condition: None,
2497 update: None,
2498 body: Box::new(dummy_node()),
2499 continue_block: None,
2500 },
2501 NodeKind::Foreach {
2502 variable: Box::new(dummy_node()),
2503 list: Box::new(dummy_node()),
2504 body: Box::new(dummy_node()),
2505 continue_block: None,
2506 },
2507 NodeKind::Given { expr: Box::new(dummy_node()), body: Box::new(dummy_node()) },
2508 NodeKind::When { condition: Box::new(dummy_node()), body: Box::new(dummy_node()) },
2509 NodeKind::Default { body: Box::new(dummy_node()) },
2510 NodeKind::StatementModifier {
2511 statement: Box::new(dummy_node()),
2512 modifier: String::new(),
2513 condition: Box::new(dummy_node()),
2514 },
2515 NodeKind::Subroutine {
2516 name: None,
2517 name_span: None,
2518 prototype: None,
2519 signature: None,
2520 attributes: vec![],
2521 body: Box::new(dummy_node()),
2522 },
2523 NodeKind::Prototype { content: String::new() },
2524 NodeKind::Signature { parameters: vec![] },
2525 NodeKind::MandatoryParameter { variable: Box::new(dummy_node()) },
2526 NodeKind::OptionalParameter {
2527 variable: Box::new(dummy_node()),
2528 default_value: Box::new(dummy_node()),
2529 },
2530 NodeKind::SlurpyParameter { variable: Box::new(dummy_node()) },
2531 NodeKind::NamedParameter { variable: Box::new(dummy_node()) },
2532 NodeKind::Method {
2533 name: String::new(),
2534 signature: None,
2535 attributes: vec![],
2536 body: Box::new(dummy_node()),
2537 },
2538 NodeKind::Return { value: None },
2539 NodeKind::LoopControl { op: String::new(), label: None },
2540 NodeKind::Goto { target: Box::new(dummy_node()) },
2541 NodeKind::MethodCall {
2542 object: Box::new(dummy_node()),
2543 method: String::new(),
2544 args: vec![],
2545 },
2546 NodeKind::FunctionCall { name: String::new(), args: vec![] },
2547 NodeKind::IndirectCall {
2548 method: String::new(),
2549 object: Box::new(dummy_node()),
2550 args: vec![],
2551 },
2552 NodeKind::Regex {
2553 pattern: String::new(),
2554 replacement: None,
2555 modifiers: String::new(),
2556 has_embedded_code: false,
2557 },
2558 NodeKind::Match {
2559 expr: Box::new(dummy_node()),
2560 pattern: String::new(),
2561 modifiers: String::new(),
2562 has_embedded_code: false,
2563 negated: false,
2564 },
2565 NodeKind::Substitution {
2566 expr: Box::new(dummy_node()),
2567 pattern: String::new(),
2568 replacement: String::new(),
2569 modifiers: String::new(),
2570 has_embedded_code: false,
2571 negated: false,
2572 },
2573 NodeKind::Transliteration {
2574 expr: Box::new(dummy_node()),
2575 search: String::new(),
2576 replace: String::new(),
2577 modifiers: String::new(),
2578 negated: false,
2579 },
2580 NodeKind::Package { name: String::new(), name_span: loc, block: None },
2581 NodeKind::Use { module: String::new(), args: vec![], has_filter_risk: false },
2582 NodeKind::No { module: String::new(), args: vec![], has_filter_risk: false },
2583 NodeKind::PhaseBlock {
2584 phase: String::new(),
2585 phase_span: None,
2586 block: Box::new(dummy_node()),
2587 },
2588 NodeKind::DataSection { marker: String::new(), body: None },
2589 NodeKind::Class { name: String::new(), body: Box::new(dummy_node()) },
2590 NodeKind::Format { name: String::new(), body: String::new() },
2591 NodeKind::Identifier { name: String::new() },
2592 NodeKind::Error {
2593 message: String::new(),
2594 expected: vec![],
2595 found: None,
2596 partial: None,
2597 },
2598 NodeKind::MissingExpression,
2599 NodeKind::MissingStatement,
2600 NodeKind::MissingIdentifier,
2601 NodeKind::MissingBlock,
2602 NodeKind::UnknownRest,
2603 ];
2604
2605 variants.iter().map(|v| v.kind_name()).collect()
2606 }
2607
2608 #[test]
2609 fn all_kind_names_is_consistent_with_kind_name() {
2610 let from_enum = all_kind_names_from_variants();
2611 let from_const: BTreeSet<&str> = NodeKind::ALL_KIND_NAMES.iter().copied().collect();
2612
2613 // Check for duplicates in the const array
2614 assert_eq!(
2615 NodeKind::ALL_KIND_NAMES.len(),
2616 from_const.len(),
2617 "ALL_KIND_NAMES contains duplicates"
2618 );
2619
2620 let only_in_enum: Vec<_> = from_enum.difference(&from_const).collect();
2621 let only_in_const: Vec<_> = from_const.difference(&from_enum).collect();
2622
2623 assert!(
2624 only_in_enum.is_empty() && only_in_const.is_empty(),
2625 "ALL_KIND_NAMES is out of sync with NodeKind variants:\n \
2626 in enum but not in ALL_KIND_NAMES: {only_in_enum:?}\n \
2627 in ALL_KIND_NAMES but not in enum: {only_in_const:?}"
2628 );
2629 }
2630
2631 #[test]
2632 fn recovery_kind_names_is_subset_of_all() {
2633 let all: BTreeSet<&str> = NodeKind::ALL_KIND_NAMES.iter().copied().collect();
2634 let recovery: BTreeSet<&str> = NodeKind::RECOVERY_KIND_NAMES.iter().copied().collect();
2635
2636 // No duplicates
2637 assert_eq!(
2638 NodeKind::RECOVERY_KIND_NAMES.len(),
2639 recovery.len(),
2640 "RECOVERY_KIND_NAMES contains duplicates"
2641 );
2642
2643 let not_in_all: Vec<_> = recovery.difference(&all).collect();
2644 assert!(
2645 not_in_all.is_empty(),
2646 "RECOVERY_KIND_NAMES contains entries not in ALL_KIND_NAMES: {not_in_all:?}"
2647 );
2648 }
2649}