perl_ast/ast.rs
1//! Abstract Syntax Tree definitions for Perl within the parsing and LSP workflow.
2//!
3//! This module defines the comprehensive AST node types that represent parsed Perl code
4//! during the Parse → Index → Navigate → Complete → Analyze stages. The design is optimized
5//! for both direct use in Rust analysis and for generating tree-sitter compatible
6//! S-expressions during large workspace processing operations.
7//!
8//! # LSP Workflow Integration
9//!
10//! The AST structures support Perl tooling workflows by:
11//! - **Parse**: Produced by the parser as the canonical syntax tree
12//! - **Index**: Traversed to build symbol and reference tables
13//! - **Navigate**: Provides locations for definition and reference lookups
14//! - **Complete**: Supplies context for completion, hover, and signature help
15//! - **Analyze**: Feeds semantic analysis, diagnostics, and refactoring
16//!
17//! # Performance Characteristics
18//!
19//! AST structures are optimized for large codebases with:
20//! - Memory-efficient node representation using `Box<Node>` for recursive structures
21//! - Fast pattern matching via enum variants for common Perl constructs
22//! - Location tracking for precise error reporting in large files
23//! - Cheap cloning for parallel analysis tasks
24//!
25//! # Usage Examples
26//!
27//! ## Basic AST Construction
28//!
29//! ```rust
30//! use perl_ast::{Node, NodeKind, SourceLocation};
31//!
32//! // Create a simple variable declaration node
33//! let location = SourceLocation { start: 0, end: 10 };
34//! let node = Node::new(
35//! NodeKind::VariableDeclaration {
36//! declarator: "my".to_string(),
37//! variable: Box::new(Node::new(
38//! NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
39//! location,
40//! )),
41//! attributes: vec![],
42//! initializer: None,
43//! },
44//! location,
45//! );
46//! assert_eq!(node.kind.kind_name(), "VariableDeclaration");
47//! ```
48//!
49//! ## Tree-sitter S-expression Generation
50//!
51//! ```rust
52//! use perl_ast::{Node, NodeKind, SourceLocation};
53//!
54//! let loc = SourceLocation { start: 0, end: 2 };
55//! let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
56//! let program = Node::new(NodeKind::Program { statements: vec![num] }, loc);
57//!
58//! let sexp = program.to_sexp();
59//! assert!(sexp.starts_with("(source_file"));
60//! ```
61//!
62//! ## AST Traversal and Analysis
63//!
64//! ```rust
65//! use perl_ast::{Node, NodeKind, SourceLocation};
66//!
67//! fn count_variables(node: &Node) -> usize {
68//! let mut count = 0;
69//! match &node.kind {
70//! NodeKind::Variable { .. } => count += 1,
71//! NodeKind::Program { statements } => {
72//! for stmt in statements {
73//! count += count_variables(stmt);
74//! }
75//! }
76//! _ => {} // Handle other node types as needed
77//! }
78//! count
79//! }
80//!
81//! let loc = SourceLocation { start: 0, end: 5 };
82//! let var = Node::new(
83//! NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
84//! loc,
85//! );
86//! let program = Node::new(NodeKind::Program { statements: vec![var] }, loc);
87//! assert_eq!(count_variables(&program), 1);
88//! ```
89//!
90//! ## Parsing Integration
91//!
92//! In practice the AST is produced by the parser rather than built by hand
93//! (requires `perl-parser-core`):
94//!
95//! ```rust,ignore
96//! use perl_parser_core::Parser;
97//! use perl_ast::NodeKind;
98//!
99//! let mut parser = Parser::new("my $x = 42;");
100//! let ast = parser.parse().expect("should parse");
101//! assert!(matches!(ast.kind, NodeKind::Program { .. }));
102//! ```
103
104// Re-export SourceLocation from perl-position-tracking for unified span handling
105pub use perl_position_tracking::SourceLocation;
106// Re-export Token and TokenKind from perl-token for AST error nodes
107pub use perl_token::{Token, TokenKind};
108use std::fmt;
109
110/// Core AST node representing any Perl language construct within parsing workflows.
111///
112/// This is the fundamental building block for representing parsed Perl code. Each node
113/// contains both the semantic information (kind) and positional information (location)
114/// necessary for comprehensive script analysis.
115///
116/// # LSP Workflow Role
117///
118/// Nodes flow through tooling stages:
119/// - **Parse**: Created by the parser as it builds the syntax tree
120/// - **Index**: Visited to build symbol and reference tables
121/// - **Navigate**: Used to resolve definitions, references, and call hierarchy
122/// - **Complete**: Provides contextual information for completion and hover
123/// - **Analyze**: Drives semantic analysis and diagnostics
124///
125/// # Memory Optimization
126///
127/// The structure is designed for efficient memory usage during large-scale parsing:
128/// - `SourceLocation` uses compact position encoding for large files
129/// - `NodeKind` enum variants minimize memory overhead for common constructs
130/// - Clone operations are optimized for shared analysis workflows
131///
132/// # Examples
133///
134/// Construct a variable declaration node manually:
135///
136/// ```
137/// use perl_ast::{Node, NodeKind, SourceLocation};
138///
139/// let loc = SourceLocation { start: 0, end: 11 };
140/// let var = Node::new(
141/// NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
142/// loc,
143/// );
144/// let decl = Node::new(
145/// NodeKind::VariableDeclaration {
146/// declarator: "my".to_string(),
147/// variable: Box::new(var),
148/// attributes: vec![],
149/// initializer: None,
150/// },
151/// loc,
152/// );
153/// assert_eq!(decl.kind.kind_name(), "VariableDeclaration");
154/// ```
155///
156/// Typically you obtain nodes from the parser rather than constructing them by hand:
157///
158/// ```ignore
159/// use perl_parser::Parser;
160///
161/// let mut parser = Parser::new("my $x = 42;");
162/// let ast = parser.parse()?;
163/// println!("AST: {}", ast.to_sexp());
164/// ```
165#[derive(Debug, Clone, PartialEq)]
166pub struct Node {
167 /// The specific type and semantic content of this AST node
168 pub kind: NodeKind,
169 /// Source position information for error reporting and code navigation
170 pub location: SourceLocation,
171}
172
173impl Node {
174 /// Create a new AST node with the given kind and source location.
175 ///
176 /// # Examples
177 ///
178 /// ```
179 /// use perl_ast::{Node, NodeKind, SourceLocation};
180 ///
181 /// let node = Node::new(
182 /// NodeKind::Number { value: "42".to_string() },
183 /// SourceLocation { start: 0, end: 2 },
184 /// );
185 /// assert_eq!(node.kind.kind_name(), "Number");
186 /// assert_eq!(node.location.start, 0);
187 /// ```
188 pub fn new(kind: NodeKind, location: SourceLocation) -> Self {
189 Node { kind, location }
190 }
191
192 /// Convert the AST to a tree-sitter compatible S-expression.
193 ///
194 /// Produces a parenthesized representation compatible with tree-sitter's
195 /// S-expression format, useful for debugging and snapshot testing.
196 ///
197 /// # Examples
198 ///
199 /// ```
200 /// use perl_ast::{Node, NodeKind, SourceLocation};
201 ///
202 /// let loc = SourceLocation { start: 0, end: 2 };
203 /// let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
204 /// let program = Node::new(
205 /// NodeKind::Program { statements: vec![num] },
206 /// loc,
207 /// );
208 /// let sexp = program.to_sexp();
209 /// assert!(sexp.starts_with("(source_file"));
210 /// ```
211 pub fn to_sexp(&self) -> String {
212 match &self.kind {
213 NodeKind::Program { statements } => {
214 let stmts =
215 statements.iter().map(|s| s.to_sexp_inner()).collect::<Vec<_>>().join(" ");
216 format!("(source_file {})", stmts)
217 }
218
219 NodeKind::ExpressionStatement { expression } => {
220 format!("(expression_statement {})", expression.to_sexp())
221 }
222
223 NodeKind::VariableDeclaration { declarator, variable, attributes, initializer } => {
224 let attrs_str = if attributes.is_empty() {
225 String::new()
226 } else {
227 format!(" (attributes {})", attributes.join(" "))
228 };
229 if let Some(init) = initializer {
230 format!(
231 "({}_declaration {}{}{})",
232 declarator,
233 variable.to_sexp(),
234 attrs_str,
235 init.to_sexp()
236 )
237 } else {
238 format!("({}_declaration {}{})", declarator, variable.to_sexp(), attrs_str)
239 }
240 }
241
242 NodeKind::VariableListDeclaration {
243 declarator,
244 variables,
245 attributes,
246 initializer,
247 } => {
248 let vars = variables.iter().map(|v| v.to_sexp()).collect::<Vec<_>>().join(" ");
249 let attrs_str = if attributes.is_empty() {
250 String::new()
251 } else {
252 format!(" (attributes {})", attributes.join(" "))
253 };
254 if let Some(init) = initializer {
255 format!(
256 "({}_declaration ({}){}{})",
257 declarator,
258 vars,
259 attrs_str,
260 init.to_sexp()
261 )
262 } else {
263 format!("({}_declaration ({}){})", declarator, vars, attrs_str)
264 }
265 }
266
267 NodeKind::Variable { sigil, name } => {
268 // Format expected by bless parsing tests: (variable $ name)
269 format!("(variable {} {})", sigil, name)
270 }
271
272 NodeKind::VariableWithAttributes { variable, attributes } => {
273 let attrs = attributes.join(" ");
274 format!("({} (attributes {}))", variable.to_sexp(), attrs)
275 }
276
277 NodeKind::Assignment { lhs, rhs, op } => {
278 format!(
279 "(assignment_{} {} {})",
280 op.replace("=", "assign"),
281 lhs.to_sexp(),
282 rhs.to_sexp()
283 )
284 }
285
286 NodeKind::Binary { op, left, right } => {
287 // Tree-sitter format: (binary_op left right)
288 let op_name = format_binary_operator(op);
289 format!("({} {} {})", op_name, left.to_sexp(), right.to_sexp())
290 }
291
292 NodeKind::Ternary { condition, then_expr, else_expr } => {
293 format!(
294 "(ternary {} {} {})",
295 condition.to_sexp(),
296 then_expr.to_sexp(),
297 else_expr.to_sexp()
298 )
299 }
300
301 NodeKind::Unary { op, operand } => {
302 // Tree-sitter format: (unary_op operand)
303 let op_name = format_unary_operator(op);
304 format!("({} {})", op_name, operand.to_sexp())
305 }
306
307 NodeKind::Diamond => "(diamond)".to_string(),
308
309 NodeKind::Ellipsis => "(ellipsis)".to_string(),
310
311 NodeKind::Undef => "(undef)".to_string(),
312
313 NodeKind::Readline { filehandle } => {
314 if let Some(fh) = filehandle {
315 format!("(readline {})", fh)
316 } else {
317 "(readline)".to_string()
318 }
319 }
320
321 NodeKind::Glob { pattern } => {
322 format!("(glob {})", pattern)
323 }
324 NodeKind::Typeglob { name } => {
325 format!("(typeglob {})", name)
326 }
327
328 NodeKind::Number { value } => {
329 // Format expected by bless parsing tests: (number value)
330 format!("(number {})", value)
331 }
332
333 NodeKind::String { value, interpolated } => {
334 // Escape quotes in string value to prevent S-expression parsing issues
335 let escaped_value = value.replace('\\', "\\\\").replace('"', "\\\"");
336
337 // Format based on interpolation status
338 if *interpolated {
339 format!("(string_interpolated \"{}\")", escaped_value)
340 } else {
341 format!("(string \"{}\")", escaped_value)
342 }
343 }
344
345 NodeKind::Heredoc { delimiter, content, interpolated, indented, command, .. } => {
346 let type_str = if *command {
347 "heredoc_command"
348 } else if *indented {
349 if *interpolated { "heredoc_indented_interpolated" } else { "heredoc_indented" }
350 } else if *interpolated {
351 "heredoc_interpolated"
352 } else {
353 "heredoc"
354 };
355 format!("({} {:?} {:?})", type_str, delimiter, content)
356 }
357
358 NodeKind::ArrayLiteral { elements } => {
359 let elems = elements.iter().map(|e| e.to_sexp()).collect::<Vec<_>>().join(" ");
360 format!("(array {})", elems)
361 }
362
363 NodeKind::HashLiteral { pairs } => {
364 let kvs = pairs
365 .iter()
366 .map(|(k, v)| format!("({} {})", k.to_sexp(), v.to_sexp()))
367 .collect::<Vec<_>>()
368 .join(" ");
369 format!("(hash {})", kvs)
370 }
371
372 NodeKind::Block { statements } => {
373 let stmts = statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ");
374 format!("(block {})", stmts)
375 }
376
377 NodeKind::Eval { block } => {
378 format!("(eval {})", block.to_sexp())
379 }
380
381 NodeKind::Do { block } => {
382 format!("(do {})", block.to_sexp())
383 }
384
385 NodeKind::Defer { block } => {
386 format!("(defer {})", block.to_sexp())
387 }
388
389 NodeKind::Try { body, catch_blocks, finally_block } => {
390 let mut parts = vec![format!("(try {})", body.to_sexp())];
391
392 for (var, block) in catch_blocks {
393 if let Some(v) = var {
394 parts.push(format!("(catch {} {})", v, block.to_sexp()));
395 } else {
396 parts.push(format!("(catch {})", block.to_sexp()));
397 }
398 }
399
400 if let Some(finally) = finally_block {
401 parts.push(format!("(finally {})", finally.to_sexp()));
402 }
403
404 parts.join(" ")
405 }
406
407 NodeKind::If { condition, then_branch, elsif_branches, else_branch, keyword } => {
408 let kw = keyword.as_deref().unwrap_or("if");
409 let mut parts =
410 vec![format!("({} {} {})", kw, condition.to_sexp(), then_branch.to_sexp())];
411
412 for (cond, block) in elsif_branches {
413 parts.push(format!("(elsif {} {})", cond.to_sexp(), block.to_sexp()));
414 }
415
416 if let Some(else_block) = else_branch {
417 parts.push(format!("(else {})", else_block.to_sexp()));
418 }
419
420 parts.join(" ")
421 }
422
423 NodeKind::LabeledStatement { label, statement } => {
424 format!("(labeled_statement {} {})", label, statement.to_sexp())
425 }
426
427 NodeKind::While { condition, body, continue_block, keyword } => {
428 let kw = keyword.as_deref().unwrap_or("while");
429 let mut s = format!("({} {} {})", kw, condition.to_sexp(), body.to_sexp());
430 if let Some(cont) = continue_block {
431 s.push_str(&format!(" (continue {})", cont.to_sexp()));
432 }
433 s
434 }
435 NodeKind::Tie { variable, package, args } => {
436 let mut s = format!("(tie {} {}", variable.to_sexp(), package.to_sexp());
437 for arg in args {
438 s.push_str(&format!(" {}", arg.to_sexp()));
439 }
440 s.push(')');
441 s
442 }
443 NodeKind::Untie { variable } => {
444 format!("(untie {})", variable.to_sexp())
445 }
446 NodeKind::For { init, condition, update, body, continue_block } => {
447 let init_str =
448 init.as_ref().map(|i| i.to_sexp()).unwrap_or_else(|| "()".to_string());
449 let cond_str =
450 condition.as_ref().map(|c| c.to_sexp()).unwrap_or_else(|| "()".to_string());
451 let update_str =
452 update.as_ref().map(|u| u.to_sexp()).unwrap_or_else(|| "()".to_string());
453 let mut result =
454 format!("(for {} {} {} {})", init_str, cond_str, update_str, body.to_sexp());
455 if let Some(cont) = continue_block {
456 result.push_str(&format!(" (continue {})", cont.to_sexp()));
457 }
458 result
459 }
460
461 NodeKind::Foreach { variable, list, body, continue_block } => {
462 let cont = if let Some(cb) = continue_block {
463 format!(" {}", cb.to_sexp())
464 } else {
465 String::new()
466 };
467 format!(
468 "(foreach {} {} {}{})",
469 variable.to_sexp(),
470 list.to_sexp(),
471 body.to_sexp(),
472 cont
473 )
474 }
475
476 NodeKind::Given { expr, body } => {
477 format!("(given {} {})", expr.to_sexp(), body.to_sexp())
478 }
479
480 NodeKind::When { condition, body } => {
481 format!("(when {} {})", condition.to_sexp(), body.to_sexp())
482 }
483
484 NodeKind::Default { body } => {
485 format!("(default {})", body.to_sexp())
486 }
487
488 NodeKind::StatementModifier { statement, modifier, condition } => {
489 format!(
490 "(statement_modifier_{} {} {})",
491 modifier,
492 statement.to_sexp(),
493 condition.to_sexp()
494 )
495 }
496
497 NodeKind::Subroutine { name, prototype, signature, attributes, body, name_span: _ } => {
498 if let Some(sub_name) = name {
499 // Named subroutine - bless test expected format: (sub name () block)
500 let mut parts = vec![sub_name.clone()];
501
502 // Add attributes if present (before prototype/signature)
503 if !attributes.is_empty() {
504 for attr in attributes {
505 parts.push(format!(":{}", attr));
506 }
507 }
508
509 // Add prototype/signature - use () for empty prototype
510 if let Some(proto) = prototype {
511 parts.push(format!("({})", proto.to_sexp()));
512 } else if signature.is_some() {
513 // If there's a signature but no prototype, still show ()
514 parts.push("()".to_string());
515 } else {
516 parts.push("()".to_string());
517 }
518
519 // Add body
520 parts.push(body.to_sexp());
521
522 // Format: (sub name [attrs...] ()(block ...)) - space between name and (), no space between () and block
523 if parts.len() >= 3 && parts[parts.len() - 2] == "()" {
524 let name_and_attrs = parts[0..parts.len() - 2].join(" ");
525 let proto = &parts[parts.len() - 2];
526 let body = &parts[parts.len() - 1];
527 format!("(sub {} {}{})", name_and_attrs, proto, body)
528 } else {
529 format!("(sub {})", parts.join(" "))
530 }
531 } else {
532 // Anonymous subroutine - tree-sitter format
533 let mut parts = Vec::new();
534
535 // Add attributes if present
536 if !attributes.is_empty() {
537 let attrs: Vec<String> = attributes
538 .iter()
539 .map(|_attr| "(attribute (attribute_name))".to_string())
540 .collect();
541 parts.push(format!("(attrlist {})", attrs.join("")));
542 }
543
544 // Add prototype if present
545 if let Some(proto) = prototype {
546 parts.push(proto.to_sexp());
547 }
548
549 // Add signature if present
550 if let Some(sig) = signature {
551 parts.push(sig.to_sexp());
552 }
553
554 // Add body
555 parts.push(body.to_sexp());
556
557 format!("(anonymous_subroutine_expression {})", parts.join(""))
558 }
559 }
560
561 NodeKind::Prototype { content: _ } => "(prototype)".to_string(),
562
563 NodeKind::Signature { parameters } => {
564 let params = parameters.iter().map(|p| p.to_sexp()).collect::<Vec<_>>().join(" ");
565 format!("(signature {})", params)
566 }
567
568 NodeKind::MandatoryParameter { variable } => {
569 format!("(mandatory_parameter {})", variable.to_sexp())
570 }
571
572 NodeKind::OptionalParameter { variable, default_value } => {
573 format!("(optional_parameter {} {})", variable.to_sexp(), default_value.to_sexp())
574 }
575
576 NodeKind::SlurpyParameter { variable } => {
577 format!("(slurpy_parameter {})", variable.to_sexp())
578 }
579
580 NodeKind::NamedParameter { variable } => {
581 format!("(named_parameter {})", variable.to_sexp())
582 }
583
584 NodeKind::Method { name: _, signature, attributes, body } => {
585 let block_contents = match &body.kind {
586 NodeKind::Block { statements } => {
587 statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ")
588 }
589 _ => body.to_sexp(),
590 };
591
592 let mut parts = vec!["(bareword)".to_string()];
593
594 // Add signature if present
595 if let Some(sig) = signature {
596 parts.push(sig.to_sexp());
597 }
598
599 // Add attributes if present
600 if !attributes.is_empty() {
601 let attrs: Vec<String> = attributes
602 .iter()
603 .map(|_attr| "(attribute (attribute_name))".to_string())
604 .collect();
605 parts.push(format!("(attrlist {})", attrs.join("")));
606 }
607
608 parts.push(format!("(block {})", block_contents));
609 format!("(method_declaration_statement {})", parts.join(" "))
610 }
611
612 NodeKind::Return { value } => {
613 if let Some(val) = value {
614 format!("(return {})", val.to_sexp())
615 } else {
616 "(return)".to_string()
617 }
618 }
619
620 NodeKind::LoopControl { op, label } => {
621 if let Some(l) = label {
622 format!("({} {})", op, l)
623 } else {
624 format!("({})", op)
625 }
626 }
627
628 NodeKind::Goto { target } => {
629 format!("(goto {})", target.to_sexp())
630 }
631
632 NodeKind::MethodCall { object, method, args } => {
633 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
634 format!("(method_call {} {} ({}))", object.to_sexp(), method, args_str)
635 }
636
637 NodeKind::FunctionCall { name, args } => {
638 // Special handling for functions that should use call format in tree-sitter tests
639 if matches!(
640 name.as_str(),
641 "bless"
642 | "shift"
643 | "unshift"
644 | "open"
645 | "die"
646 | "warn"
647 | "print"
648 | "printf"
649 | "say"
650 | "push"
651 | "pop"
652 | "map"
653 | "sort"
654 | "grep"
655 | "keys"
656 | "values"
657 | "each"
658 | "defined"
659 | "scalar"
660 | "ref"
661 ) {
662 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
663 if args.is_empty() {
664 format!("(call {} ())", name)
665 } else {
666 format!("(call {} ({}))", name, args_str)
667 }
668 } else {
669 // Tree-sitter format varies by context
670 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
671 if args.is_empty() {
672 "(function_call_expression (function))".to_string()
673 } else {
674 format!("(ambiguous_function_call_expression (function) {})", args_str)
675 }
676 }
677 }
678
679 NodeKind::IndirectCall { method, object, args } => {
680 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
681 format!("(indirect_call {} {} ({}))", method, object.to_sexp(), args_str)
682 }
683
684 NodeKind::Regex { pattern, replacement, modifiers, has_embedded_code } => {
685 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
686 format!("(regex {:?} {:?} {:?}{})", pattern, replacement, modifiers, risk_marker)
687 }
688
689 NodeKind::Match { expr, pattern, modifiers, has_embedded_code, negated } => {
690 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
691 let op = if *negated { "not_match" } else { "match" };
692 format!(
693 "({} {} (regex {:?} {:?}{}))",
694 op,
695 expr.to_sexp(),
696 pattern,
697 modifiers,
698 risk_marker
699 )
700 }
701
702 NodeKind::Substitution {
703 expr,
704 pattern,
705 replacement,
706 modifiers,
707 has_embedded_code,
708 negated,
709 } => {
710 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
711 let neg_marker = if *negated { " (negated)" } else { "" };
712 format!(
713 "(substitution {} {:?} {:?} {:?}{}{})",
714 expr.to_sexp(),
715 pattern,
716 replacement,
717 modifiers,
718 risk_marker,
719 neg_marker
720 )
721 }
722
723 NodeKind::Transliteration { expr, search, replace, modifiers, negated } => {
724 let neg_marker = if *negated { " (negated)" } else { "" };
725 format!(
726 "(transliteration {} {:?} {:?} {:?}{})",
727 expr.to_sexp(),
728 search,
729 replace,
730 modifiers,
731 neg_marker
732 )
733 }
734
735 NodeKind::Package { name, block, name_span: _ } => {
736 if let Some(blk) = block {
737 format!("(package {} {})", name, blk.to_sexp())
738 } else {
739 format!("(package {})", name)
740 }
741 }
742
743 NodeKind::Use { module, args, has_filter_risk } => {
744 let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
745 if args.is_empty() {
746 format!("(use {}{})", module, risk_marker)
747 } else {
748 let args_str = args.join(" ");
749 format!("(use {} ({}){})", module, args_str, risk_marker)
750 }
751 }
752
753 NodeKind::No { module, args, has_filter_risk } => {
754 let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
755 if args.is_empty() {
756 format!("(no {}{})", module, risk_marker)
757 } else {
758 let args_str = args.join(" ");
759 format!("(no {} ({}){})", module, args_str, risk_marker)
760 }
761 }
762
763 NodeKind::PhaseBlock { phase, phase_span: _, block } => {
764 format!("({} {})", phase, block.to_sexp())
765 }
766
767 NodeKind::DataSection { marker, body } => {
768 if let Some(body_text) = body {
769 format!("(data_section {} \"{}\")", marker, body_text.escape_default())
770 } else {
771 format!("(data_section {})", marker)
772 }
773 }
774
775 NodeKind::Class { name, parents, body } => {
776 if parents.is_empty() {
777 format!("(class {} {})", name, body.to_sexp())
778 } else {
779 format!("(class {} :isa({}) {})", name, parents.join(","), body.to_sexp())
780 }
781 }
782
783 NodeKind::Format { name, body } => {
784 format!("(format {} {:?})", name, body)
785 }
786
787 NodeKind::Identifier { name } => {
788 // Format expected by tests: (identifier name)
789 format!("(identifier {})", name)
790 }
791
792 NodeKind::Error { message, partial, .. } => {
793 if let Some(node) = partial {
794 format!("(ERROR \"{}\" {})", message.escape_default(), node.to_sexp())
795 } else {
796 format!("(ERROR \"{}\")", message.escape_default())
797 }
798 }
799 NodeKind::MissingExpression => "(missing_expression)".to_string(),
800 NodeKind::MissingStatement => "(missing_statement)".to_string(),
801 NodeKind::MissingIdentifier => "(missing_identifier)".to_string(),
802 NodeKind::MissingBlock => "(missing_block)".to_string(),
803 NodeKind::UnknownRest => "(UNKNOWN_REST)".to_string(),
804 }
805 }
806
807 /// Convert the AST to S-expression format that unwraps expression statements in programs
808 pub fn to_sexp_inner(&self) -> String {
809 match &self.kind {
810 NodeKind::ExpressionStatement { expression } => {
811 // Check if this is an anonymous subroutine - if so, keep it wrapped
812 match &expression.kind {
813 NodeKind::Subroutine { name, .. } if name.is_none() => {
814 // Anonymous subroutine should remain wrapped in expression statement
815 self.to_sexp()
816 }
817 _ => {
818 // In the inner format, other expression statements are unwrapped
819 expression.to_sexp()
820 }
821 }
822 }
823 _ => {
824 // For all other node types, use regular to_sexp
825 self.to_sexp()
826 }
827 }
828 }
829
830 /// Call a function on every direct child node of this node.
831 ///
832 /// This enables depth-first traversal for operations like heredoc content attachment.
833 /// The closure receives a mutable reference to each child node.
834 #[inline]
835 pub fn for_each_child_mut<F: FnMut(&mut Node)>(&mut self, mut f: F) {
836 match &mut self.kind {
837 NodeKind::Tie { variable, package, args } => {
838 f(variable);
839 f(package);
840 for arg in args {
841 f(arg);
842 }
843 }
844 NodeKind::Untie { variable } => f(variable),
845
846 // Root program node
847 NodeKind::Program { statements } => {
848 for stmt in statements {
849 f(stmt);
850 }
851 }
852
853 // Statement wrappers
854 NodeKind::ExpressionStatement { expression } => f(expression),
855
856 // Variable declarations
857 NodeKind::VariableDeclaration { variable, initializer, .. } => {
858 f(variable);
859 if let Some(init) = initializer {
860 f(init);
861 }
862 }
863 NodeKind::VariableListDeclaration { variables, initializer, .. } => {
864 for var in variables {
865 f(var);
866 }
867 if let Some(init) = initializer {
868 f(init);
869 }
870 }
871 NodeKind::VariableWithAttributes { variable, .. } => f(variable),
872
873 // Binary operations
874 NodeKind::Binary { left, right, .. } => {
875 f(left);
876 f(right);
877 }
878 NodeKind::Ternary { condition, then_expr, else_expr } => {
879 f(condition);
880 f(then_expr);
881 f(else_expr);
882 }
883 NodeKind::Unary { operand, .. } => f(operand),
884 NodeKind::Assignment { lhs, rhs, .. } => {
885 f(lhs);
886 f(rhs);
887 }
888
889 // Control flow
890 NodeKind::Block { statements } => {
891 for stmt in statements {
892 f(stmt);
893 }
894 }
895 NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
896 f(condition);
897 f(then_branch);
898 for (elsif_cond, elsif_body) in elsif_branches {
899 f(elsif_cond);
900 f(elsif_body);
901 }
902 if let Some(else_body) = else_branch {
903 f(else_body);
904 }
905 }
906 NodeKind::While { condition, body, continue_block, .. } => {
907 f(condition);
908 f(body);
909 if let Some(cont) = continue_block {
910 f(cont);
911 }
912 }
913 NodeKind::For { init, condition, update, body, continue_block, .. } => {
914 if let Some(i) = init {
915 f(i);
916 }
917 if let Some(c) = condition {
918 f(c);
919 }
920 if let Some(u) = update {
921 f(u);
922 }
923 f(body);
924 if let Some(cont) = continue_block {
925 f(cont);
926 }
927 }
928 NodeKind::Foreach { variable, list, body, continue_block } => {
929 f(variable);
930 f(list);
931 f(body);
932 if let Some(cb) = continue_block {
933 f(cb);
934 }
935 }
936 NodeKind::Given { expr, body } => {
937 f(expr);
938 f(body);
939 }
940 NodeKind::When { condition, body } => {
941 f(condition);
942 f(body);
943 }
944 NodeKind::Default { body } => f(body),
945 NodeKind::StatementModifier { statement, condition, .. } => {
946 f(statement);
947 f(condition);
948 }
949 NodeKind::LabeledStatement { statement, .. } => f(statement),
950
951 // Eval and Do blocks
952 NodeKind::Eval { block } => f(block),
953 NodeKind::Do { block } => f(block),
954 NodeKind::Defer { block } => f(block),
955 NodeKind::Try { body, catch_blocks, finally_block } => {
956 f(body);
957 for (_, catch_body) in catch_blocks {
958 f(catch_body);
959 }
960 if let Some(finally) = finally_block {
961 f(finally);
962 }
963 }
964
965 // Function calls
966 NodeKind::FunctionCall { args, .. } => {
967 for arg in args {
968 f(arg);
969 }
970 }
971 NodeKind::MethodCall { object, args, .. } => {
972 f(object);
973 for arg in args {
974 f(arg);
975 }
976 }
977 NodeKind::IndirectCall { object, args, .. } => {
978 f(object);
979 for arg in args {
980 f(arg);
981 }
982 }
983
984 // Functions
985 NodeKind::Subroutine { prototype, signature, body, .. } => {
986 if let Some(proto) = prototype {
987 f(proto);
988 }
989 if let Some(sig) = signature {
990 f(sig);
991 }
992 f(body);
993 }
994 NodeKind::Method { signature, body, .. } => {
995 if let Some(sig) = signature {
996 f(sig);
997 }
998 f(body);
999 }
1000 NodeKind::Return { value } => {
1001 if let Some(v) = value {
1002 f(v);
1003 }
1004 }
1005 NodeKind::Goto { target } => f(target),
1006 NodeKind::Signature { parameters } => {
1007 for param in parameters {
1008 f(param);
1009 }
1010 }
1011 NodeKind::MandatoryParameter { variable } => f(variable),
1012 NodeKind::OptionalParameter { variable, default_value } => {
1013 f(variable);
1014 f(default_value);
1015 }
1016 NodeKind::SlurpyParameter { variable } => f(variable),
1017 NodeKind::NamedParameter { variable } => f(variable),
1018
1019 // Pattern matching
1020 NodeKind::Match { expr, .. } => f(expr),
1021 NodeKind::Substitution { expr, .. } => f(expr),
1022 NodeKind::Transliteration { expr, .. } => f(expr),
1023
1024 // Containers
1025 NodeKind::ArrayLiteral { elements } => {
1026 for elem in elements {
1027 f(elem);
1028 }
1029 }
1030 NodeKind::HashLiteral { pairs } => {
1031 for (key, value) in pairs {
1032 f(key);
1033 f(value);
1034 }
1035 }
1036
1037 // Package system
1038 NodeKind::Package { block, .. } => {
1039 if let Some(b) = block {
1040 f(b);
1041 }
1042 }
1043 NodeKind::PhaseBlock { block, .. } => f(block),
1044 NodeKind::Class { body, .. } => f(body),
1045
1046 // Error node might have a partial valid tree
1047 NodeKind::Error { partial, .. } => {
1048 if let Some(node) = partial {
1049 f(node);
1050 }
1051 }
1052
1053 // Leaf nodes (no children to traverse)
1054 NodeKind::Variable { .. }
1055 | NodeKind::Identifier { .. }
1056 | NodeKind::Number { .. }
1057 | NodeKind::String { .. }
1058 | NodeKind::Heredoc { .. }
1059 | NodeKind::Regex { .. }
1060 | NodeKind::Readline { .. }
1061 | NodeKind::Glob { .. }
1062 | NodeKind::Typeglob { .. }
1063 | NodeKind::Diamond
1064 | NodeKind::Ellipsis
1065 | NodeKind::Undef
1066 | NodeKind::Use { .. }
1067 | NodeKind::No { .. }
1068 | NodeKind::Prototype { .. }
1069 | NodeKind::DataSection { .. }
1070 | NodeKind::Format { .. }
1071 | NodeKind::LoopControl { .. }
1072 | NodeKind::MissingExpression
1073 | NodeKind::MissingStatement
1074 | NodeKind::MissingIdentifier
1075 | NodeKind::MissingBlock
1076 | NodeKind::UnknownRest => {}
1077 }
1078 }
1079
1080 /// Call a function on every direct child node of this node (immutable version).
1081 ///
1082 /// This enables depth-first traversal for read-only operations like AST analysis.
1083 /// The closure receives an immutable reference to each child node.
1084 #[inline]
1085 pub fn for_each_child<'a, F: FnMut(&'a Node)>(&'a self, mut f: F) {
1086 match &self.kind {
1087 NodeKind::Tie { variable, package, args } => {
1088 f(variable);
1089 f(package);
1090 for arg in args {
1091 f(arg);
1092 }
1093 }
1094 NodeKind::Untie { variable } => f(variable),
1095
1096 // Root program node
1097 NodeKind::Program { statements } => {
1098 for stmt in statements {
1099 f(stmt);
1100 }
1101 }
1102
1103 // Statement wrappers
1104 NodeKind::ExpressionStatement { expression } => f(expression),
1105
1106 // Variable declarations
1107 NodeKind::VariableDeclaration { variable, initializer, .. } => {
1108 f(variable);
1109 if let Some(init) = initializer {
1110 f(init);
1111 }
1112 }
1113 NodeKind::VariableListDeclaration { variables, initializer, .. } => {
1114 for var in variables {
1115 f(var);
1116 }
1117 if let Some(init) = initializer {
1118 f(init);
1119 }
1120 }
1121 NodeKind::VariableWithAttributes { variable, .. } => f(variable),
1122
1123 // Binary operations
1124 NodeKind::Binary { left, right, .. } => {
1125 f(left);
1126 f(right);
1127 }
1128 NodeKind::Ternary { condition, then_expr, else_expr } => {
1129 f(condition);
1130 f(then_expr);
1131 f(else_expr);
1132 }
1133 NodeKind::Unary { operand, .. } => f(operand),
1134 NodeKind::Assignment { lhs, rhs, .. } => {
1135 f(lhs);
1136 f(rhs);
1137 }
1138
1139 // Control flow
1140 NodeKind::Block { statements } => {
1141 for stmt in statements {
1142 f(stmt);
1143 }
1144 }
1145 NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
1146 f(condition);
1147 f(then_branch);
1148 for (elsif_cond, elsif_body) in elsif_branches {
1149 f(elsif_cond);
1150 f(elsif_body);
1151 }
1152 if let Some(else_body) = else_branch {
1153 f(else_body);
1154 }
1155 }
1156 NodeKind::While { condition, body, continue_block, .. } => {
1157 f(condition);
1158 f(body);
1159 if let Some(cont) = continue_block {
1160 f(cont);
1161 }
1162 }
1163 NodeKind::For { init, condition, update, body, continue_block, .. } => {
1164 if let Some(i) = init {
1165 f(i);
1166 }
1167 if let Some(c) = condition {
1168 f(c);
1169 }
1170 if let Some(u) = update {
1171 f(u);
1172 }
1173 f(body);
1174 if let Some(cont) = continue_block {
1175 f(cont);
1176 }
1177 }
1178 NodeKind::Foreach { variable, list, body, continue_block } => {
1179 f(variable);
1180 f(list);
1181 f(body);
1182 if let Some(cb) = continue_block {
1183 f(cb);
1184 }
1185 }
1186 NodeKind::Given { expr, body } => {
1187 f(expr);
1188 f(body);
1189 }
1190 NodeKind::When { condition, body } => {
1191 f(condition);
1192 f(body);
1193 }
1194 NodeKind::Default { body } => f(body),
1195 NodeKind::StatementModifier { statement, condition, .. } => {
1196 f(statement);
1197 f(condition);
1198 }
1199 NodeKind::LabeledStatement { statement, .. } => f(statement),
1200
1201 // Eval and Do blocks
1202 NodeKind::Eval { block } => f(block),
1203 NodeKind::Do { block } => f(block),
1204 NodeKind::Defer { block } => f(block),
1205 NodeKind::Try { body, catch_blocks, finally_block } => {
1206 f(body);
1207 for (_, catch_body) in catch_blocks {
1208 f(catch_body);
1209 }
1210 if let Some(finally) = finally_block {
1211 f(finally);
1212 }
1213 }
1214
1215 // Function calls
1216 NodeKind::FunctionCall { args, .. } => {
1217 for arg in args {
1218 f(arg);
1219 }
1220 }
1221 NodeKind::MethodCall { object, args, .. } => {
1222 f(object);
1223 for arg in args {
1224 f(arg);
1225 }
1226 }
1227 NodeKind::IndirectCall { object, args, .. } => {
1228 f(object);
1229 for arg in args {
1230 f(arg);
1231 }
1232 }
1233
1234 // Functions
1235 NodeKind::Subroutine { prototype, signature, body, .. } => {
1236 if let Some(proto) = prototype {
1237 f(proto);
1238 }
1239 if let Some(sig) = signature {
1240 f(sig);
1241 }
1242 f(body);
1243 }
1244 NodeKind::Method { signature, body, .. } => {
1245 if let Some(sig) = signature {
1246 f(sig);
1247 }
1248 f(body);
1249 }
1250 NodeKind::Return { value } => {
1251 if let Some(v) = value {
1252 f(v);
1253 }
1254 }
1255 NodeKind::Goto { target } => f(target),
1256 NodeKind::Signature { parameters } => {
1257 for param in parameters {
1258 f(param);
1259 }
1260 }
1261 NodeKind::MandatoryParameter { variable } => f(variable),
1262 NodeKind::OptionalParameter { variable, default_value } => {
1263 f(variable);
1264 f(default_value);
1265 }
1266 NodeKind::SlurpyParameter { variable } => f(variable),
1267 NodeKind::NamedParameter { variable } => f(variable),
1268
1269 // Pattern matching
1270 NodeKind::Match { expr, .. } => f(expr),
1271 NodeKind::Substitution { expr, .. } => f(expr),
1272 NodeKind::Transliteration { expr, .. } => f(expr),
1273
1274 // Containers
1275 NodeKind::ArrayLiteral { elements } => {
1276 for elem in elements {
1277 f(elem);
1278 }
1279 }
1280 NodeKind::HashLiteral { pairs } => {
1281 for (key, value) in pairs {
1282 f(key);
1283 f(value);
1284 }
1285 }
1286
1287 // Package system
1288 NodeKind::Package { block, .. } => {
1289 if let Some(b) = block {
1290 f(b);
1291 }
1292 }
1293 NodeKind::PhaseBlock { block, .. } => f(block),
1294 NodeKind::Class { body, .. } => f(body),
1295
1296 // Error node might have a partial valid tree
1297 NodeKind::Error { partial, .. } => {
1298 if let Some(node) = partial {
1299 f(node);
1300 }
1301 }
1302
1303 // Leaf nodes (no children to traverse)
1304 NodeKind::Variable { .. }
1305 | NodeKind::Identifier { .. }
1306 | NodeKind::Number { .. }
1307 | NodeKind::String { .. }
1308 | NodeKind::Heredoc { .. }
1309 | NodeKind::Regex { .. }
1310 | NodeKind::Readline { .. }
1311 | NodeKind::Glob { .. }
1312 | NodeKind::Typeglob { .. }
1313 | NodeKind::Diamond
1314 | NodeKind::Ellipsis
1315 | NodeKind::Undef
1316 | NodeKind::Use { .. }
1317 | NodeKind::No { .. }
1318 | NodeKind::Prototype { .. }
1319 | NodeKind::DataSection { .. }
1320 | NodeKind::Format { .. }
1321 | NodeKind::LoopControl { .. }
1322 | NodeKind::MissingExpression
1323 | NodeKind::MissingStatement
1324 | NodeKind::MissingIdentifier
1325 | NodeKind::MissingBlock
1326 | NodeKind::UnknownRest => {}
1327 }
1328 }
1329
1330 /// Count the total number of nodes in this subtree (inclusive).
1331 ///
1332 /// # Examples
1333 ///
1334 /// ```
1335 /// use perl_ast::{Node, NodeKind, SourceLocation};
1336 ///
1337 /// let loc = SourceLocation { start: 0, end: 1 };
1338 /// let leaf = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1339 /// assert_eq!(leaf.count_nodes(), 1);
1340 ///
1341 /// let program = Node::new(
1342 /// NodeKind::Program { statements: vec![leaf] },
1343 /// loc,
1344 /// );
1345 /// assert_eq!(program.count_nodes(), 2);
1346 /// ```
1347 pub fn count_nodes(&self) -> usize {
1348 let mut count = 1;
1349 self.for_each_child(|child| {
1350 count += child.count_nodes();
1351 });
1352 count
1353 }
1354
1355 /// Collect direct child nodes into a vector for convenience APIs.
1356 ///
1357 /// # Examples
1358 ///
1359 /// ```
1360 /// use perl_ast::{Node, NodeKind, SourceLocation};
1361 ///
1362 /// let loc = SourceLocation { start: 0, end: 1 };
1363 /// let stmt = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1364 /// let program = Node::new(
1365 /// NodeKind::Program { statements: vec![stmt] },
1366 /// loc,
1367 /// );
1368 /// assert_eq!(program.children().len(), 1);
1369 /// ```
1370 #[inline]
1371 pub fn children(&self) -> Vec<&Node> {
1372 let mut children = Vec::new();
1373 self.for_each_child(|child| children.push(child));
1374 children
1375 }
1376
1377 /// Count direct child nodes without allocating an intermediate vector.
1378 ///
1379 /// This is more efficient than `children().len()` when callers only need
1380 /// cardinality.
1381 #[inline]
1382 pub fn child_count(&self) -> usize {
1383 let mut count = 0;
1384 self.for_each_child(|_| count += 1);
1385 count
1386 }
1387
1388 /// Get the first direct child node, if any.
1389 ///
1390 /// Optimized to avoid allocating the children vector.
1391 #[inline]
1392 pub fn first_child(&self) -> Option<&Node> {
1393 let mut result = None;
1394 self.for_each_child(|child| {
1395 if result.is_none() {
1396 result = Some(child);
1397 }
1398 });
1399 result
1400 }
1401
1402 /// Returns `true` when this node's source span contains `offset`.
1403 ///
1404 /// The start position is inclusive and the end position is exclusive.
1405 #[inline]
1406 pub fn contains_offset(&self, offset: usize) -> bool {
1407 self.location.start <= offset && offset < self.location.end
1408 }
1409
1410 /// Find the most specific node whose source span contains `offset`.
1411 ///
1412 /// Returns `None` when `offset` is outside this node. Otherwise, returns this
1413 /// node or the deepest descendant whose span contains the offset. This is useful
1414 /// for LSP features that need to map a cursor byte offset to the smallest AST
1415 /// construct at that position.
1416 ///
1417 /// The same half-open span semantics as [`Node::contains_offset`] apply: start
1418 /// positions are inclusive and end positions are exclusive.
1419 ///
1420 /// # Examples
1421 ///
1422 /// ```
1423 /// use perl_ast::{Node, NodeKind, SourceLocation};
1424 ///
1425 /// let left = Node::new(
1426 /// NodeKind::Identifier { name: "left".to_string() },
1427 /// SourceLocation { start: 0, end: 4 },
1428 /// );
1429 /// let right = Node::new(
1430 /// NodeKind::Number { value: "1".to_string() },
1431 /// SourceLocation { start: 7, end: 8 },
1432 /// );
1433 /// let expr = Node::new(
1434 /// NodeKind::Binary {
1435 /// op: "+".to_string(),
1436 /// left: Box::new(left),
1437 /// right: Box::new(right),
1438 /// },
1439 /// SourceLocation { start: 0, end: 8 },
1440 /// );
1441 ///
1442 /// assert_eq!(
1443 /// expr.find_deepest_containing_offset(7).map(|node| node.kind.kind_name()),
1444 /// Some("Number"),
1445 /// );
1446 /// assert_eq!(expr.find_deepest_containing_offset(8), None);
1447 /// ```
1448 #[inline]
1449 pub fn find_deepest_containing_offset(&self, offset: usize) -> Option<&Node> {
1450 if !self.contains_offset(offset) {
1451 return None;
1452 }
1453
1454 let mut result = self;
1455 self.for_each_child(|child| {
1456 if let Some(descendant) = child.find_deepest_containing_offset(offset) {
1457 result = descendant;
1458 }
1459 });
1460 Some(result)
1461 }
1462
1463 /// Returns the byte length of this node's source span.
1464 ///
1465 /// Uses saturating subtraction so malformed spans never underflow.
1466 #[inline]
1467 pub fn span_len(&self) -> usize {
1468 self.location.end.saturating_sub(self.location.start)
1469 }
1470
1471 /// Get the last direct child node, if any.
1472 ///
1473 /// Optimized to avoid allocating the children vector.
1474 ///
1475 /// # Examples
1476 ///
1477 /// ```
1478 /// use perl_ast::{Node, NodeKind, SourceLocation};
1479 ///
1480 /// let loc = SourceLocation { start: 0, end: 1 };
1481 /// let first = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1482 /// let second = Node::new(NodeKind::Number { value: "2".to_string() }, loc);
1483 /// let program = Node::new(
1484 /// NodeKind::Program { statements: vec![first, second] },
1485 /// loc,
1486 /// );
1487 ///
1488 /// assert_eq!(program.last_child().map(|n| n.kind.kind_name()), Some("Number"));
1489 /// assert_eq!(Node::new(NodeKind::Block { statements: vec![] }, loc).last_child(), None);
1490 /// ```
1491 #[inline]
1492 pub fn last_child(&self) -> Option<&Node> {
1493 let mut result = None;
1494 self.for_each_child(|child| {
1495 result = Some(child);
1496 });
1497 result
1498 }
1499}
1500
1501/// Comprehensive enumeration of all Perl language constructs supported by the parser.
1502///
1503/// This enum represents every possible AST node type that can be parsed from Perl code
1504/// during the Parse → Index → Navigate → Complete → Analyze workflow. Each variant captures
1505/// the semantic meaning and structural relationships needed for complete script analysis
1506/// and transformation.
1507///
1508/// # LSP Workflow Integration
1509///
1510/// Node kinds are processed differently across workflow stages:
1511/// - **Parse**: All variants are produced by the parser
1512/// - **Index**: Symbol-bearing variants feed workspace indexing
1513/// - **Navigate**: Call and reference variants support navigation features
1514/// - **Complete**: Expression variants provide completion context
1515/// - **Analyze**: Semantic variants drive diagnostics and refactoring
1516///
1517/// # Examples
1518///
1519/// Pattern-match on node kinds to extract semantic information:
1520///
1521/// ```
1522/// use perl_ast::{Node, NodeKind, SourceLocation};
1523///
1524/// let loc = SourceLocation { start: 0, end: 5 };
1525/// let node = Node::new(
1526/// NodeKind::Variable { sigil: "$".to_string(), name: "foo".to_string() },
1527/// loc,
1528/// );
1529///
1530/// assert!(matches!(
1531/// &node.kind,
1532/// NodeKind::Variable { sigil, name } if sigil == "$" && name == "foo"
1533/// ));
1534/// ```
1535///
1536/// Use [`kind_name()`](NodeKind::kind_name) for debugging and diagnostics:
1537///
1538/// ```
1539/// use perl_ast::NodeKind;
1540///
1541/// let kind = NodeKind::Number { value: "99".to_string() };
1542/// assert_eq!(kind.kind_name(), "Number");
1543///
1544/// let kind = NodeKind::Variable { sigil: "@".to_string(), name: "list".to_string() };
1545/// assert_eq!(kind.kind_name(), "Variable");
1546/// ```
1547///
1548/// # Performance Considerations
1549///
1550/// The enum design optimizes for large codebases:
1551/// - Box pointers minimize stack usage for recursive structures
1552/// - Vector storage enables efficient bulk operations on child nodes
1553/// - Clone operations optimized for concurrent analysis workflows
1554/// - Pattern matching performance tuned for common Perl constructs
1555#[derive(Debug, Clone, PartialEq)]
1556pub enum NodeKind {
1557 /// Top-level program containing all statements in an Perl script
1558 ///
1559 /// This is the root node for any parsed Perl script content, containing all
1560 /// top-level statements found during the Parse stage of LSP workflow.
1561 Program {
1562 /// All top-level statements in the Perl script
1563 statements: Vec<Node>,
1564 },
1565
1566 /// Statement wrapper for expressions that appear at statement level
1567 ///
1568 /// Used during Analyze stage to distinguish between expressions used as
1569 /// statements versus expressions within other contexts during Perl parsing.
1570 ExpressionStatement {
1571 /// The expression being used as a statement
1572 expression: Box<Node>,
1573 },
1574
1575 /// Variable declaration with scope declarator in Perl script processing
1576 ///
1577 /// Represents declarations like `my $var`, `our $global`, `local $dynamic`, etc.
1578 /// Critical for Analyze stage symbol table construction during Perl parsing.
1579 VariableDeclaration {
1580 /// Scope declarator: "my", "our", "local", "state"
1581 declarator: String,
1582 /// The variable being declared
1583 variable: Box<Node>,
1584 /// Variable attributes (e.g., ":shared", ":locked")
1585 attributes: Vec<String>,
1586 /// Optional initializer expression
1587 initializer: Option<Box<Node>>,
1588 },
1589
1590 /// Multiple variable declaration in a single statement
1591 ///
1592 /// Handles constructs like `my ($x, $y) = @values` common in Perl script processing.
1593 /// Supports efficient bulk variable analysis during Navigate stage operations.
1594 VariableListDeclaration {
1595 /// Scope declarator for all variables in the list
1596 declarator: String,
1597 /// All variables being declared in the list
1598 variables: Vec<Node>,
1599 /// Attributes applied to the variable list
1600 attributes: Vec<String>,
1601 /// Optional initializer for the entire variable list
1602 initializer: Option<Box<Node>>,
1603 },
1604
1605 /// Perl variable reference (scalar, array, hash, etc.) in Perl parsing workflow
1606 Variable {
1607 /// Variable sigil indicating type: $, @, %, &, *
1608 sigil: String, // $, @, %, &, *
1609 /// Variable name without sigil
1610 name: String,
1611 },
1612
1613 /// Variable with additional attributes for enhanced LSP workflow
1614 VariableWithAttributes {
1615 /// The base variable node
1616 variable: Box<Node>,
1617 /// List of attribute names applied to the variable
1618 attributes: Vec<String>,
1619 },
1620
1621 /// Assignment operation for LSP data processing workflows
1622 Assignment {
1623 /// Left-hand side of assignment
1624 lhs: Box<Node>,
1625 /// Right-hand side of assignment
1626 rhs: Box<Node>,
1627 /// Assignment operator: =, +=, -=, etc.
1628 op: String, // =, +=, -=, etc.
1629 },
1630
1631 // Expressions
1632 /// Binary operation for Perl parsing workflow calculations
1633 Binary {
1634 /// Binary operator
1635 op: String,
1636 /// Left operand
1637 left: Box<Node>,
1638 /// Right operand
1639 right: Box<Node>,
1640 },
1641
1642 /// Ternary conditional expression for Perl parsing workflow logic
1643 Ternary {
1644 /// Condition to evaluate
1645 condition: Box<Node>,
1646 /// Expression when condition is true
1647 then_expr: Box<Node>,
1648 /// Expression when condition is false
1649 else_expr: Box<Node>,
1650 },
1651
1652 /// Unary operation for Perl parsing workflow
1653 Unary {
1654 /// Unary operator
1655 op: String,
1656 /// Operand to apply operator to
1657 operand: Box<Node>,
1658 },
1659
1660 // I/O operations
1661 /// Diamond operator for file input in Perl parsing workflow
1662 Diamond, // <>
1663
1664 /// Ellipsis operator for Perl parsing workflow
1665 Ellipsis, // ...
1666
1667 /// Undef value for Perl parsing workflow
1668 Undef, // undef
1669
1670 /// Readline operation for LSP file processing
1671 Readline {
1672 /// Optional filehandle: `<STDIN>`, `<$fh>`, etc.
1673 filehandle: Option<String>, // <STDIN>, <$fh>, etc.
1674 },
1675
1676 /// Glob pattern for LSP workspace file matching
1677 Glob {
1678 /// Pattern string for file matching
1679 pattern: String, // <*.txt>
1680 },
1681
1682 /// Typeglob expression: `*foo` or `*main::bar`
1683 ///
1684 /// Provides access to all symbol table entries for a given name.
1685 Typeglob {
1686 /// Name of the symbol (including package qualification)
1687 name: String,
1688 },
1689
1690 /// Numeric literal in Perl code (integer, float, hex, octal, binary)
1691 ///
1692 /// Represents all numeric literal forms: `42`, `3.14`, `0x1A`, `0o755`, `0b1010`.
1693 Number {
1694 /// String representation preserving original format
1695 value: String,
1696 },
1697
1698 /// String literal with optional interpolation
1699 ///
1700 /// Handles both single-quoted (`'literal'`) and double-quoted (`"$interpolated"`) strings.
1701 String {
1702 /// String content (after quote processing)
1703 value: String,
1704 /// Whether the string supports variable interpolation
1705 interpolated: bool,
1706 },
1707
1708 /// Heredoc string literal for multi-line content
1709 ///
1710 /// Supports all heredoc forms: `<<EOF`, `<<'EOF'`, `<<"EOF"`, `<<~EOF` (indented).
1711 Heredoc {
1712 /// Delimiter marking heredoc boundaries
1713 delimiter: String,
1714 /// Content between delimiters
1715 content: String,
1716 /// Whether content supports variable interpolation
1717 interpolated: bool,
1718 /// Whether leading whitespace is stripped (<<~ form)
1719 indented: bool,
1720 /// Whether this is a command execution heredoc (<<`EOF`)
1721 command: bool,
1722 /// Body span for breakpoint detection (populated by drain_pending_heredocs)
1723 body_span: Option<SourceLocation>,
1724 },
1725
1726 /// Array literal expression: `(1, 2, 3)` or `[1, 2, 3]`
1727 ArrayLiteral {
1728 /// Elements in the array
1729 elements: Vec<Node>,
1730 },
1731
1732 /// Hash literal expression: `(key => 'value')` or `{key => 'value'}`
1733 HashLiteral {
1734 /// Key-value pairs in the hash
1735 pairs: Vec<(Node, Node)>,
1736 },
1737
1738 /// Block of statements: `{ ... }`
1739 ///
1740 /// Used for control structures, subroutine bodies, and bare blocks.
1741 Block {
1742 /// Statements within the block
1743 statements: Vec<Node>,
1744 },
1745
1746 /// Eval block for exception handling: `eval { ... }`
1747 Eval {
1748 /// Block to evaluate with exception trapping
1749 block: Box<Node>,
1750 },
1751
1752 /// Do block for file inclusion or expression evaluation: `do { ... }` or `do "file"`
1753 Do {
1754 /// Block to execute or file expression
1755 block: Box<Node>,
1756 },
1757
1758 /// Defer block for deferred cleanup on scope exit (Perl 5.36+ experimental, stable in 5.40)
1759 Defer {
1760 /// Block to execute on scope exit
1761 block: Box<Node>,
1762 },
1763
1764 /// Try-catch-finally for modern exception handling (Syntax::Keyword::Try style)
1765 Try {
1766 /// Try block body
1767 body: Box<Node>,
1768 /// Catch blocks: (optional exception variable, handler block)
1769 catch_blocks: Vec<(Option<String>, Box<Node>)>,
1770 /// Optional finally block
1771 finally_block: Option<Box<Node>>,
1772 },
1773
1774 /// If-elsif-else conditional statement
1775 If {
1776 /// Condition expression
1777 condition: Box<Node>,
1778 /// Then branch block
1779 then_branch: Box<Node>,
1780 /// Elsif branches: (condition, block) pairs
1781 elsif_branches: Vec<(Box<Node>, Box<Node>)>,
1782 /// Optional else branch
1783 else_branch: Option<Box<Node>>,
1784 /// Original keyword: None for 'if', Some("unless") for 'unless' block form.
1785 keyword: Option<String>,
1786 },
1787
1788 /// Statement with a label for loop control: `LABEL: while (...)`
1789 LabeledStatement {
1790 /// Label name (e.g., "OUTER", "LINE")
1791 label: String,
1792 /// Labeled statement (typically a loop)
1793 statement: Box<Node>,
1794 },
1795
1796 /// While loop: `while (condition) { ... }`
1797 While {
1798 /// Loop condition
1799 condition: Box<Node>,
1800 /// Loop body
1801 body: Box<Node>,
1802 /// Optional continue block
1803 continue_block: Option<Box<Node>>,
1804 /// Original keyword: None for 'while', Some("until") for 'until' block form.
1805 keyword: Option<String>,
1806 },
1807
1808 /// Tie operation for binding variables to objects: `tie %hash, 'Package', @args`
1809 Tie {
1810 /// Variable being tied
1811 variable: Box<Node>,
1812 /// Class/package name to tie to
1813 package: Box<Node>,
1814 /// Arguments passed to TIE* method
1815 args: Vec<Node>,
1816 },
1817
1818 /// Untie operation for unbinding variables: `untie %hash`
1819 Untie {
1820 /// Variable being untied
1821 variable: Box<Node>,
1822 },
1823
1824 /// C-style for loop: `for (init; cond; update) { ... }`
1825 For {
1826 /// Initialization expression
1827 init: Option<Box<Node>>,
1828 /// Loop condition
1829 condition: Option<Box<Node>>,
1830 /// Update expression
1831 update: Option<Box<Node>>,
1832 /// Loop body
1833 body: Box<Node>,
1834 /// Optional continue block
1835 continue_block: Option<Box<Node>>,
1836 },
1837
1838 /// Foreach loop: `foreach my $item (@list) { ... }`
1839 Foreach {
1840 /// Iterator variable
1841 variable: Box<Node>,
1842 /// List to iterate
1843 list: Box<Node>,
1844 /// Loop body
1845 body: Box<Node>,
1846 /// Optional continue block
1847 continue_block: Option<Box<Node>>,
1848 },
1849
1850 /// Given statement for switch-like matching (Perl 5.10+)
1851 Given {
1852 /// Expression to match against
1853 expr: Box<Node>,
1854 /// Body containing when/default blocks
1855 body: Box<Node>,
1856 },
1857
1858 /// When clause in given/switch: `when ($pattern) { ... }`
1859 When {
1860 /// Pattern to match
1861 condition: Box<Node>,
1862 /// Handler block
1863 body: Box<Node>,
1864 },
1865
1866 /// Default clause in given/switch: `default { ... }`
1867 Default {
1868 /// Handler block for unmatched cases
1869 body: Box<Node>,
1870 },
1871
1872 /// Statement modifier syntax: `print "ok" if $condition`
1873 StatementModifier {
1874 /// Statement to conditionally execute
1875 statement: Box<Node>,
1876 /// Modifier keyword: if, unless, while, until, for, foreach
1877 modifier: String,
1878 /// Modifier condition
1879 condition: Box<Node>,
1880 },
1881
1882 // Functions
1883 /// Subroutine declaration (function) including name, prototype, signature and body.
1884 Subroutine {
1885 /// Name of the subroutine
1886 ///
1887 /// # Precise Navigation Support
1888 /// - Added name_span for exact LSP navigation
1889 /// - Enables precise go-to-definition and hover behavior
1890 /// - O(1) span lookup in workspace symbols
1891 ///
1892 /// ## Integration Points
1893 /// - Semantic token providers
1894 /// - Cross-reference generation
1895 /// - Symbol renaming
1896 name: Option<String>,
1897
1898 /// Source location span of the subroutine name
1899 ///
1900 /// ## Usage Notes
1901 /// - Always corresponds to the name field
1902 /// - Provides constant-time position information
1903 /// - Essential for precise editor interactions
1904 name_span: Option<SourceLocation>,
1905
1906 /// Optional prototype node (e.g. `($;@)`).
1907 prototype: Option<Box<Node>>,
1908 /// Optional signature node (Perl 5.20+ feature).
1909 signature: Option<Box<Node>>,
1910 /// Attributes attached to the subroutine (`:lvalue`, etc.).
1911 attributes: Vec<String>,
1912 /// The body block of the subroutine.
1913 body: Box<Node>,
1914 },
1915
1916 /// Subroutine prototype specification: `sub foo ($;@) { ... }`
1917 Prototype {
1918 /// Prototype string defining argument behavior
1919 content: String,
1920 },
1921
1922 /// Subroutine signature (Perl 5.20+): `sub foo ($x, $y = 0) { ... }`
1923 Signature {
1924 /// List of signature parameters
1925 parameters: Vec<Node>,
1926 },
1927
1928 /// Mandatory signature parameter: `$x` in `sub foo ($x) { }`
1929 MandatoryParameter {
1930 /// Variable being bound
1931 variable: Box<Node>,
1932 },
1933
1934 /// Optional signature parameter with default: `$y = 0` in `sub foo ($y = 0) { }`
1935 OptionalParameter {
1936 /// Variable being bound
1937 variable: Box<Node>,
1938 /// Default value expression
1939 default_value: Box<Node>,
1940 },
1941
1942 /// Slurpy parameter collecting remaining args: `@rest` or `%opts` in signature
1943 SlurpyParameter {
1944 /// Array or hash variable to receive remaining arguments
1945 variable: Box<Node>,
1946 },
1947
1948 /// Named parameter placeholder in signature (future Perl feature)
1949 NamedParameter {
1950 /// Variable for named parameter binding
1951 variable: Box<Node>,
1952 },
1953
1954 /// Method declaration (Perl 5.38+ with `use feature 'class'`)
1955 Method {
1956 /// Method name
1957 name: String,
1958 /// Optional signature
1959 signature: Option<Box<Node>>,
1960 /// Method attributes (e.g., `:lvalue`)
1961 attributes: Vec<String>,
1962 /// Method body
1963 body: Box<Node>,
1964 },
1965
1966 /// Return statement: `return;` or `return $value;`
1967 Return {
1968 /// Optional return value
1969 value: Option<Box<Node>>,
1970 },
1971
1972 /// Loop control statement: `next`, `last`, or `redo`
1973 LoopControl {
1974 /// Control keyword: "next", "last", or "redo"
1975 op: String,
1976 /// Optional label: `next LABEL`
1977 label: Option<String>,
1978 },
1979
1980 /// Goto statement: `goto LABEL`, `goto &sub`, or `goto $expr`
1981 Goto {
1982 /// The target of the goto (label identifier, sub reference, or expression)
1983 target: Box<Node>,
1984 },
1985
1986 /// Method call: `$obj->method(@args)` or `$obj->method`
1987 MethodCall {
1988 /// Object or class expression
1989 object: Box<Node>,
1990 /// Method name being called
1991 method: String,
1992 /// Method arguments
1993 args: Vec<Node>,
1994 },
1995
1996 /// Function call: `foo(@args)` or `foo()`
1997 FunctionCall {
1998 /// Function name (may be qualified: `Package::func`)
1999 name: String,
2000 /// Function arguments
2001 args: Vec<Node>,
2002 },
2003
2004 /// Indirect object call (legacy syntax): `new Class @args`
2005 IndirectCall {
2006 /// Method name
2007 method: String,
2008 /// Object or class
2009 object: Box<Node>,
2010 /// Arguments
2011 args: Vec<Node>,
2012 },
2013
2014 /// Regex literal: `/pattern/modifiers` or `qr/pattern/modifiers`
2015 Regex {
2016 /// Regular expression pattern
2017 pattern: String,
2018 /// Replacement string (for s/// when parsed as regex)
2019 replacement: Option<String>,
2020 /// Regex modifiers (i, m, s, x, g, etc.)
2021 modifiers: String,
2022 /// Whether the regex contains embedded code `(?{...})`
2023 has_embedded_code: bool,
2024 },
2025
2026 /// Match operation: `$str =~ /pattern/modifiers` or `$str !~ /pattern/modifiers`
2027 Match {
2028 /// Expression to match against
2029 expr: Box<Node>,
2030 /// Pattern to match
2031 pattern: String,
2032 /// Match modifiers
2033 modifiers: String,
2034 /// Whether the regex contains embedded code `(?{...})`
2035 has_embedded_code: bool,
2036 /// Whether the binding operator was `!~` (negated match)
2037 negated: bool,
2038 },
2039
2040 /// Substitution operation: `$str =~ s/pattern/replacement/modifiers`
2041 Substitution {
2042 /// Expression to substitute in
2043 expr: Box<Node>,
2044 /// Pattern to find
2045 pattern: String,
2046 /// Replacement string
2047 replacement: String,
2048 /// Substitution modifiers (g, e, r, etc.)
2049 modifiers: String,
2050 /// Whether the regex contains embedded code `(?{...})`
2051 has_embedded_code: bool,
2052 /// Whether the binding operator was `!~` (negated match)
2053 negated: bool,
2054 },
2055
2056 /// Transliteration operation: `$str =~ tr/search/replace/` or `y///`
2057 Transliteration {
2058 /// Expression to transliterate
2059 expr: Box<Node>,
2060 /// Characters to search for
2061 search: String,
2062 /// Replacement characters
2063 replace: String,
2064 /// Transliteration modifiers (c, d, s, r)
2065 modifiers: String,
2066 /// Whether the binding operator was `!~` (negated match)
2067 negated: bool,
2068 },
2069
2070 // Package system
2071 /// Package declaration (e.g. `package Foo;`) and optional inline block form.
2072 Package {
2073 /// Name of the package
2074 ///
2075 /// # Precise Navigation Support
2076 /// - Added name_span for exact LSP navigation
2077 /// - Enables precise go-to-definition and hover behavior
2078 /// - O(1) span lookup in workspace symbols
2079 ///
2080 /// ## Integration Points
2081 /// - Workspace indexing
2082 /// - Cross-module symbol resolution
2083 /// - Code action providers
2084 name: String,
2085
2086 /// Source location span of the package name
2087 ///
2088 /// ## Usage Notes
2089 /// - Always corresponds to the name field
2090 /// - Provides constant-time position information
2091 /// - Essential for precise editor interactions
2092 name_span: SourceLocation,
2093
2094 /// Optional inline block for `package Foo { ... }` declarations.
2095 block: Option<Box<Node>>,
2096 },
2097
2098 /// Use statement for module loading: `use Module qw(imports);`
2099 Use {
2100 /// Module name to load
2101 module: String,
2102 /// Import arguments (symbols to import)
2103 args: Vec<String>,
2104 /// Whether this module is a known source filter (security risk)
2105 has_filter_risk: bool,
2106 },
2107
2108 /// No statement for disabling features: `no strict;`
2109 No {
2110 /// Module/pragma name to disable
2111 module: String,
2112 /// Arguments for the no statement
2113 args: Vec<String>,
2114 /// Whether this module is a known source filter (security risk)
2115 has_filter_risk: bool,
2116 },
2117
2118 /// Phase block for compile/runtime hooks: `BEGIN`, `END`, `CHECK`, `INIT`, `UNITCHECK`
2119 PhaseBlock {
2120 /// Phase name: BEGIN, END, CHECK, INIT, UNITCHECK
2121 phase: String,
2122 /// Source location span of the phase block name for precise navigation
2123 phase_span: Option<SourceLocation>,
2124 /// Block to execute during the specified phase
2125 block: Box<Node>,
2126 },
2127
2128 /// Data section marker: `__DATA__` or `__END__`
2129 DataSection {
2130 /// Section marker (__DATA__ or __END__)
2131 marker: String,
2132 /// Content following the marker (if any)
2133 body: Option<String>,
2134 },
2135
2136 /// Class declaration (Perl 5.38+ with `use feature 'class'`)
2137 Class {
2138 /// Class name
2139 name: String,
2140 /// Parent class names from `:isa(Parent)` attributes
2141 parents: Vec<String>,
2142 /// Class body containing methods and attributes
2143 body: Box<Node>,
2144 },
2145
2146 /// Format declaration for legacy report generation
2147 Format {
2148 /// Format name (defaults to filehandle name)
2149 name: String,
2150 /// Format specification body
2151 body: String,
2152 },
2153
2154 /// Bare identifier (bareword or package-qualified name)
2155 Identifier {
2156 /// Identifier string
2157 name: String,
2158 },
2159
2160 /// Parse error placeholder with error message and recovery context
2161 Error {
2162 /// Error description
2163 message: String,
2164 /// Expected token types (if any)
2165 expected: Vec<TokenKind>,
2166 /// The token actually found (if any)
2167 found: Option<Token>,
2168 /// Partial AST node parsed before error (if any)
2169 partial: Option<Box<Node>>,
2170 },
2171
2172 /// Missing expression where one was expected
2173 MissingExpression,
2174 /// Missing statement where one was expected
2175 MissingStatement,
2176 /// Missing identifier where one was expected
2177 MissingIdentifier,
2178 /// Missing block where one was expected
2179 MissingBlock,
2180
2181 /// Lexer budget exceeded marker preserving partial parse results
2182 ///
2183 /// Used when recursion or token limits are hit to preserve already-parsed content.
2184 UnknownRest,
2185}
2186
2187impl NodeKind {
2188 /// Get the name of this `NodeKind` as a static string.
2189 ///
2190 /// Useful for diagnostics, logging, and human-readable AST dumps.
2191 ///
2192 /// # Examples
2193 ///
2194 /// ```
2195 /// use perl_ast::NodeKind;
2196 ///
2197 /// let kind = NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() };
2198 /// assert_eq!(kind.kind_name(), "Variable");
2199 ///
2200 /// let kind = NodeKind::Program { statements: vec![] };
2201 /// assert_eq!(kind.kind_name(), "Program");
2202 /// ```
2203 pub fn kind_name(&self) -> &'static str {
2204 match self {
2205 NodeKind::Program { .. } => "Program",
2206 NodeKind::ExpressionStatement { .. } => "ExpressionStatement",
2207 NodeKind::VariableDeclaration { .. } => "VariableDeclaration",
2208 NodeKind::VariableListDeclaration { .. } => "VariableListDeclaration",
2209 NodeKind::Variable { .. } => "Variable",
2210 NodeKind::VariableWithAttributes { .. } => "VariableWithAttributes",
2211 NodeKind::Assignment { .. } => "Assignment",
2212 NodeKind::Binary { .. } => "Binary",
2213 NodeKind::Ternary { .. } => "Ternary",
2214 NodeKind::Unary { .. } => "Unary",
2215 NodeKind::Diamond => "Diamond",
2216 NodeKind::Ellipsis => "Ellipsis",
2217 NodeKind::Undef => "Undef",
2218 NodeKind::Readline { .. } => "Readline",
2219 NodeKind::Glob { .. } => "Glob",
2220 NodeKind::Typeglob { .. } => "Typeglob",
2221 NodeKind::Number { .. } => "Number",
2222 NodeKind::String { .. } => "String",
2223 NodeKind::Heredoc { .. } => "Heredoc",
2224 NodeKind::ArrayLiteral { .. } => "ArrayLiteral",
2225 NodeKind::HashLiteral { .. } => "HashLiteral",
2226 NodeKind::Block { .. } => "Block",
2227 NodeKind::Eval { .. } => "Eval",
2228 NodeKind::Do { .. } => "Do",
2229 NodeKind::Defer { .. } => "Defer",
2230 NodeKind::Try { .. } => "Try",
2231 NodeKind::If { .. } => "If",
2232 NodeKind::LabeledStatement { .. } => "LabeledStatement",
2233 NodeKind::While { .. } => "While",
2234 NodeKind::Tie { .. } => "Tie",
2235 NodeKind::Untie { .. } => "Untie",
2236 NodeKind::For { .. } => "For",
2237 NodeKind::Foreach { .. } => "Foreach",
2238 NodeKind::Given { .. } => "Given",
2239 NodeKind::When { .. } => "When",
2240 NodeKind::Default { .. } => "Default",
2241 NodeKind::StatementModifier { .. } => "StatementModifier",
2242 NodeKind::Subroutine { .. } => "Subroutine",
2243 NodeKind::Prototype { .. } => "Prototype",
2244 NodeKind::Signature { .. } => "Signature",
2245 NodeKind::MandatoryParameter { .. } => "MandatoryParameter",
2246 NodeKind::OptionalParameter { .. } => "OptionalParameter",
2247 NodeKind::SlurpyParameter { .. } => "SlurpyParameter",
2248 NodeKind::NamedParameter { .. } => "NamedParameter",
2249 NodeKind::Method { .. } => "Method",
2250 NodeKind::Return { .. } => "Return",
2251 NodeKind::LoopControl { .. } => "LoopControl",
2252 NodeKind::Goto { .. } => "Goto",
2253 NodeKind::MethodCall { .. } => "MethodCall",
2254 NodeKind::FunctionCall { .. } => "FunctionCall",
2255 NodeKind::IndirectCall { .. } => "IndirectCall",
2256 NodeKind::Regex { .. } => "Regex",
2257 NodeKind::Match { .. } => "Match",
2258 NodeKind::Substitution { .. } => "Substitution",
2259 NodeKind::Transliteration { .. } => "Transliteration",
2260 NodeKind::Package { .. } => "Package",
2261 NodeKind::Use { .. } => "Use",
2262 NodeKind::No { .. } => "No",
2263 NodeKind::PhaseBlock { .. } => "PhaseBlock",
2264 NodeKind::DataSection { .. } => "DataSection",
2265 NodeKind::Class { .. } => "Class",
2266 NodeKind::Format { .. } => "Format",
2267 NodeKind::Identifier { .. } => "Identifier",
2268 NodeKind::Error { .. } => "Error",
2269 NodeKind::MissingExpression => "MissingExpression",
2270 NodeKind::MissingStatement => "MissingStatement",
2271 NodeKind::MissingIdentifier => "MissingIdentifier",
2272 NodeKind::MissingBlock => "MissingBlock",
2273 NodeKind::UnknownRest => "UnknownRest",
2274 }
2275 }
2276
2277 /// Canonical list of **all** `kind_name()` strings, in alphabetical order.
2278 ///
2279 /// Every consumer that needs the full set of NodeKind names should reference
2280 /// this constant instead of maintaining a hand-written copy.
2281 pub const ALL_KIND_NAMES: &[&'static str] = &[
2282 "ArrayLiteral",
2283 "Assignment",
2284 "Binary",
2285 "Block",
2286 "Class",
2287 "DataSection",
2288 "Default",
2289 "Defer",
2290 "Diamond",
2291 "Do",
2292 "Ellipsis",
2293 "Error",
2294 "Eval",
2295 "ExpressionStatement",
2296 "For",
2297 "Foreach",
2298 "Format",
2299 "FunctionCall",
2300 "Given",
2301 "Glob",
2302 "Goto",
2303 "HashLiteral",
2304 "Heredoc",
2305 "Identifier",
2306 "If",
2307 "IndirectCall",
2308 "LabeledStatement",
2309 "LoopControl",
2310 "MandatoryParameter",
2311 "Match",
2312 "Method",
2313 "MethodCall",
2314 "MissingBlock",
2315 "MissingExpression",
2316 "MissingIdentifier",
2317 "MissingStatement",
2318 "NamedParameter",
2319 "No",
2320 "Number",
2321 "OptionalParameter",
2322 "Package",
2323 "PhaseBlock",
2324 "Program",
2325 "Prototype",
2326 "Readline",
2327 "Regex",
2328 "Return",
2329 "Signature",
2330 "SlurpyParameter",
2331 "StatementModifier",
2332 "String",
2333 "Subroutine",
2334 "Substitution",
2335 "Ternary",
2336 "Tie",
2337 "Transliteration",
2338 "Try",
2339 "Typeglob",
2340 "Unary",
2341 "Undef",
2342 "UnknownRest",
2343 "Untie",
2344 "Use",
2345 "Variable",
2346 "VariableDeclaration",
2347 "VariableListDeclaration",
2348 "VariableWithAttributes",
2349 "When",
2350 "While",
2351 ];
2352
2353 /// Subset of `ALL_KIND_NAMES` that represent synthetic/recovery nodes.
2354 ///
2355 /// These kinds are only produced by `parse_with_recovery()` on malformed
2356 /// input and should not be expected in clean parses.
2357 pub const RECOVERY_KIND_NAMES: &[&'static str] = &[
2358 "Error",
2359 "MissingBlock",
2360 "MissingExpression",
2361 "MissingIdentifier",
2362 "MissingStatement",
2363 "UnknownRest",
2364 ];
2365}
2366
2367impl fmt::Display for NodeKind {
2368 /// Formats as the canonical `kind_name()` string.
2369 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2370 f.write_str(self.kind_name())
2371 }
2372}
2373
2374impl fmt::Display for Node {
2375 /// Formats as the tree-sitter compatible S-expression.
2376 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2377 f.write_str(&self.to_sexp())
2378 }
2379}
2380
/// Build the S-expression label for a unary operator.
///
/// Three operators are labelled with a descriptive word instead of their own
/// spelling: `!` becomes `unary_not`, `~` becomes `unary_complement`, and `\`
/// becomes `unary_ref`.
///
/// Every other operator is labelled `unary_<op>`, with any spaces in `<op>`
/// replaced by underscores. That covers arithmetic `+`/`-`, `not`, `++`/`--`,
/// the `-X` file tests, postfix dereferences such as `->@*`, `defined`, and
/// anything unrecognised.
fn format_unary_operator(op: &str) -> String {
    // Operators whose label is a word rather than the operator text itself.
    let alias = match op {
        "!" => Some("not"),
        "~" => Some("complement"),
        "\\" => Some("ref"),
        _ => None,
    };

    match alias {
        Some(word) => format!("unary_{word}"),
        // General rule: the label is the operator spelling, made whitespace-free.
        None => format!("unary_{}", op.replace(' ', "_")),
    }
}
2445
/// Build the S-expression label for a binary operator.
///
/// The two arrow-subscript operators get dedicated labels: `->{}` becomes
/// `arrow_hash_deref` and `->[]` becomes `arrow_array_deref`.
///
/// Every other operator is labelled `binary_<op>`, with any spaces in `<op>`
/// replaced by underscores. That covers arithmetic, numeric and string
/// comparison, logical, bitwise, binding (`=~`/`!~`), smart match, repetition,
/// concatenation, range, `isa`, all assignment forms, `//`, `->`, the plain
/// `{}`/`[]` subscripts, and anything unrecognised.
fn format_binary_operator(op: &str) -> String {
    match op {
        // Arrow dereference subscripts are the only operators with a label
        // that is not derived from the operator text.
        "->{}" => String::from("arrow_hash_deref"),
        "->[]" => String::from("arrow_array_deref"),
        other => {
            let spelling = other.replace(' ', "_");
            format!("binary_{spelling}")
        }
    }
}
2545
2546// SourceLocation is now provided by perl-position-tracking crate
2547// See the re-export at the top of this file
2548
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    /// Build a dummy instance of each listed `NodeKind` variant and collect the
    /// `kind_name()` of every one.
    ///
    /// Removing a variant, or changing its fields, breaks compilation here.
    /// A newly added variant is NOT caught by the compiler, because this list is
    /// an ordinary `vec!`: it must be appended by hand, together with its entry
    /// in `ALL_KIND_NAMES`.
    fn all_kind_names_from_variants() -> BTreeSet<&'static str> {
        let loc = SourceLocation { start: 0, end: 0 };
        // Placeholder child for variants that require a boxed sub-node.
        let dummy_node = || Node::new(NodeKind::Undef, loc);

        let variants: Vec<NodeKind> = vec![
            NodeKind::Program { statements: vec![] },
            NodeKind::ExpressionStatement { expression: Box::new(dummy_node()) },
            NodeKind::VariableDeclaration {
                declarator: String::new(),
                variable: Box::new(dummy_node()),
                attributes: vec![],
                initializer: None,
            },
            NodeKind::VariableListDeclaration {
                declarator: String::new(),
                variables: vec![],
                attributes: vec![],
                initializer: None,
            },
            NodeKind::Variable { sigil: String::new(), name: String::new() },
            NodeKind::VariableWithAttributes {
                variable: Box::new(dummy_node()),
                attributes: vec![],
            },
            NodeKind::Assignment {
                lhs: Box::new(dummy_node()),
                rhs: Box::new(dummy_node()),
                op: String::new(),
            },
            NodeKind::Binary {
                op: String::new(),
                left: Box::new(dummy_node()),
                right: Box::new(dummy_node()),
            },
            NodeKind::Ternary {
                condition: Box::new(dummy_node()),
                then_expr: Box::new(dummy_node()),
                else_expr: Box::new(dummy_node()),
            },
            NodeKind::Unary { op: String::new(), operand: Box::new(dummy_node()) },
            NodeKind::Diamond,
            NodeKind::Ellipsis,
            NodeKind::Undef,
            NodeKind::Readline { filehandle: None },
            NodeKind::Glob { pattern: String::new() },
            NodeKind::Typeglob { name: String::new() },
            NodeKind::Number { value: String::new() },
            NodeKind::String { value: String::new(), interpolated: false },
            NodeKind::Heredoc {
                delimiter: String::new(),
                content: String::new(),
                interpolated: false,
                indented: false,
                command: false,
                body_span: None,
            },
            NodeKind::ArrayLiteral { elements: vec![] },
            NodeKind::HashLiteral { pairs: vec![] },
            NodeKind::Block { statements: vec![] },
            NodeKind::Eval { block: Box::new(dummy_node()) },
            NodeKind::Do { block: Box::new(dummy_node()) },
            NodeKind::Defer { block: Box::new(dummy_node()) },
            NodeKind::Try {
                body: Box::new(dummy_node()),
                catch_blocks: vec![],
                finally_block: None,
            },
            NodeKind::If {
                condition: Box::new(dummy_node()),
                then_branch: Box::new(dummy_node()),
                elsif_branches: vec![],
                else_branch: None,
                keyword: None,
            },
            NodeKind::LabeledStatement { label: String::new(), statement: Box::new(dummy_node()) },
            NodeKind::While {
                condition: Box::new(dummy_node()),
                body: Box::new(dummy_node()),
                continue_block: None,
                keyword: None,
            },
            NodeKind::Tie {
                variable: Box::new(dummy_node()),
                package: Box::new(dummy_node()),
                args: vec![],
            },
            NodeKind::Untie { variable: Box::new(dummy_node()) },
            NodeKind::For {
                init: None,
                condition: None,
                update: None,
                body: Box::new(dummy_node()),
                continue_block: None,
            },
            NodeKind::Foreach {
                variable: Box::new(dummy_node()),
                list: Box::new(dummy_node()),
                body: Box::new(dummy_node()),
                continue_block: None,
            },
            NodeKind::Given { expr: Box::new(dummy_node()), body: Box::new(dummy_node()) },
            NodeKind::When { condition: Box::new(dummy_node()), body: Box::new(dummy_node()) },
            NodeKind::Default { body: Box::new(dummy_node()) },
            NodeKind::StatementModifier {
                statement: Box::new(dummy_node()),
                modifier: String::new(),
                condition: Box::new(dummy_node()),
            },
            NodeKind::Subroutine {
                name: None,
                name_span: None,
                prototype: None,
                signature: None,
                attributes: vec![],
                body: Box::new(dummy_node()),
            },
            NodeKind::Prototype { content: String::new() },
            NodeKind::Signature { parameters: vec![] },
            NodeKind::MandatoryParameter { variable: Box::new(dummy_node()) },
            NodeKind::OptionalParameter {
                variable: Box::new(dummy_node()),
                default_value: Box::new(dummy_node()),
            },
            NodeKind::SlurpyParameter { variable: Box::new(dummy_node()) },
            NodeKind::NamedParameter { variable: Box::new(dummy_node()) },
            NodeKind::Method {
                name: String::new(),
                signature: None,
                attributes: vec![],
                body: Box::new(dummy_node()),
            },
            NodeKind::Return { value: None },
            NodeKind::LoopControl { op: String::new(), label: None },
            NodeKind::Goto { target: Box::new(dummy_node()) },
            NodeKind::MethodCall {
                object: Box::new(dummy_node()),
                method: String::new(),
                args: vec![],
            },
            NodeKind::FunctionCall { name: String::new(), args: vec![] },
            NodeKind::IndirectCall {
                method: String::new(),
                object: Box::new(dummy_node()),
                args: vec![],
            },
            NodeKind::Regex {
                pattern: String::new(),
                replacement: None,
                modifiers: String::new(),
                has_embedded_code: false,
            },
            NodeKind::Match {
                expr: Box::new(dummy_node()),
                pattern: String::new(),
                modifiers: String::new(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Substitution {
                expr: Box::new(dummy_node()),
                pattern: String::new(),
                replacement: String::new(),
                modifiers: String::new(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Transliteration {
                expr: Box::new(dummy_node()),
                search: String::new(),
                replace: String::new(),
                modifiers: String::new(),
                negated: false,
            },
            NodeKind::Package { name: String::new(), name_span: loc, block: None },
            NodeKind::Use { module: String::new(), args: vec![], has_filter_risk: false },
            NodeKind::No { module: String::new(), args: vec![], has_filter_risk: false },
            NodeKind::PhaseBlock {
                phase: String::new(),
                phase_span: None,
                block: Box::new(dummy_node()),
            },
            NodeKind::DataSection { marker: String::new(), body: None },
            NodeKind::Class { name: String::new(), parents: vec![], body: Box::new(dummy_node()) },
            NodeKind::Format { name: String::new(), body: String::new() },
            NodeKind::Identifier { name: String::new() },
            NodeKind::Error {
                message: String::new(),
                expected: vec![],
                found: None,
                partial: None,
            },
            NodeKind::MissingExpression,
            NodeKind::MissingStatement,
            NodeKind::MissingIdentifier,
            NodeKind::MissingBlock,
            NodeKind::UnknownRest,
        ];

        variants.iter().map(|v| v.kind_name()).collect()
    }

    /// `ALL_KIND_NAMES` must contain exactly the names produced by the variants
    /// built in `all_kind_names_from_variants`, with no duplicates.
    #[test]
    fn all_kind_names_is_consistent_with_kind_name() {
        let from_enum = all_kind_names_from_variants();
        let from_const: BTreeSet<&str> = NodeKind::ALL_KIND_NAMES.iter().copied().collect();

        // Check for duplicates in the const array
        assert_eq!(
            NodeKind::ALL_KIND_NAMES.len(),
            from_const.len(),
            "ALL_KIND_NAMES contains duplicates"
        );

        let only_in_enum: Vec<_> = from_enum.difference(&from_const).collect();
        let only_in_const: Vec<_> = from_const.difference(&from_enum).collect();

        assert!(
            only_in_enum.is_empty() && only_in_const.is_empty(),
            "ALL_KIND_NAMES is out of sync with NodeKind variants:\n  \
             in enum but not in ALL_KIND_NAMES: {only_in_enum:?}\n  \
             in ALL_KIND_NAMES but not in enum: {only_in_const:?}"
        );
    }

    /// `ALL_KIND_NAMES` is documented as being in alphabetical order; enforce it.
    ///
    /// Names are compared with `str` ordering (byte-wise), and each entry must be
    /// strictly greater than the one before it.
    #[test]
    fn all_kind_names_is_sorted() {
        let names = NodeKind::ALL_KIND_NAMES;

        // Pair every entry with its successor and keep the pairs that are not
        // strictly ascending.
        let out_of_order: Vec<(&str, &str)> = names
            .iter()
            .zip(names.iter().skip(1))
            .filter(|(prev, next)| prev >= next)
            .map(|(prev, next)| (*prev, *next))
            .collect();

        assert!(
            out_of_order.is_empty(),
            "ALL_KIND_NAMES is not in alphabetical order; offending adjacent pairs: {out_of_order:?}"
        );
    }

    /// Every entry of `RECOVERY_KIND_NAMES` must also appear in `ALL_KIND_NAMES`,
    /// and the recovery list itself must be free of duplicates.
    #[test]
    fn recovery_kind_names_is_subset_of_all() {
        let all: BTreeSet<&str> = NodeKind::ALL_KIND_NAMES.iter().copied().collect();
        let recovery: BTreeSet<&str> = NodeKind::RECOVERY_KIND_NAMES.iter().copied().collect();

        // No duplicates
        assert_eq!(
            NodeKind::RECOVERY_KIND_NAMES.len(),
            recovery.len(),
            "RECOVERY_KIND_NAMES contains duplicates"
        );

        let not_in_all: Vec<_> = recovery.difference(&all).collect();
        assert!(
            not_in_all.is_empty(),
            "RECOVERY_KIND_NAMES contains entries not in ALL_KIND_NAMES: {not_in_all:?}"
        );
    }
}