perl_ast/ast.rs
1//! Abstract Syntax Tree definitions for Perl within the parsing and LSP workflow.
2//!
3//! This module defines the comprehensive AST node types that represent parsed Perl code
4//! during the Parse → Index → Navigate → Complete → Analyze stages. The design is optimized
5//! for both direct use in Rust analysis and for generating tree-sitter compatible
6//! S-expressions during large workspace processing operations.
7//!
8//! # LSP Workflow Integration
9//!
10//! The AST structures support Perl tooling workflows by:
11//! - **Parse**: Produced by the parser as the canonical syntax tree
12//! - **Index**: Traversed to build symbol and reference tables
13//! - **Navigate**: Provides locations for definition and reference lookups
14//! - **Complete**: Supplies context for completion, hover, and signature help
15//! - **Analyze**: Feeds semantic analysis, diagnostics, and refactoring
16//!
17//! # Performance Characteristics
18//!
19//! AST structures are optimized for large codebases with:
20//! - Memory-efficient node representation using `Box<Node>` for recursive structures
21//! - Fast pattern matching via enum variants for common Perl constructs
22//! - Location tracking for precise error reporting in large files
23//! - Cheap cloning for parallel analysis tasks
24//!
25//! # Usage Examples
26//!
27//! ## Basic AST Construction
28//!
29//! ```rust
30//! use perl_ast::{Node, NodeKind, SourceLocation};
31//!
32//! // Create a simple variable declaration node
33//! let location = SourceLocation { start: 0, end: 10 };
34//! let node = Node::new(
35//! NodeKind::VariableDeclaration {
36//! declarator: "my".to_string(),
37//! variable: Box::new(Node::new(
38//! NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
39//! location,
40//! )),
41//! attributes: vec![],
42//! initializer: None,
43//! },
44//! location,
45//! );
46//! assert_eq!(node.kind.kind_name(), "VariableDeclaration");
47//! ```
48//!
49//! ## Tree-sitter S-expression Generation
50//!
51//! ```rust
52//! use perl_ast::{Node, NodeKind, SourceLocation};
53//!
54//! let loc = SourceLocation { start: 0, end: 2 };
55//! let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
56//! let program = Node::new(NodeKind::Program { statements: vec![num] }, loc);
57//!
58//! let sexp = program.to_sexp();
59//! assert!(sexp.starts_with("(source_file"));
60//! ```
61//!
62//! ## AST Traversal and Analysis
63//!
64//! ```rust
65//! use perl_ast::{Node, NodeKind, SourceLocation};
66//!
67//! fn count_variables(node: &Node) -> usize {
68//! let mut count = 0;
69//! match &node.kind {
70//! NodeKind::Variable { .. } => count += 1,
71//! NodeKind::Program { statements } => {
72//! for stmt in statements {
73//! count += count_variables(stmt);
74//! }
75//! }
76//! _ => {} // Handle other node types as needed
77//! }
78//! count
79//! }
80//!
81//! let loc = SourceLocation { start: 0, end: 5 };
82//! let var = Node::new(
83//! NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
84//! loc,
85//! );
86//! let program = Node::new(NodeKind::Program { statements: vec![var] }, loc);
87//! assert_eq!(count_variables(&program), 1);
88//! ```
89//!
90//! ## Parsing Integration
91//!
92//! In practice the AST is produced by the parser rather than built by hand
93//! (requires `perl-parser-core`):
94//!
95//! ```rust,ignore
96//! use perl_parser_core::Parser;
97//! use perl_ast::NodeKind;
98//!
99//! let mut parser = Parser::new("my $x = 42;");
100//! let ast = parser.parse().expect("should parse");
101//! assert!(matches!(ast.kind, NodeKind::Program { .. }));
102//! ```
103
104// Re-export SourceLocation from perl-position-tracking for unified span handling
105pub use perl_position_tracking::SourceLocation;
106// Re-export Token and TokenKind from perl-token for AST error nodes
107pub use perl_token::{Token, TokenKind};
108use std::fmt;
109
110/// Core AST node representing any Perl language construct within parsing workflows.
111///
112/// This is the fundamental building block for representing parsed Perl code. Each node
113/// contains both the semantic information (kind) and positional information (location)
114/// necessary for comprehensive script analysis.
115///
116/// # LSP Workflow Role
117///
118/// Nodes flow through tooling stages:
119/// - **Parse**: Created by the parser as it builds the syntax tree
120/// - **Index**: Visited to build symbol and reference tables
121/// - **Navigate**: Used to resolve definitions, references, and call hierarchy
122/// - **Complete**: Provides contextual information for completion and hover
123/// - **Analyze**: Drives semantic analysis and diagnostics
124///
125/// # Memory Optimization
126///
127/// The structure is designed for efficient memory usage during large-scale parsing:
128/// - `SourceLocation` uses compact position encoding for large files
129/// - `NodeKind` enum variants minimize memory overhead for common constructs
130/// - Clone operations are optimized for shared analysis workflows
131///
132/// # Examples
133///
134/// Construct a variable declaration node manually:
135///
136/// ```
137/// use perl_ast::{Node, NodeKind, SourceLocation};
138///
139/// let loc = SourceLocation { start: 0, end: 11 };
140/// let var = Node::new(
141/// NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
142/// loc,
143/// );
144/// let decl = Node::new(
145/// NodeKind::VariableDeclaration {
146/// declarator: "my".to_string(),
147/// variable: Box::new(var),
148/// attributes: vec![],
149/// initializer: None,
150/// },
151/// loc,
152/// );
153/// assert_eq!(decl.kind.kind_name(), "VariableDeclaration");
154/// ```
155///
156/// Typically you obtain nodes from the parser rather than constructing them by hand:
157///
158/// ```ignore
159/// use perl_parser::Parser;
160///
161/// let mut parser = Parser::new("my $x = 42;");
162/// let ast = parser.parse()?;
163/// println!("AST: {}", ast.to_sexp());
164/// ```
165#[derive(Debug, Clone, PartialEq)]
166pub struct Node {
167 /// The specific type and semantic content of this AST node
168 pub kind: NodeKind,
169 /// Source position information for error reporting and code navigation
170 pub location: SourceLocation,
171}
172
173impl Node {
174 /// Create a new AST node with the given kind and source location.
175 ///
176 /// # Examples
177 ///
178 /// ```
179 /// use perl_ast::{Node, NodeKind, SourceLocation};
180 ///
181 /// let node = Node::new(
182 /// NodeKind::Number { value: "42".to_string() },
183 /// SourceLocation { start: 0, end: 2 },
184 /// );
185 /// assert_eq!(node.kind.kind_name(), "Number");
186 /// assert_eq!(node.location.start, 0);
187 /// ```
188 pub fn new(kind: NodeKind, location: SourceLocation) -> Self {
189 Node { kind, location }
190 }
191
192 /// Convert the AST to a tree-sitter compatible S-expression.
193 ///
194 /// Produces a parenthesized representation compatible with tree-sitter's
195 /// S-expression format, useful for debugging and snapshot testing.
196 ///
197 /// # Examples
198 ///
199 /// ```
200 /// use perl_ast::{Node, NodeKind, SourceLocation};
201 ///
202 /// let loc = SourceLocation { start: 0, end: 2 };
203 /// let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
204 /// let program = Node::new(
205 /// NodeKind::Program { statements: vec![num] },
206 /// loc,
207 /// );
208 /// let sexp = program.to_sexp();
209 /// assert!(sexp.starts_with("(source_file"));
210 /// ```
211 pub fn to_sexp(&self) -> String {
212 match &self.kind {
213 NodeKind::Program { statements } => {
214 let stmts =
215 statements.iter().map(|s| s.to_sexp_inner()).collect::<Vec<_>>().join(" ");
216 format!("(source_file {})", stmts)
217 }
218
219 NodeKind::ExpressionStatement { expression } => {
220 format!("(expression_statement {})", expression.to_sexp())
221 }
222
223 NodeKind::VariableDeclaration { declarator, variable, attributes, initializer } => {
224 let attrs_str = if attributes.is_empty() {
225 String::new()
226 } else {
227 format!(" (attributes {})", attributes.join(" "))
228 };
229 if let Some(init) = initializer {
230 format!(
231 "({}_declaration {}{}{})",
232 declarator,
233 variable.to_sexp(),
234 attrs_str,
235 init.to_sexp()
236 )
237 } else {
238 format!("({}_declaration {}{})", declarator, variable.to_sexp(), attrs_str)
239 }
240 }
241
242 NodeKind::VariableListDeclaration {
243 declarator,
244 variables,
245 attributes,
246 initializer,
247 } => {
248 let vars = variables.iter().map(|v| v.to_sexp()).collect::<Vec<_>>().join(" ");
249 let attrs_str = if attributes.is_empty() {
250 String::new()
251 } else {
252 format!(" (attributes {})", attributes.join(" "))
253 };
254 if let Some(init) = initializer {
255 format!(
256 "({}_declaration ({}){}{})",
257 declarator,
258 vars,
259 attrs_str,
260 init.to_sexp()
261 )
262 } else {
263 format!("({}_declaration ({}){})", declarator, vars, attrs_str)
264 }
265 }
266
267 NodeKind::Variable { sigil, name } => {
268 // Format expected by bless parsing tests: (variable $ name)
269 format!("(variable {} {})", sigil, name)
270 }
271
272 NodeKind::VariableWithAttributes { variable, attributes } => {
273 let attrs = attributes.join(" ");
274 format!("({} (attributes {}))", variable.to_sexp(), attrs)
275 }
276
277 NodeKind::Assignment { lhs, rhs, op } => {
278 format!(
279 "(assignment_{} {} {})",
280 op.replace("=", "assign"),
281 lhs.to_sexp(),
282 rhs.to_sexp()
283 )
284 }
285
286 NodeKind::Binary { op, left, right } => {
287 // Tree-sitter format: (binary_op left right)
288 let op_name = format_binary_operator(op);
289 format!("({} {} {})", op_name, left.to_sexp(), right.to_sexp())
290 }
291
292 NodeKind::Ternary { condition, then_expr, else_expr } => {
293 format!(
294 "(ternary {} {} {})",
295 condition.to_sexp(),
296 then_expr.to_sexp(),
297 else_expr.to_sexp()
298 )
299 }
300
301 NodeKind::Unary { op, operand } => {
302 // Tree-sitter format: (unary_op operand)
303 let op_name = format_unary_operator(op);
304 format!("({} {})", op_name, operand.to_sexp())
305 }
306
307 NodeKind::Diamond => "(diamond)".to_string(),
308
309 NodeKind::Ellipsis => "(ellipsis)".to_string(),
310
311 NodeKind::Undef => "(undef)".to_string(),
312
313 NodeKind::Readline { filehandle } => {
314 if let Some(fh) = filehandle {
315 format!("(readline {})", fh)
316 } else {
317 "(readline)".to_string()
318 }
319 }
320
321 NodeKind::Glob { pattern } => {
322 format!("(glob {})", pattern)
323 }
324 NodeKind::Typeglob { name } => {
325 format!("(typeglob {})", name)
326 }
327
328 NodeKind::Number { value } => {
329 // Format expected by bless parsing tests: (number value)
330 format!("(number {})", value)
331 }
332
333 NodeKind::String { value, interpolated } => {
334 // Escape quotes in string value to prevent S-expression parsing issues
335 let escaped_value = value.replace('\\', "\\\\").replace('"', "\\\"");
336
337 // Format based on interpolation status
338 if *interpolated {
339 format!("(string_interpolated \"{}\")", escaped_value)
340 } else {
341 format!("(string \"{}\")", escaped_value)
342 }
343 }
344
345 NodeKind::Heredoc { delimiter, content, interpolated, indented, command, .. } => {
346 let type_str = if *command {
347 "heredoc_command"
348 } else if *indented {
349 if *interpolated { "heredoc_indented_interpolated" } else { "heredoc_indented" }
350 } else if *interpolated {
351 "heredoc_interpolated"
352 } else {
353 "heredoc"
354 };
355 format!("({} {:?} {:?})", type_str, delimiter, content)
356 }
357
358 NodeKind::ArrayLiteral { elements } => {
359 let elems = elements.iter().map(|e| e.to_sexp()).collect::<Vec<_>>().join(" ");
360 format!("(array {})", elems)
361 }
362
363 NodeKind::HashLiteral { pairs } => {
364 let kvs = pairs
365 .iter()
366 .map(|(k, v)| format!("({} {})", k.to_sexp(), v.to_sexp()))
367 .collect::<Vec<_>>()
368 .join(" ");
369 format!("(hash {})", kvs)
370 }
371
372 NodeKind::Block { statements } => {
373 let stmts = statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ");
374 format!("(block {})", stmts)
375 }
376
377 NodeKind::Eval { block } => {
378 format!("(eval {})", block.to_sexp())
379 }
380
381 NodeKind::Do { block } => {
382 format!("(do {})", block.to_sexp())
383 }
384
385 NodeKind::Defer { block } => {
386 format!("(defer {})", block.to_sexp())
387 }
388
389 NodeKind::Try { body, catch_blocks, finally_block } => {
390 let mut parts = vec![format!("(try {})", body.to_sexp())];
391
392 for (var, block) in catch_blocks {
393 if let Some(v) = var {
394 parts.push(format!("(catch {} {})", v, block.to_sexp()));
395 } else {
396 parts.push(format!("(catch {})", block.to_sexp()));
397 }
398 }
399
400 if let Some(finally) = finally_block {
401 parts.push(format!("(finally {})", finally.to_sexp()));
402 }
403
404 parts.join(" ")
405 }
406
407 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
408 let mut parts =
409 vec![format!("(if {} {})", condition.to_sexp(), then_branch.to_sexp())];
410
411 for (cond, block) in elsif_branches {
412 parts.push(format!("(elsif {} {})", cond.to_sexp(), block.to_sexp()));
413 }
414
415 if let Some(else_block) = else_branch {
416 parts.push(format!("(else {})", else_block.to_sexp()));
417 }
418
419 parts.join(" ")
420 }
421
422 NodeKind::LabeledStatement { label, statement } => {
423 format!("(labeled_statement {} {})", label, statement.to_sexp())
424 }
425
426 NodeKind::While { condition, body, continue_block } => {
427 let mut s = format!("(while {} {})", condition.to_sexp(), body.to_sexp());
428 if let Some(cont) = continue_block {
429 s.push_str(&format!(" (continue {})", cont.to_sexp()));
430 }
431 s
432 }
433 NodeKind::Tie { variable, package, args } => {
434 let mut s = format!("(tie {} {}", variable.to_sexp(), package.to_sexp());
435 for arg in args {
436 s.push_str(&format!(" {}", arg.to_sexp()));
437 }
438 s.push(')');
439 s
440 }
441 NodeKind::Untie { variable } => {
442 format!("(untie {})", variable.to_sexp())
443 }
444 NodeKind::For { init, condition, update, body, continue_block } => {
445 let init_str =
446 init.as_ref().map(|i| i.to_sexp()).unwrap_or_else(|| "()".to_string());
447 let cond_str =
448 condition.as_ref().map(|c| c.to_sexp()).unwrap_or_else(|| "()".to_string());
449 let update_str =
450 update.as_ref().map(|u| u.to_sexp()).unwrap_or_else(|| "()".to_string());
451 let mut result =
452 format!("(for {} {} {} {})", init_str, cond_str, update_str, body.to_sexp());
453 if let Some(cont) = continue_block {
454 result.push_str(&format!(" (continue {})", cont.to_sexp()));
455 }
456 result
457 }
458
459 NodeKind::Foreach { variable, list, body, continue_block } => {
460 let cont = if let Some(cb) = continue_block {
461 format!(" {}", cb.to_sexp())
462 } else {
463 String::new()
464 };
465 format!(
466 "(foreach {} {} {}{})",
467 variable.to_sexp(),
468 list.to_sexp(),
469 body.to_sexp(),
470 cont
471 )
472 }
473
474 NodeKind::Given { expr, body } => {
475 format!("(given {} {})", expr.to_sexp(), body.to_sexp())
476 }
477
478 NodeKind::When { condition, body } => {
479 format!("(when {} {})", condition.to_sexp(), body.to_sexp())
480 }
481
482 NodeKind::Default { body } => {
483 format!("(default {})", body.to_sexp())
484 }
485
486 NodeKind::StatementModifier { statement, modifier, condition } => {
487 format!(
488 "(statement_modifier_{} {} {})",
489 modifier,
490 statement.to_sexp(),
491 condition.to_sexp()
492 )
493 }
494
495 NodeKind::Subroutine { name, prototype, signature, attributes, body, name_span: _ } => {
496 if let Some(sub_name) = name {
497 // Named subroutine - bless test expected format: (sub name () block)
498 let mut parts = vec![sub_name.clone()];
499
500 // Add attributes if present (before prototype/signature)
501 if !attributes.is_empty() {
502 for attr in attributes {
503 parts.push(format!(":{}", attr));
504 }
505 }
506
507 // Add prototype/signature - use () for empty prototype
508 if let Some(proto) = prototype {
509 parts.push(format!("({})", proto.to_sexp()));
510 } else if signature.is_some() {
511 // If there's a signature but no prototype, still show ()
512 parts.push("()".to_string());
513 } else {
514 parts.push("()".to_string());
515 }
516
517 // Add body
518 parts.push(body.to_sexp());
519
520 // Format: (sub name [attrs...] ()(block ...)) - space between name and (), no space between () and block
521 if parts.len() >= 3 && parts[parts.len() - 2] == "()" {
522 let name_and_attrs = parts[0..parts.len() - 2].join(" ");
523 let proto = &parts[parts.len() - 2];
524 let body = &parts[parts.len() - 1];
525 format!("(sub {} {}{})", name_and_attrs, proto, body)
526 } else {
527 format!("(sub {})", parts.join(" "))
528 }
529 } else {
530 // Anonymous subroutine - tree-sitter format
531 let mut parts = Vec::new();
532
533 // Add attributes if present
534 if !attributes.is_empty() {
535 let attrs: Vec<String> = attributes
536 .iter()
537 .map(|_attr| "(attribute (attribute_name))".to_string())
538 .collect();
539 parts.push(format!("(attrlist {})", attrs.join("")));
540 }
541
542 // Add prototype if present
543 if let Some(proto) = prototype {
544 parts.push(proto.to_sexp());
545 }
546
547 // Add signature if present
548 if let Some(sig) = signature {
549 parts.push(sig.to_sexp());
550 }
551
552 // Add body
553 parts.push(body.to_sexp());
554
555 format!("(anonymous_subroutine_expression {})", parts.join(""))
556 }
557 }
558
559 NodeKind::Prototype { content: _ } => "(prototype)".to_string(),
560
561 NodeKind::Signature { parameters } => {
562 let params = parameters.iter().map(|p| p.to_sexp()).collect::<Vec<_>>().join(" ");
563 format!("(signature {})", params)
564 }
565
566 NodeKind::MandatoryParameter { variable } => {
567 format!("(mandatory_parameter {})", variable.to_sexp())
568 }
569
570 NodeKind::OptionalParameter { variable, default_value } => {
571 format!("(optional_parameter {} {})", variable.to_sexp(), default_value.to_sexp())
572 }
573
574 NodeKind::SlurpyParameter { variable } => {
575 format!("(slurpy_parameter {})", variable.to_sexp())
576 }
577
578 NodeKind::NamedParameter { variable } => {
579 format!("(named_parameter {})", variable.to_sexp())
580 }
581
582 NodeKind::Method { name: _, signature, attributes, body } => {
583 let block_contents = match &body.kind {
584 NodeKind::Block { statements } => {
585 statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ")
586 }
587 _ => body.to_sexp(),
588 };
589
590 let mut parts = vec!["(bareword)".to_string()];
591
592 // Add signature if present
593 if let Some(sig) = signature {
594 parts.push(sig.to_sexp());
595 }
596
597 // Add attributes if present
598 if !attributes.is_empty() {
599 let attrs: Vec<String> = attributes
600 .iter()
601 .map(|_attr| "(attribute (attribute_name))".to_string())
602 .collect();
603 parts.push(format!("(attrlist {})", attrs.join("")));
604 }
605
606 parts.push(format!("(block {})", block_contents));
607 format!("(method_declaration_statement {})", parts.join(" "))
608 }
609
610 NodeKind::Return { value } => {
611 if let Some(val) = value {
612 format!("(return {})", val.to_sexp())
613 } else {
614 "(return)".to_string()
615 }
616 }
617
618 NodeKind::LoopControl { op, label } => {
619 if let Some(l) = label {
620 format!("({} {})", op, l)
621 } else {
622 format!("({})", op)
623 }
624 }
625
626 NodeKind::Goto { target } => {
627 format!("(goto {})", target.to_sexp())
628 }
629
630 NodeKind::MethodCall { object, method, args } => {
631 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
632 format!("(method_call {} {} ({}))", object.to_sexp(), method, args_str)
633 }
634
635 NodeKind::FunctionCall { name, args } => {
636 // Special handling for functions that should use call format in tree-sitter tests
637 if matches!(
638 name.as_str(),
639 "bless"
640 | "shift"
641 | "unshift"
642 | "open"
643 | "die"
644 | "warn"
645 | "print"
646 | "printf"
647 | "say"
648 | "push"
649 | "pop"
650 | "map"
651 | "sort"
652 | "grep"
653 | "keys"
654 | "values"
655 | "each"
656 | "defined"
657 | "scalar"
658 | "ref"
659 ) {
660 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
661 if args.is_empty() {
662 format!("(call {} ())", name)
663 } else {
664 format!("(call {} ({}))", name, args_str)
665 }
666 } else {
667 // Tree-sitter format varies by context
668 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
669 if args.is_empty() {
670 "(function_call_expression (function))".to_string()
671 } else {
672 format!("(ambiguous_function_call_expression (function) {})", args_str)
673 }
674 }
675 }
676
677 NodeKind::IndirectCall { method, object, args } => {
678 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
679 format!("(indirect_call {} {} ({}))", method, object.to_sexp(), args_str)
680 }
681
682 NodeKind::Regex { pattern, replacement, modifiers, has_embedded_code } => {
683 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
684 format!("(regex {:?} {:?} {:?}{})", pattern, replacement, modifiers, risk_marker)
685 }
686
687 NodeKind::Match { expr, pattern, modifiers, has_embedded_code, negated } => {
688 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
689 let op = if *negated { "not_match" } else { "match" };
690 format!(
691 "({} {} (regex {:?} {:?}{}))",
692 op,
693 expr.to_sexp(),
694 pattern,
695 modifiers,
696 risk_marker
697 )
698 }
699
700 NodeKind::Substitution {
701 expr,
702 pattern,
703 replacement,
704 modifiers,
705 has_embedded_code,
706 negated,
707 } => {
708 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
709 let neg_marker = if *negated { " (negated)" } else { "" };
710 format!(
711 "(substitution {} {:?} {:?} {:?}{}{})",
712 expr.to_sexp(),
713 pattern,
714 replacement,
715 modifiers,
716 risk_marker,
717 neg_marker
718 )
719 }
720
721 NodeKind::Transliteration { expr, search, replace, modifiers, negated } => {
722 let neg_marker = if *negated { " (negated)" } else { "" };
723 format!(
724 "(transliteration {} {:?} {:?} {:?}{})",
725 expr.to_sexp(),
726 search,
727 replace,
728 modifiers,
729 neg_marker
730 )
731 }
732
733 NodeKind::Package { name, block, name_span: _ } => {
734 if let Some(blk) = block {
735 format!("(package {} {})", name, blk.to_sexp())
736 } else {
737 format!("(package {})", name)
738 }
739 }
740
741 NodeKind::Use { module, args, has_filter_risk } => {
742 let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
743 if args.is_empty() {
744 format!("(use {}{})", module, risk_marker)
745 } else {
746 let args_str = args.join(" ");
747 format!("(use {} ({}){})", module, args_str, risk_marker)
748 }
749 }
750
751 NodeKind::No { module, args, has_filter_risk } => {
752 let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
753 if args.is_empty() {
754 format!("(no {}{})", module, risk_marker)
755 } else {
756 let args_str = args.join(" ");
757 format!("(no {} ({}){})", module, args_str, risk_marker)
758 }
759 }
760
761 NodeKind::PhaseBlock { phase, phase_span: _, block } => {
762 format!("({} {})", phase, block.to_sexp())
763 }
764
765 NodeKind::DataSection { marker, body } => {
766 if let Some(body_text) = body {
767 format!("(data_section {} \"{}\")", marker, body_text.escape_default())
768 } else {
769 format!("(data_section {})", marker)
770 }
771 }
772
773 NodeKind::Class { name, parents, body } => {
774 if parents.is_empty() {
775 format!("(class {} {})", name, body.to_sexp())
776 } else {
777 format!("(class {} :isa({}) {})", name, parents.join(","), body.to_sexp())
778 }
779 }
780
781 NodeKind::Format { name, body } => {
782 format!("(format {} {:?})", name, body)
783 }
784
785 NodeKind::Identifier { name } => {
786 // Format expected by tests: (identifier name)
787 format!("(identifier {})", name)
788 }
789
790 NodeKind::Error { message, partial, .. } => {
791 if let Some(node) = partial {
792 format!("(ERROR \"{}\" {})", message.escape_default(), node.to_sexp())
793 } else {
794 format!("(ERROR \"{}\")", message.escape_default())
795 }
796 }
797 NodeKind::MissingExpression => "(missing_expression)".to_string(),
798 NodeKind::MissingStatement => "(missing_statement)".to_string(),
799 NodeKind::MissingIdentifier => "(missing_identifier)".to_string(),
800 NodeKind::MissingBlock => "(missing_block)".to_string(),
801 NodeKind::UnknownRest => "(UNKNOWN_REST)".to_string(),
802 }
803 }
804
805 /// Convert the AST to S-expression format that unwraps expression statements in programs
806 pub fn to_sexp_inner(&self) -> String {
807 match &self.kind {
808 NodeKind::ExpressionStatement { expression } => {
809 // Check if this is an anonymous subroutine - if so, keep it wrapped
810 match &expression.kind {
811 NodeKind::Subroutine { name, .. } if name.is_none() => {
812 // Anonymous subroutine should remain wrapped in expression statement
813 self.to_sexp()
814 }
815 _ => {
816 // In the inner format, other expression statements are unwrapped
817 expression.to_sexp()
818 }
819 }
820 }
821 _ => {
822 // For all other node types, use regular to_sexp
823 self.to_sexp()
824 }
825 }
826 }
827
828 /// Call a function on every direct child node of this node.
829 ///
830 /// This enables depth-first traversal for operations like heredoc content attachment.
831 /// The closure receives a mutable reference to each child node.
832 #[inline]
833 pub fn for_each_child_mut<F: FnMut(&mut Node)>(&mut self, mut f: F) {
834 match &mut self.kind {
835 NodeKind::Tie { variable, package, args } => {
836 f(variable);
837 f(package);
838 for arg in args {
839 f(arg);
840 }
841 }
842 NodeKind::Untie { variable } => f(variable),
843
844 // Root program node
845 NodeKind::Program { statements } => {
846 for stmt in statements {
847 f(stmt);
848 }
849 }
850
851 // Statement wrappers
852 NodeKind::ExpressionStatement { expression } => f(expression),
853
854 // Variable declarations
855 NodeKind::VariableDeclaration { variable, initializer, .. } => {
856 f(variable);
857 if let Some(init) = initializer {
858 f(init);
859 }
860 }
861 NodeKind::VariableListDeclaration { variables, initializer, .. } => {
862 for var in variables {
863 f(var);
864 }
865 if let Some(init) = initializer {
866 f(init);
867 }
868 }
869 NodeKind::VariableWithAttributes { variable, .. } => f(variable),
870
871 // Binary operations
872 NodeKind::Binary { left, right, .. } => {
873 f(left);
874 f(right);
875 }
876 NodeKind::Ternary { condition, then_expr, else_expr } => {
877 f(condition);
878 f(then_expr);
879 f(else_expr);
880 }
881 NodeKind::Unary { operand, .. } => f(operand),
882 NodeKind::Assignment { lhs, rhs, .. } => {
883 f(lhs);
884 f(rhs);
885 }
886
887 // Control flow
888 NodeKind::Block { statements } => {
889 for stmt in statements {
890 f(stmt);
891 }
892 }
893 NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
894 f(condition);
895 f(then_branch);
896 for (elsif_cond, elsif_body) in elsif_branches {
897 f(elsif_cond);
898 f(elsif_body);
899 }
900 if let Some(else_body) = else_branch {
901 f(else_body);
902 }
903 }
904 NodeKind::While { condition, body, continue_block, .. } => {
905 f(condition);
906 f(body);
907 if let Some(cont) = continue_block {
908 f(cont);
909 }
910 }
911 NodeKind::For { init, condition, update, body, continue_block, .. } => {
912 if let Some(i) = init {
913 f(i);
914 }
915 if let Some(c) = condition {
916 f(c);
917 }
918 if let Some(u) = update {
919 f(u);
920 }
921 f(body);
922 if let Some(cont) = continue_block {
923 f(cont);
924 }
925 }
926 NodeKind::Foreach { variable, list, body, continue_block } => {
927 f(variable);
928 f(list);
929 f(body);
930 if let Some(cb) = continue_block {
931 f(cb);
932 }
933 }
934 NodeKind::Given { expr, body } => {
935 f(expr);
936 f(body);
937 }
938 NodeKind::When { condition, body } => {
939 f(condition);
940 f(body);
941 }
942 NodeKind::Default { body } => f(body),
943 NodeKind::StatementModifier { statement, condition, .. } => {
944 f(statement);
945 f(condition);
946 }
947 NodeKind::LabeledStatement { statement, .. } => f(statement),
948
949 // Eval and Do blocks
950 NodeKind::Eval { block } => f(block),
951 NodeKind::Do { block } => f(block),
952 NodeKind::Defer { block } => f(block),
953 NodeKind::Try { body, catch_blocks, finally_block } => {
954 f(body);
955 for (_, catch_body) in catch_blocks {
956 f(catch_body);
957 }
958 if let Some(finally) = finally_block {
959 f(finally);
960 }
961 }
962
963 // Function calls
964 NodeKind::FunctionCall { args, .. } => {
965 for arg in args {
966 f(arg);
967 }
968 }
969 NodeKind::MethodCall { object, args, .. } => {
970 f(object);
971 for arg in args {
972 f(arg);
973 }
974 }
975 NodeKind::IndirectCall { object, args, .. } => {
976 f(object);
977 for arg in args {
978 f(arg);
979 }
980 }
981
982 // Functions
983 NodeKind::Subroutine { prototype, signature, body, .. } => {
984 if let Some(proto) = prototype {
985 f(proto);
986 }
987 if let Some(sig) = signature {
988 f(sig);
989 }
990 f(body);
991 }
992 NodeKind::Method { signature, body, .. } => {
993 if let Some(sig) = signature {
994 f(sig);
995 }
996 f(body);
997 }
998 NodeKind::Return { value } => {
999 if let Some(v) = value {
1000 f(v);
1001 }
1002 }
1003 NodeKind::Goto { target } => f(target),
1004 NodeKind::Signature { parameters } => {
1005 for param in parameters {
1006 f(param);
1007 }
1008 }
1009 NodeKind::MandatoryParameter { variable } => f(variable),
1010 NodeKind::OptionalParameter { variable, default_value } => {
1011 f(variable);
1012 f(default_value);
1013 }
1014 NodeKind::SlurpyParameter { variable } => f(variable),
1015 NodeKind::NamedParameter { variable } => f(variable),
1016
1017 // Pattern matching
1018 NodeKind::Match { expr, .. } => f(expr),
1019 NodeKind::Substitution { expr, .. } => f(expr),
1020 NodeKind::Transliteration { expr, .. } => f(expr),
1021
1022 // Containers
1023 NodeKind::ArrayLiteral { elements } => {
1024 for elem in elements {
1025 f(elem);
1026 }
1027 }
1028 NodeKind::HashLiteral { pairs } => {
1029 for (key, value) in pairs {
1030 f(key);
1031 f(value);
1032 }
1033 }
1034
1035 // Package system
1036 NodeKind::Package { block, .. } => {
1037 if let Some(b) = block {
1038 f(b);
1039 }
1040 }
1041 NodeKind::PhaseBlock { block, .. } => f(block),
1042 NodeKind::Class { body, .. } => f(body),
1043
1044 // Error node might have a partial valid tree
1045 NodeKind::Error { partial, .. } => {
1046 if let Some(node) = partial {
1047 f(node);
1048 }
1049 }
1050
1051 // Leaf nodes (no children to traverse)
1052 NodeKind::Variable { .. }
1053 | NodeKind::Identifier { .. }
1054 | NodeKind::Number { .. }
1055 | NodeKind::String { .. }
1056 | NodeKind::Heredoc { .. }
1057 | NodeKind::Regex { .. }
1058 | NodeKind::Readline { .. }
1059 | NodeKind::Glob { .. }
1060 | NodeKind::Typeglob { .. }
1061 | NodeKind::Diamond
1062 | NodeKind::Ellipsis
1063 | NodeKind::Undef
1064 | NodeKind::Use { .. }
1065 | NodeKind::No { .. }
1066 | NodeKind::Prototype { .. }
1067 | NodeKind::DataSection { .. }
1068 | NodeKind::Format { .. }
1069 | NodeKind::LoopControl { .. }
1070 | NodeKind::MissingExpression
1071 | NodeKind::MissingStatement
1072 | NodeKind::MissingIdentifier
1073 | NodeKind::MissingBlock
1074 | NodeKind::UnknownRest => {}
1075 }
1076 }
1077
1078 /// Call a function on every direct child node of this node (immutable version).
1079 ///
1080 /// This enables depth-first traversal for read-only operations like AST analysis.
1081 /// The closure receives an immutable reference to each child node.
1082 #[inline]
1083 pub fn for_each_child<'a, F: FnMut(&'a Node)>(&'a self, mut f: F) {
1084 match &self.kind {
1085 NodeKind::Tie { variable, package, args } => {
1086 f(variable);
1087 f(package);
1088 for arg in args {
1089 f(arg);
1090 }
1091 }
1092 NodeKind::Untie { variable } => f(variable),
1093
1094 // Root program node
1095 NodeKind::Program { statements } => {
1096 for stmt in statements {
1097 f(stmt);
1098 }
1099 }
1100
1101 // Statement wrappers
1102 NodeKind::ExpressionStatement { expression } => f(expression),
1103
1104 // Variable declarations
1105 NodeKind::VariableDeclaration { variable, initializer, .. } => {
1106 f(variable);
1107 if let Some(init) = initializer {
1108 f(init);
1109 }
1110 }
1111 NodeKind::VariableListDeclaration { variables, initializer, .. } => {
1112 for var in variables {
1113 f(var);
1114 }
1115 if let Some(init) = initializer {
1116 f(init);
1117 }
1118 }
1119 NodeKind::VariableWithAttributes { variable, .. } => f(variable),
1120
1121 // Binary operations
1122 NodeKind::Binary { left, right, .. } => {
1123 f(left);
1124 f(right);
1125 }
1126 NodeKind::Ternary { condition, then_expr, else_expr } => {
1127 f(condition);
1128 f(then_expr);
1129 f(else_expr);
1130 }
1131 NodeKind::Unary { operand, .. } => f(operand),
1132 NodeKind::Assignment { lhs, rhs, .. } => {
1133 f(lhs);
1134 f(rhs);
1135 }
1136
1137 // Control flow
1138 NodeKind::Block { statements } => {
1139 for stmt in statements {
1140 f(stmt);
1141 }
1142 }
1143 NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
1144 f(condition);
1145 f(then_branch);
1146 for (elsif_cond, elsif_body) in elsif_branches {
1147 f(elsif_cond);
1148 f(elsif_body);
1149 }
1150 if let Some(else_body) = else_branch {
1151 f(else_body);
1152 }
1153 }
1154 NodeKind::While { condition, body, continue_block, .. } => {
1155 f(condition);
1156 f(body);
1157 if let Some(cont) = continue_block {
1158 f(cont);
1159 }
1160 }
1161 NodeKind::For { init, condition, update, body, continue_block, .. } => {
1162 if let Some(i) = init {
1163 f(i);
1164 }
1165 if let Some(c) = condition {
1166 f(c);
1167 }
1168 if let Some(u) = update {
1169 f(u);
1170 }
1171 f(body);
1172 if let Some(cont) = continue_block {
1173 f(cont);
1174 }
1175 }
1176 NodeKind::Foreach { variable, list, body, continue_block } => {
1177 f(variable);
1178 f(list);
1179 f(body);
1180 if let Some(cb) = continue_block {
1181 f(cb);
1182 }
1183 }
1184 NodeKind::Given { expr, body } => {
1185 f(expr);
1186 f(body);
1187 }
1188 NodeKind::When { condition, body } => {
1189 f(condition);
1190 f(body);
1191 }
1192 NodeKind::Default { body } => f(body),
1193 NodeKind::StatementModifier { statement, condition, .. } => {
1194 f(statement);
1195 f(condition);
1196 }
1197 NodeKind::LabeledStatement { statement, .. } => f(statement),
1198
1199 // Eval and Do blocks
1200 NodeKind::Eval { block } => f(block),
1201 NodeKind::Do { block } => f(block),
1202 NodeKind::Defer { block } => f(block),
1203 NodeKind::Try { body, catch_blocks, finally_block } => {
1204 f(body);
1205 for (_, catch_body) in catch_blocks {
1206 f(catch_body);
1207 }
1208 if let Some(finally) = finally_block {
1209 f(finally);
1210 }
1211 }
1212
1213 // Function calls
1214 NodeKind::FunctionCall { args, .. } => {
1215 for arg in args {
1216 f(arg);
1217 }
1218 }
1219 NodeKind::MethodCall { object, args, .. } => {
1220 f(object);
1221 for arg in args {
1222 f(arg);
1223 }
1224 }
1225 NodeKind::IndirectCall { object, args, .. } => {
1226 f(object);
1227 for arg in args {
1228 f(arg);
1229 }
1230 }
1231
1232 // Functions
1233 NodeKind::Subroutine { prototype, signature, body, .. } => {
1234 if let Some(proto) = prototype {
1235 f(proto);
1236 }
1237 if let Some(sig) = signature {
1238 f(sig);
1239 }
1240 f(body);
1241 }
1242 NodeKind::Method { signature, body, .. } => {
1243 if let Some(sig) = signature {
1244 f(sig);
1245 }
1246 f(body);
1247 }
1248 NodeKind::Return { value } => {
1249 if let Some(v) = value {
1250 f(v);
1251 }
1252 }
1253 NodeKind::Goto { target } => f(target),
1254 NodeKind::Signature { parameters } => {
1255 for param in parameters {
1256 f(param);
1257 }
1258 }
1259 NodeKind::MandatoryParameter { variable } => f(variable),
1260 NodeKind::OptionalParameter { variable, default_value } => {
1261 f(variable);
1262 f(default_value);
1263 }
1264 NodeKind::SlurpyParameter { variable } => f(variable),
1265 NodeKind::NamedParameter { variable } => f(variable),
1266
1267 // Pattern matching
1268 NodeKind::Match { expr, .. } => f(expr),
1269 NodeKind::Substitution { expr, .. } => f(expr),
1270 NodeKind::Transliteration { expr, .. } => f(expr),
1271
1272 // Containers
1273 NodeKind::ArrayLiteral { elements } => {
1274 for elem in elements {
1275 f(elem);
1276 }
1277 }
1278 NodeKind::HashLiteral { pairs } => {
1279 for (key, value) in pairs {
1280 f(key);
1281 f(value);
1282 }
1283 }
1284
1285 // Package system
1286 NodeKind::Package { block, .. } => {
1287 if let Some(b) = block {
1288 f(b);
1289 }
1290 }
1291 NodeKind::PhaseBlock { block, .. } => f(block),
1292 NodeKind::Class { body, .. } => f(body),
1293
1294 // Error node might have a partial valid tree
1295 NodeKind::Error { partial, .. } => {
1296 if let Some(node) = partial {
1297 f(node);
1298 }
1299 }
1300
1301 // Leaf nodes (no children to traverse)
1302 NodeKind::Variable { .. }
1303 | NodeKind::Identifier { .. }
1304 | NodeKind::Number { .. }
1305 | NodeKind::String { .. }
1306 | NodeKind::Heredoc { .. }
1307 | NodeKind::Regex { .. }
1308 | NodeKind::Readline { .. }
1309 | NodeKind::Glob { .. }
1310 | NodeKind::Typeglob { .. }
1311 | NodeKind::Diamond
1312 | NodeKind::Ellipsis
1313 | NodeKind::Undef
1314 | NodeKind::Use { .. }
1315 | NodeKind::No { .. }
1316 | NodeKind::Prototype { .. }
1317 | NodeKind::DataSection { .. }
1318 | NodeKind::Format { .. }
1319 | NodeKind::LoopControl { .. }
1320 | NodeKind::MissingExpression
1321 | NodeKind::MissingStatement
1322 | NodeKind::MissingIdentifier
1323 | NodeKind::MissingBlock
1324 | NodeKind::UnknownRest => {}
1325 }
1326 }
1327
1328 /// Count the total number of nodes in this subtree (inclusive).
1329 ///
1330 /// # Examples
1331 ///
1332 /// ```
1333 /// use perl_ast::{Node, NodeKind, SourceLocation};
1334 ///
1335 /// let loc = SourceLocation { start: 0, end: 1 };
1336 /// let leaf = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1337 /// assert_eq!(leaf.count_nodes(), 1);
1338 ///
1339 /// let program = Node::new(
1340 /// NodeKind::Program { statements: vec![leaf] },
1341 /// loc,
1342 /// );
1343 /// assert_eq!(program.count_nodes(), 2);
1344 /// ```
1345 pub fn count_nodes(&self) -> usize {
1346 let mut count = 1;
1347 self.for_each_child(|child| {
1348 count += child.count_nodes();
1349 });
1350 count
1351 }
1352
1353 /// Collect direct child nodes into a vector for convenience APIs.
1354 ///
1355 /// # Examples
1356 ///
1357 /// ```
1358 /// use perl_ast::{Node, NodeKind, SourceLocation};
1359 ///
1360 /// let loc = SourceLocation { start: 0, end: 1 };
1361 /// let stmt = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1362 /// let program = Node::new(
1363 /// NodeKind::Program { statements: vec![stmt] },
1364 /// loc,
1365 /// );
1366 /// assert_eq!(program.children().len(), 1);
1367 /// ```
1368 #[inline]
1369 pub fn children(&self) -> Vec<&Node> {
1370 let mut children = Vec::new();
1371 self.for_each_child(|child| children.push(child));
1372 children
1373 }
1374
1375 /// Count direct child nodes without allocating an intermediate vector.
1376 ///
1377 /// This is more efficient than `children().len()` when callers only need
1378 /// cardinality.
1379 #[inline]
1380 pub fn child_count(&self) -> usize {
1381 let mut count = 0;
1382 self.for_each_child(|_| count += 1);
1383 count
1384 }
1385
1386 /// Get the first direct child node, if any.
1387 ///
1388 /// Optimized to avoid allocating the children vector.
1389 #[inline]
1390 pub fn first_child(&self) -> Option<&Node> {
1391 let mut result = None;
1392 self.for_each_child(|child| {
1393 if result.is_none() {
1394 result = Some(child);
1395 }
1396 });
1397 result
1398 }
1399
1400 /// Returns `true` when this node's source span contains `offset`.
1401 ///
1402 /// The start position is inclusive and the end position is exclusive.
1403 #[inline]
1404 pub fn contains_offset(&self, offset: usize) -> bool {
1405 self.location.start <= offset && offset < self.location.end
1406 }
1407
1408 /// Find the most specific node whose source span contains `offset`.
1409 ///
1410 /// Returns `None` when `offset` is outside this node. Otherwise, returns this
1411 /// node or the deepest descendant whose span contains the offset. This is useful
1412 /// for LSP features that need to map a cursor byte offset to the smallest AST
1413 /// construct at that position.
1414 ///
1415 /// The same half-open span semantics as [`Node::contains_offset`] apply: start
1416 /// positions are inclusive and end positions are exclusive.
1417 ///
1418 /// # Examples
1419 ///
1420 /// ```
1421 /// use perl_ast::{Node, NodeKind, SourceLocation};
1422 ///
1423 /// let left = Node::new(
1424 /// NodeKind::Identifier { name: "left".to_string() },
1425 /// SourceLocation { start: 0, end: 4 },
1426 /// );
1427 /// let right = Node::new(
1428 /// NodeKind::Number { value: "1".to_string() },
1429 /// SourceLocation { start: 7, end: 8 },
1430 /// );
1431 /// let expr = Node::new(
1432 /// NodeKind::Binary {
1433 /// op: "+".to_string(),
1434 /// left: Box::new(left),
1435 /// right: Box::new(right),
1436 /// },
1437 /// SourceLocation { start: 0, end: 8 },
1438 /// );
1439 ///
1440 /// assert_eq!(
1441 /// expr.find_deepest_containing_offset(7).map(|node| node.kind.kind_name()),
1442 /// Some("Number"),
1443 /// );
1444 /// assert_eq!(expr.find_deepest_containing_offset(8), None);
1445 /// ```
1446 #[inline]
1447 pub fn find_deepest_containing_offset(&self, offset: usize) -> Option<&Node> {
1448 if !self.contains_offset(offset) {
1449 return None;
1450 }
1451
1452 let mut result = self;
1453 self.for_each_child(|child| {
1454 if let Some(descendant) = child.find_deepest_containing_offset(offset) {
1455 result = descendant;
1456 }
1457 });
1458 Some(result)
1459 }
1460
1461 /// Returns the byte length of this node's source span.
1462 ///
1463 /// Uses saturating subtraction so malformed spans never underflow.
1464 #[inline]
1465 pub fn span_len(&self) -> usize {
1466 self.location.end.saturating_sub(self.location.start)
1467 }
1468
1469 /// Get the last direct child node, if any.
1470 ///
1471 /// Optimized to avoid allocating the children vector.
1472 ///
1473 /// # Examples
1474 ///
1475 /// ```
1476 /// use perl_ast::{Node, NodeKind, SourceLocation};
1477 ///
1478 /// let loc = SourceLocation { start: 0, end: 1 };
1479 /// let first = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1480 /// let second = Node::new(NodeKind::Number { value: "2".to_string() }, loc);
1481 /// let program = Node::new(
1482 /// NodeKind::Program { statements: vec![first, second] },
1483 /// loc,
1484 /// );
1485 ///
1486 /// assert_eq!(program.last_child().map(|n| n.kind.kind_name()), Some("Number"));
1487 /// assert_eq!(Node::new(NodeKind::Block { statements: vec![] }, loc).last_child(), None);
1488 /// ```
1489 #[inline]
1490 pub fn last_child(&self) -> Option<&Node> {
1491 let mut result = None;
1492 self.for_each_child(|child| {
1493 result = Some(child);
1494 });
1495 result
1496 }
1497}
1498
/// Comprehensive enumeration of all Perl language constructs supported by the parser.
///
/// This enum represents every AST node type that can be parsed from Perl code
/// during the Parse → Index → Navigate → Complete → Analyze workflow. Each variant captures
/// the semantic meaning and structural relationships needed for script analysis
/// and transformation.
///
/// # LSP Workflow Integration
///
/// Node kinds are processed differently across workflow stages:
/// - **Parse**: All variants are produced by the parser
/// - **Index**: Symbol-bearing variants feed workspace indexing
/// - **Navigate**: Call and reference variants support navigation features
/// - **Complete**: Expression variants provide completion context
/// - **Analyze**: Semantic variants drive diagnostics and refactoring
///
/// # Examples
///
/// Pattern-match on node kinds to extract semantic information:
///
/// ```
/// use perl_ast::{Node, NodeKind, SourceLocation};
///
/// let loc = SourceLocation { start: 0, end: 5 };
/// let node = Node::new(
///     NodeKind::Variable { sigil: "$".to_string(), name: "foo".to_string() },
///     loc,
/// );
///
/// assert!(matches!(
///     &node.kind,
///     NodeKind::Variable { sigil, name } if sigil == "$" && name == "foo"
/// ));
/// ```
///
/// Use [`kind_name()`](NodeKind::kind_name) for debugging and diagnostics:
///
/// ```
/// use perl_ast::NodeKind;
///
/// let kind = NodeKind::Number { value: "99".to_string() };
/// assert_eq!(kind.kind_name(), "Number");
///
/// let kind = NodeKind::Variable { sigil: "@".to_string(), name: "list".to_string() };
/// assert_eq!(kind.kind_name(), "Variable");
/// ```
///
/// # Representation
///
/// - Single child nodes are stored as `Box<Node>` so the enum can be recursive
/// - Child sequences are stored as `Vec<Node>`
/// - Optional children are stored as `Option<Box<Node>>`
/// - The enum derives `Debug`, `Clone`, and `PartialEq`
#[derive(Debug, Clone, PartialEq)]
pub enum NodeKind {
    /// Top-level program containing all statements in a Perl script
    ///
    /// This is the root node of a parsed Perl script and holds every
    /// top-level statement produced during the Parse stage.
    Program {
        /// All top-level statements in the Perl script
        statements: Vec<Node>,
    },

    /// Statement wrapper for expressions that appear at statement level
    ///
    /// Distinguishes an expression used as a statement from an expression
    /// nested inside another construct.
    ExpressionStatement {
        /// The expression being used as a statement
        expression: Box<Node>,
    },

    /// Variable declaration with a scope declarator
    ///
    /// Represents declarations like `my $var`, `our $global`, `local $dynamic`, etc.
    VariableDeclaration {
        /// Scope declarator: "my", "our", "local", "state"
        declarator: String,
        /// The variable being declared
        variable: Box<Node>,
        /// Variable attributes (e.g., ":shared", ":locked")
        attributes: Vec<String>,
        /// Optional initializer expression
        initializer: Option<Box<Node>>,
    },

    /// Multiple variable declaration in a single statement
    ///
    /// Handles constructs like `my ($x, $y) = @values`.
    VariableListDeclaration {
        /// Scope declarator for all variables in the list
        declarator: String,
        /// All variables being declared in the list
        variables: Vec<Node>,
        /// Attributes applied to the variable list
        attributes: Vec<String>,
        /// Optional initializer for the entire variable list
        initializer: Option<Box<Node>>,
    },

    /// Perl variable reference (scalar, array, hash, etc.)
    Variable {
        /// Variable sigil indicating type: $, @, %, &, *
        sigil: String, // $, @, %, &, *
        /// Variable name without sigil
        name: String,
    },

    /// Variable together with a list of attributes applied to it
    VariableWithAttributes {
        /// The base variable node
        variable: Box<Node>,
        /// List of attribute names applied to the variable
        attributes: Vec<String>,
    },

    /// Assignment operation, including compound assignments such as `+=`
    Assignment {
        /// Left-hand side of assignment
        lhs: Box<Node>,
        /// Right-hand side of assignment
        rhs: Box<Node>,
        /// Assignment operator: =, +=, -=, etc.
        op: String, // =, +=, -=, etc.
    },

    // Expressions
    /// Binary operation with a left and a right operand
    Binary {
        /// Binary operator
        op: String,
        /// Left operand
        left: Box<Node>,
        /// Right operand
        right: Box<Node>,
    },

    /// Ternary conditional expression: `condition ? then_expr : else_expr`
    Ternary {
        /// Condition to evaluate
        condition: Box<Node>,
        /// Expression when condition is true
        then_expr: Box<Node>,
        /// Expression when condition is false
        else_expr: Box<Node>,
    },

    /// Unary operation applied to a single operand
    Unary {
        /// Unary operator
        op: String,
        /// Operand to apply operator to
        operand: Box<Node>,
    },

    // I/O operations
    /// Diamond operator `<>`
    Diamond, // <>

    /// Ellipsis operator `...`
    Ellipsis, // ...

    /// The `undef` value
    Undef, // undef

    /// Readline operation such as `<STDIN>` or `<$fh>`
    Readline {
        /// Optional filehandle: `<STDIN>`, `<$fh>`, etc.
        filehandle: Option<String>, // <STDIN>, <$fh>, etc.
    },

    /// Glob expression such as `<*.txt>`
    Glob {
        /// Pattern string for file matching
        pattern: String, // <*.txt>
    },

    /// Typeglob expression: `*foo` or `*main::bar`
    ///
    /// Provides access to all symbol table entries for a given name.
    Typeglob {
        /// Name of the symbol (including package qualification)
        name: String,
    },

    /// Numeric literal in Perl code (integer, float, hex, octal, binary)
    ///
    /// Represents all numeric literal forms: `42`, `3.14`, `0x1A`, `0o755`, `0b1010`.
    Number {
        /// String representation preserving original format
        value: String,
    },

    /// String literal with optional interpolation
    ///
    /// Handles both single-quoted (`'literal'`) and double-quoted (`"$interpolated"`) strings.
    String {
        /// String content (after quote processing)
        value: String,
        /// Whether the string supports variable interpolation
        interpolated: bool,
    },

    /// Heredoc string literal for multi-line content
    ///
    /// Supports all heredoc forms: `<<EOF`, `<<'EOF'`, `<<"EOF"`, `<<~EOF` (indented).
    Heredoc {
        /// Delimiter marking heredoc boundaries
        delimiter: String,
        /// Content between delimiters
        content: String,
        /// Whether content supports variable interpolation
        interpolated: bool,
        /// Whether leading whitespace is stripped (<<~ form)
        indented: bool,
        /// Whether this is a command execution heredoc (<<`EOF`)
        command: bool,
        /// Body span for breakpoint detection (populated by drain_pending_heredocs)
        body_span: Option<SourceLocation>,
    },

    /// Array literal expression: `(1, 2, 3)` or `[1, 2, 3]`
    ArrayLiteral {
        /// Elements in the array
        elements: Vec<Node>,
    },

    /// Hash literal expression: `(key => 'value')` or `{key => 'value'}`
    HashLiteral {
        /// Key-value pairs in the hash
        pairs: Vec<(Node, Node)>,
    },

    /// Block of statements: `{ ... }`
    ///
    /// Used for control structures, subroutine bodies, and bare blocks.
    Block {
        /// Statements within the block
        statements: Vec<Node>,
    },

    /// Eval block for exception handling: `eval { ... }`
    Eval {
        /// Block to evaluate with exception trapping
        block: Box<Node>,
    },

    /// Do block for file inclusion or expression evaluation: `do { ... }` or `do "file"`
    Do {
        /// Block to execute or file expression
        block: Box<Node>,
    },

    /// Defer block for deferred cleanup on scope exit (Perl 5.36+ experimental, stable in 5.40)
    Defer {
        /// Block to execute on scope exit
        block: Box<Node>,
    },

    /// Try-catch-finally for modern exception handling (Syntax::Keyword::Try style)
    Try {
        /// Try block body
        body: Box<Node>,
        /// Catch blocks: (optional exception variable, handler block)
        catch_blocks: Vec<(Option<String>, Box<Node>)>,
        /// Optional finally block
        finally_block: Option<Box<Node>>,
    },

    /// If-elsif-else conditional statement
    If {
        /// Condition expression
        condition: Box<Node>,
        /// Then branch block
        then_branch: Box<Node>,
        /// Elsif branches: (condition, block) pairs
        elsif_branches: Vec<(Box<Node>, Box<Node>)>,
        /// Optional else branch
        else_branch: Option<Box<Node>>,
    },

    /// Statement with a label for loop control: `LABEL: while (...)`
    LabeledStatement {
        /// Label name (e.g., "OUTER", "LINE")
        label: String,
        /// Labeled statement (typically a loop)
        statement: Box<Node>,
    },

    /// While loop: `while (condition) { ... }`
    While {
        /// Loop condition
        condition: Box<Node>,
        /// Loop body
        body: Box<Node>,
        /// Optional continue block
        continue_block: Option<Box<Node>>,
    },

    /// Tie operation for binding variables to objects: `tie %hash, 'Package', @args`
    Tie {
        /// Variable being tied
        variable: Box<Node>,
        /// Class/package name to tie to
        package: Box<Node>,
        /// Arguments passed to TIE* method
        args: Vec<Node>,
    },

    /// Untie operation for unbinding variables: `untie %hash`
    Untie {
        /// Variable being untied
        variable: Box<Node>,
    },

    /// C-style for loop: `for (init; cond; update) { ... }`
    For {
        /// Initialization expression
        init: Option<Box<Node>>,
        /// Loop condition
        condition: Option<Box<Node>>,
        /// Update expression
        update: Option<Box<Node>>,
        /// Loop body
        body: Box<Node>,
        /// Optional continue block
        continue_block: Option<Box<Node>>,
    },

    /// Foreach loop: `foreach my $item (@list) { ... }`
    Foreach {
        /// Iterator variable
        variable: Box<Node>,
        /// List to iterate
        list: Box<Node>,
        /// Loop body
        body: Box<Node>,
        /// Optional continue block
        continue_block: Option<Box<Node>>,
    },

    /// Given statement for switch-like matching (Perl 5.10+)
    Given {
        /// Expression to match against
        expr: Box<Node>,
        /// Body containing when/default blocks
        body: Box<Node>,
    },

    /// When clause in given/switch: `when ($pattern) { ... }`
    When {
        /// Pattern to match
        condition: Box<Node>,
        /// Handler block
        body: Box<Node>,
    },

    /// Default clause in given/switch: `default { ... }`
    Default {
        /// Handler block for unmatched cases
        body: Box<Node>,
    },

    /// Statement modifier syntax: `print "ok" if $condition`
    StatementModifier {
        /// Statement to conditionally execute
        statement: Box<Node>,
        /// Modifier keyword: if, unless, while, until, for, foreach
        modifier: String,
        /// Modifier condition
        condition: Box<Node>,
    },

    // Functions
    /// Subroutine declaration (function) including name, prototype, signature and body.
    Subroutine {
        /// Name of the subroutine, if it has one
        ///
        /// The exact source position of the name is stored separately in
        /// `name_span`, which navigation features such as go-to-definition,
        /// hover, and symbol renaming can use.
        name: Option<String>,

        /// Source location span of the subroutine name
        ///
        /// Corresponds to the `name` field and gives its position without
        /// re-scanning the source text.
        name_span: Option<SourceLocation>,

        /// Optional prototype node (e.g. `($;@)`).
        prototype: Option<Box<Node>>,
        /// Optional signature node (Perl 5.20+ feature).
        signature: Option<Box<Node>>,
        /// Attributes attached to the subroutine (`:lvalue`, etc.).
        attributes: Vec<String>,
        /// The body block of the subroutine.
        body: Box<Node>,
    },

    /// Subroutine prototype specification: `sub foo ($;@) { ... }`
    Prototype {
        /// Prototype string defining argument behavior
        content: String,
    },

    /// Subroutine signature (Perl 5.20+): `sub foo ($x, $y = 0) { ... }`
    Signature {
        /// List of signature parameters
        parameters: Vec<Node>,
    },

    /// Mandatory signature parameter: `$x` in `sub foo ($x) { }`
    MandatoryParameter {
        /// Variable being bound
        variable: Box<Node>,
    },

    /// Optional signature parameter with default: `$y = 0` in `sub foo ($y = 0) { }`
    OptionalParameter {
        /// Variable being bound
        variable: Box<Node>,
        /// Default value expression
        default_value: Box<Node>,
    },

    /// Slurpy parameter collecting remaining args: `@rest` or `%opts` in signature
    SlurpyParameter {
        /// Array or hash variable to receive remaining arguments
        variable: Box<Node>,
    },

    /// Named parameter placeholder in signature (future Perl feature)
    NamedParameter {
        /// Variable for named parameter binding
        variable: Box<Node>,
    },

    /// Method declaration (Perl 5.38+ with `use feature 'class'`)
    Method {
        /// Method name
        name: String,
        /// Optional signature
        signature: Option<Box<Node>>,
        /// Method attributes (e.g., `:lvalue`)
        attributes: Vec<String>,
        /// Method body
        body: Box<Node>,
    },

    /// Return statement: `return;` or `return $value;`
    Return {
        /// Optional return value
        value: Option<Box<Node>>,
    },

    /// Loop control statement: `next`, `last`, or `redo`
    LoopControl {
        /// Control keyword: "next", "last", or "redo"
        op: String,
        /// Optional label: `next LABEL`
        label: Option<String>,
    },

    /// Goto statement: `goto LABEL`, `goto &sub`, or `goto $expr`
    Goto {
        /// The target of the goto (label identifier, sub reference, or expression)
        target: Box<Node>,
    },

    /// Method call: `$obj->method(@args)` or `$obj->method`
    MethodCall {
        /// Object or class expression
        object: Box<Node>,
        /// Method name being called
        method: String,
        /// Method arguments
        args: Vec<Node>,
    },

    /// Function call: `foo(@args)` or `foo()`
    FunctionCall {
        /// Function name (may be qualified: `Package::func`)
        name: String,
        /// Function arguments
        args: Vec<Node>,
    },

    /// Indirect object call (legacy syntax): `new Class @args`
    IndirectCall {
        /// Method name
        method: String,
        /// Object or class
        object: Box<Node>,
        /// Arguments
        args: Vec<Node>,
    },

    /// Regex literal: `/pattern/modifiers` or `qr/pattern/modifiers`
    Regex {
        /// Regular expression pattern
        pattern: String,
        /// Replacement string (for s/// when parsed as regex)
        replacement: Option<String>,
        /// Regex modifiers (i, m, s, x, g, etc.)
        modifiers: String,
        /// Whether the regex contains embedded code `(?{...})`
        has_embedded_code: bool,
    },

    /// Match operation: `$str =~ /pattern/modifiers` or `$str !~ /pattern/modifiers`
    Match {
        /// Expression to match against
        expr: Box<Node>,
        /// Pattern to match
        pattern: String,
        /// Match modifiers
        modifiers: String,
        /// Whether the regex contains embedded code `(?{...})`
        has_embedded_code: bool,
        /// Whether the binding operator was `!~` (negated match)
        negated: bool,
    },

    /// Substitution operation: `$str =~ s/pattern/replacement/modifiers`
    Substitution {
        /// Expression to substitute in
        expr: Box<Node>,
        /// Pattern to find
        pattern: String,
        /// Replacement string
        replacement: String,
        /// Substitution modifiers (g, e, r, etc.)
        modifiers: String,
        /// Whether the regex contains embedded code `(?{...})`
        has_embedded_code: bool,
        /// Whether the binding operator was `!~` (negated match)
        negated: bool,
    },

    /// Transliteration operation: `$str =~ tr/search/replace/` or `y///`
    Transliteration {
        /// Expression to transliterate
        expr: Box<Node>,
        /// Characters to search for
        search: String,
        /// Replacement characters
        replace: String,
        /// Transliteration modifiers (c, d, s, r)
        modifiers: String,
        /// Whether the binding operator was `!~` (negated match)
        negated: bool,
    },

    // Package system
    /// Package declaration (e.g. `package Foo;`) and optional inline block form.
    Package {
        /// Name of the package
        ///
        /// The exact source position of the name is stored separately in
        /// `name_span`, which navigation features such as go-to-definition
        /// and hover can use.
        name: String,

        /// Source location span of the package name
        ///
        /// Corresponds to the `name` field and gives its position without
        /// re-scanning the source text.
        name_span: SourceLocation,

        /// Optional inline block for `package Foo { ... }` declarations.
        block: Option<Box<Node>>,
    },

    /// Use statement for module loading: `use Module qw(imports);`
    Use {
        /// Module name to load
        module: String,
        /// Import arguments (symbols to import)
        args: Vec<String>,
        /// Whether this module is a known source filter (security risk)
        has_filter_risk: bool,
    },

    /// No statement for disabling features: `no strict;`
    No {
        /// Module/pragma name to disable
        module: String,
        /// Arguments for the no statement
        args: Vec<String>,
        /// Whether this module is a known source filter (security risk)
        has_filter_risk: bool,
    },

    /// Phase block for compile/runtime hooks: `BEGIN`, `END`, `CHECK`, `INIT`, `UNITCHECK`
    PhaseBlock {
        /// Phase name: BEGIN, END, CHECK, INIT, UNITCHECK
        phase: String,
        /// Source location span of the phase block name for precise navigation
        phase_span: Option<SourceLocation>,
        /// Block to execute during the specified phase
        block: Box<Node>,
    },

    /// Data section marker: `__DATA__` or `__END__`
    DataSection {
        /// Section marker (__DATA__ or __END__)
        marker: String,
        /// Content following the marker (if any)
        body: Option<String>,
    },

    /// Class declaration (Perl 5.38+ with `use feature 'class'`)
    Class {
        /// Class name
        name: String,
        /// Parent class names from `:isa(Parent)` attributes
        parents: Vec<String>,
        /// Class body containing methods and attributes
        body: Box<Node>,
    },

    /// Format declaration for legacy report generation
    Format {
        /// Format name (defaults to filehandle name)
        name: String,
        /// Format specification body
        body: String,
    },

    /// Bare identifier (bareword or package-qualified name)
    Identifier {
        /// Identifier string
        name: String,
    },

    /// Parse error placeholder with error message and recovery context
    Error {
        /// Error description
        message: String,
        /// Expected token types (if any)
        expected: Vec<TokenKind>,
        /// The token actually found (if any)
        found: Option<Token>,
        /// Partial AST node parsed before error (if any)
        partial: Option<Box<Node>>,
    },

    /// Missing expression where one was expected
    MissingExpression,
    /// Missing statement where one was expected
    MissingStatement,
    /// Missing identifier where one was expected
    MissingIdentifier,
    /// Missing block where one was expected
    MissingBlock,

    /// Lexer budget exceeded marker preserving partial parse results
    ///
    /// Used when recursion or token limits are hit to preserve already-parsed content.
    UnknownRest,
}
2180
2181impl NodeKind {
2182 /// Get the name of this `NodeKind` as a static string.
2183 ///
2184 /// Useful for diagnostics, logging, and human-readable AST dumps.
2185 ///
2186 /// # Examples
2187 ///
2188 /// ```
2189 /// use perl_ast::NodeKind;
2190 ///
2191 /// let kind = NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() };
2192 /// assert_eq!(kind.kind_name(), "Variable");
2193 ///
2194 /// let kind = NodeKind::Program { statements: vec![] };
2195 /// assert_eq!(kind.kind_name(), "Program");
2196 /// ```
2197 pub fn kind_name(&self) -> &'static str {
2198 match self {
2199 NodeKind::Program { .. } => "Program",
2200 NodeKind::ExpressionStatement { .. } => "ExpressionStatement",
2201 NodeKind::VariableDeclaration { .. } => "VariableDeclaration",
2202 NodeKind::VariableListDeclaration { .. } => "VariableListDeclaration",
2203 NodeKind::Variable { .. } => "Variable",
2204 NodeKind::VariableWithAttributes { .. } => "VariableWithAttributes",
2205 NodeKind::Assignment { .. } => "Assignment",
2206 NodeKind::Binary { .. } => "Binary",
2207 NodeKind::Ternary { .. } => "Ternary",
2208 NodeKind::Unary { .. } => "Unary",
2209 NodeKind::Diamond => "Diamond",
2210 NodeKind::Ellipsis => "Ellipsis",
2211 NodeKind::Undef => "Undef",
2212 NodeKind::Readline { .. } => "Readline",
2213 NodeKind::Glob { .. } => "Glob",
2214 NodeKind::Typeglob { .. } => "Typeglob",
2215 NodeKind::Number { .. } => "Number",
2216 NodeKind::String { .. } => "String",
2217 NodeKind::Heredoc { .. } => "Heredoc",
2218 NodeKind::ArrayLiteral { .. } => "ArrayLiteral",
2219 NodeKind::HashLiteral { .. } => "HashLiteral",
2220 NodeKind::Block { .. } => "Block",
2221 NodeKind::Eval { .. } => "Eval",
2222 NodeKind::Do { .. } => "Do",
2223 NodeKind::Defer { .. } => "Defer",
2224 NodeKind::Try { .. } => "Try",
2225 NodeKind::If { .. } => "If",
2226 NodeKind::LabeledStatement { .. } => "LabeledStatement",
2227 NodeKind::While { .. } => "While",
2228 NodeKind::Tie { .. } => "Tie",
2229 NodeKind::Untie { .. } => "Untie",
2230 NodeKind::For { .. } => "For",
2231 NodeKind::Foreach { .. } => "Foreach",
2232 NodeKind::Given { .. } => "Given",
2233 NodeKind::When { .. } => "When",
2234 NodeKind::Default { .. } => "Default",
2235 NodeKind::StatementModifier { .. } => "StatementModifier",
2236 NodeKind::Subroutine { .. } => "Subroutine",
2237 NodeKind::Prototype { .. } => "Prototype",
2238 NodeKind::Signature { .. } => "Signature",
2239 NodeKind::MandatoryParameter { .. } => "MandatoryParameter",
2240 NodeKind::OptionalParameter { .. } => "OptionalParameter",
2241 NodeKind::SlurpyParameter { .. } => "SlurpyParameter",
2242 NodeKind::NamedParameter { .. } => "NamedParameter",
2243 NodeKind::Method { .. } => "Method",
2244 NodeKind::Return { .. } => "Return",
2245 NodeKind::LoopControl { .. } => "LoopControl",
2246 NodeKind::Goto { .. } => "Goto",
2247 NodeKind::MethodCall { .. } => "MethodCall",
2248 NodeKind::FunctionCall { .. } => "FunctionCall",
2249 NodeKind::IndirectCall { .. } => "IndirectCall",
2250 NodeKind::Regex { .. } => "Regex",
2251 NodeKind::Match { .. } => "Match",
2252 NodeKind::Substitution { .. } => "Substitution",
2253 NodeKind::Transliteration { .. } => "Transliteration",
2254 NodeKind::Package { .. } => "Package",
2255 NodeKind::Use { .. } => "Use",
2256 NodeKind::No { .. } => "No",
2257 NodeKind::PhaseBlock { .. } => "PhaseBlock",
2258 NodeKind::DataSection { .. } => "DataSection",
2259 NodeKind::Class { .. } => "Class",
2260 NodeKind::Format { .. } => "Format",
2261 NodeKind::Identifier { .. } => "Identifier",
2262 NodeKind::Error { .. } => "Error",
2263 NodeKind::MissingExpression => "MissingExpression",
2264 NodeKind::MissingStatement => "MissingStatement",
2265 NodeKind::MissingIdentifier => "MissingIdentifier",
2266 NodeKind::MissingBlock => "MissingBlock",
2267 NodeKind::UnknownRest => "UnknownRest",
2268 }
2269 }
2270
2271 /// Canonical list of **all** `kind_name()` strings, in alphabetical order.
2272 ///
2273 /// Every consumer that needs the full set of NodeKind names should reference
2274 /// this constant instead of maintaining a hand-written copy.
2275 pub const ALL_KIND_NAMES: &[&'static str] = &[
2276 "ArrayLiteral",
2277 "Assignment",
2278 "Binary",
2279 "Block",
2280 "Class",
2281 "DataSection",
2282 "Default",
2283 "Defer",
2284 "Diamond",
2285 "Do",
2286 "Ellipsis",
2287 "Error",
2288 "Eval",
2289 "ExpressionStatement",
2290 "For",
2291 "Foreach",
2292 "Format",
2293 "FunctionCall",
2294 "Given",
2295 "Glob",
2296 "Goto",
2297 "HashLiteral",
2298 "Heredoc",
2299 "Identifier",
2300 "If",
2301 "IndirectCall",
2302 "LabeledStatement",
2303 "LoopControl",
2304 "MandatoryParameter",
2305 "Match",
2306 "Method",
2307 "MethodCall",
2308 "MissingBlock",
2309 "MissingExpression",
2310 "MissingIdentifier",
2311 "MissingStatement",
2312 "NamedParameter",
2313 "No",
2314 "Number",
2315 "OptionalParameter",
2316 "Package",
2317 "PhaseBlock",
2318 "Program",
2319 "Prototype",
2320 "Readline",
2321 "Regex",
2322 "Return",
2323 "Signature",
2324 "SlurpyParameter",
2325 "StatementModifier",
2326 "String",
2327 "Subroutine",
2328 "Substitution",
2329 "Ternary",
2330 "Tie",
2331 "Transliteration",
2332 "Try",
2333 "Typeglob",
2334 "Unary",
2335 "Undef",
2336 "UnknownRest",
2337 "Untie",
2338 "Use",
2339 "Variable",
2340 "VariableDeclaration",
2341 "VariableListDeclaration",
2342 "VariableWithAttributes",
2343 "When",
2344 "While",
2345 ];
2346
2347 /// Subset of `ALL_KIND_NAMES` that represent synthetic/recovery nodes.
2348 ///
2349 /// These kinds are only produced by `parse_with_recovery()` on malformed
2350 /// input and should not be expected in clean parses.
2351 pub const RECOVERY_KIND_NAMES: &[&'static str] = &[
2352 "Error",
2353 "MissingBlock",
2354 "MissingExpression",
2355 "MissingIdentifier",
2356 "MissingStatement",
2357 "UnknownRest",
2358 ];
2359}
2360
2361impl fmt::Display for NodeKind {
2362 /// Formats as the canonical `kind_name()` string.
2363 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2364 f.write_str(self.kind_name())
2365 }
2366}
2367
2368impl fmt::Display for Node {
2369 /// Formats as the tree-sitter compatible S-expression.
2370 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2371 f.write_str(&self.to_sexp())
2372 }
2373}
2374
/// Builds the S-expression label for a unary operator.
///
/// Four operators are given symbolic names:
///
/// - `!` and `not` both become `unary_not`
/// - `~` becomes `unary_complement`
/// - `\` becomes `unary_ref`
///
/// Every other operator (arithmetic `+`/`-`, `++`/`--`, file tests such as
/// `-f` or `-d`, postfix dereferences such as `->@*`, `defined`, and anything
/// not listed here) is rendered as `unary_` followed by the operator text,
/// with each space replaced by an underscore.
fn format_unary_operator(op: &str) -> String {
    // Only operators whose label differs from their own spelling need a lookup.
    let symbolic_name = match op {
        "!" | "not" => Some("not"),
        "~" => Some("complement"),
        "\\" => Some("ref"),
        _ => None,
    };

    match symbolic_name {
        Some(name) => format!("unary_{name}"),
        None => format!("unary_{}", op.replace(' ', "_")),
    }
}
2439
/// Builds the S-expression label for a binary operator.
///
/// The two arrow-dereference forms are given symbolic names:
///
/// - `->{}` becomes `arrow_hash_deref`
/// - `->[]` becomes `arrow_array_deref`
///
/// Every other operator (arithmetic, numeric and string comparison, logical,
/// bitwise, pattern binding, smart match, repetition, concatenation, range,
/// `isa`, all assignment forms, `//`, `->`, `{}`, `[]`, and anything not
/// listed here) is rendered as `binary_` followed by the operator text, with
/// each space replaced by an underscore.
fn format_binary_operator(op: &str) -> String {
    match op {
        "->{}" => String::from("arrow_hash_deref"),
        "->[]" => String::from("arrow_array_deref"),
        other => {
            let mut label = String::from("binary_");
            label.push_str(&other.replace(' ', "_"));
            label
        }
    }
}
2539
2540// SourceLocation is now provided by perl-position-tracking crate
2541// See the re-export at the top of this file
2542
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    /// Build a dummy instance of every `NodeKind` variant and collect the
    /// `kind_name()` of each into a set.
    ///
    /// Removing a variant or changing its fields breaks compilation here, which
    /// keeps this list honest in that direction. Note that a *newly added*
    /// variant is not caught by this function on its own (a `vec!` literal is
    /// not exhaustiveness-checked), so it must be appended here by hand.
    fn all_kind_names_from_variants() -> BTreeSet<&'static str> {
        let loc = SourceLocation { start: 0, end: 0 };
        // Cheapest possible child node; only used to satisfy `Box<Node>` fields.
        let dummy_node = || Node::new(NodeKind::Undef, loc);

        let variants: Vec<NodeKind> = vec![
            NodeKind::Program { statements: vec![] },
            NodeKind::ExpressionStatement { expression: Box::new(dummy_node()) },
            NodeKind::VariableDeclaration {
                declarator: String::new(),
                variable: Box::new(dummy_node()),
                attributes: vec![],
                initializer: None,
            },
            NodeKind::VariableListDeclaration {
                declarator: String::new(),
                variables: vec![],
                attributes: vec![],
                initializer: None,
            },
            NodeKind::Variable { sigil: String::new(), name: String::new() },
            NodeKind::VariableWithAttributes {
                variable: Box::new(dummy_node()),
                attributes: vec![],
            },
            NodeKind::Assignment {
                lhs: Box::new(dummy_node()),
                rhs: Box::new(dummy_node()),
                op: String::new(),
            },
            NodeKind::Binary {
                op: String::new(),
                left: Box::new(dummy_node()),
                right: Box::new(dummy_node()),
            },
            NodeKind::Ternary {
                condition: Box::new(dummy_node()),
                then_expr: Box::new(dummy_node()),
                else_expr: Box::new(dummy_node()),
            },
            NodeKind::Unary { op: String::new(), operand: Box::new(dummy_node()) },
            NodeKind::Diamond,
            NodeKind::Ellipsis,
            NodeKind::Undef,
            NodeKind::Readline { filehandle: None },
            NodeKind::Glob { pattern: String::new() },
            NodeKind::Typeglob { name: String::new() },
            NodeKind::Number { value: String::new() },
            NodeKind::String { value: String::new(), interpolated: false },
            NodeKind::Heredoc {
                delimiter: String::new(),
                content: String::new(),
                interpolated: false,
                indented: false,
                command: false,
                body_span: None,
            },
            NodeKind::ArrayLiteral { elements: vec![] },
            NodeKind::HashLiteral { pairs: vec![] },
            NodeKind::Block { statements: vec![] },
            NodeKind::Eval { block: Box::new(dummy_node()) },
            NodeKind::Do { block: Box::new(dummy_node()) },
            NodeKind::Defer { block: Box::new(dummy_node()) },
            NodeKind::Try {
                body: Box::new(dummy_node()),
                catch_blocks: vec![],
                finally_block: None,
            },
            NodeKind::If {
                condition: Box::new(dummy_node()),
                then_branch: Box::new(dummy_node()),
                elsif_branches: vec![],
                else_branch: None,
            },
            NodeKind::LabeledStatement { label: String::new(), statement: Box::new(dummy_node()) },
            NodeKind::While {
                condition: Box::new(dummy_node()),
                body: Box::new(dummy_node()),
                continue_block: None,
            },
            NodeKind::Tie {
                variable: Box::new(dummy_node()),
                package: Box::new(dummy_node()),
                args: vec![],
            },
            NodeKind::Untie { variable: Box::new(dummy_node()) },
            NodeKind::For {
                init: None,
                condition: None,
                update: None,
                body: Box::new(dummy_node()),
                continue_block: None,
            },
            NodeKind::Foreach {
                variable: Box::new(dummy_node()),
                list: Box::new(dummy_node()),
                body: Box::new(dummy_node()),
                continue_block: None,
            },
            NodeKind::Given { expr: Box::new(dummy_node()), body: Box::new(dummy_node()) },
            NodeKind::When { condition: Box::new(dummy_node()), body: Box::new(dummy_node()) },
            NodeKind::Default { body: Box::new(dummy_node()) },
            NodeKind::StatementModifier {
                statement: Box::new(dummy_node()),
                modifier: String::new(),
                condition: Box::new(dummy_node()),
            },
            NodeKind::Subroutine {
                name: None,
                name_span: None,
                prototype: None,
                signature: None,
                attributes: vec![],
                body: Box::new(dummy_node()),
            },
            NodeKind::Prototype { content: String::new() },
            NodeKind::Signature { parameters: vec![] },
            NodeKind::MandatoryParameter { variable: Box::new(dummy_node()) },
            NodeKind::OptionalParameter {
                variable: Box::new(dummy_node()),
                default_value: Box::new(dummy_node()),
            },
            NodeKind::SlurpyParameter { variable: Box::new(dummy_node()) },
            NodeKind::NamedParameter { variable: Box::new(dummy_node()) },
            NodeKind::Method {
                name: String::new(),
                signature: None,
                attributes: vec![],
                body: Box::new(dummy_node()),
            },
            NodeKind::Return { value: None },
            NodeKind::LoopControl { op: String::new(), label: None },
            NodeKind::Goto { target: Box::new(dummy_node()) },
            NodeKind::MethodCall {
                object: Box::new(dummy_node()),
                method: String::new(),
                args: vec![],
            },
            NodeKind::FunctionCall { name: String::new(), args: vec![] },
            NodeKind::IndirectCall {
                method: String::new(),
                object: Box::new(dummy_node()),
                args: vec![],
            },
            NodeKind::Regex {
                pattern: String::new(),
                replacement: None,
                modifiers: String::new(),
                has_embedded_code: false,
            },
            NodeKind::Match {
                expr: Box::new(dummy_node()),
                pattern: String::new(),
                modifiers: String::new(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Substitution {
                expr: Box::new(dummy_node()),
                pattern: String::new(),
                replacement: String::new(),
                modifiers: String::new(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Transliteration {
                expr: Box::new(dummy_node()),
                search: String::new(),
                replace: String::new(),
                modifiers: String::new(),
                negated: false,
            },
            NodeKind::Package { name: String::new(), name_span: loc, block: None },
            NodeKind::Use { module: String::new(), args: vec![], has_filter_risk: false },
            NodeKind::No { module: String::new(), args: vec![], has_filter_risk: false },
            NodeKind::PhaseBlock {
                phase: String::new(),
                phase_span: None,
                block: Box::new(dummy_node()),
            },
            NodeKind::DataSection { marker: String::new(), body: None },
            NodeKind::Class { name: String::new(), parents: vec![], body: Box::new(dummy_node()) },
            NodeKind::Format { name: String::new(), body: String::new() },
            NodeKind::Identifier { name: String::new() },
            NodeKind::Error {
                message: String::new(),
                expected: vec![],
                found: None,
                partial: None,
            },
            NodeKind::MissingExpression,
            NodeKind::MissingStatement,
            NodeKind::MissingIdentifier,
            NodeKind::MissingBlock,
            NodeKind::UnknownRest,
        ];

        variants.iter().map(|v| v.kind_name()).collect()
    }

    /// `ALL_KIND_NAMES` must contain exactly the names produced by
    /// `kind_name()` for the variants listed above, with no duplicates.
    #[test]
    fn all_kind_names_is_consistent_with_kind_name() {
        let from_enum = all_kind_names_from_variants();
        let from_const: BTreeSet<&str> = NodeKind::ALL_KIND_NAMES.iter().copied().collect();

        // Check for duplicates in the const array
        assert_eq!(
            NodeKind::ALL_KIND_NAMES.len(),
            from_const.len(),
            "ALL_KIND_NAMES contains duplicates"
        );

        let only_in_enum: Vec<_> = from_enum.difference(&from_const).collect();
        let only_in_const: Vec<_> = from_const.difference(&from_enum).collect();

        assert!(
            only_in_enum.is_empty() && only_in_const.is_empty(),
            "ALL_KIND_NAMES is out of sync with NodeKind variants:\n \
             in enum but not in ALL_KIND_NAMES: {only_in_enum:?}\n \
             in ALL_KIND_NAMES but not in enum: {only_in_const:?}"
        );
    }

    /// `ALL_KIND_NAMES` is documented as being in alphabetical order; enforce
    /// it so that consumers may rely on the ordering (e.g. for binary search).
    #[test]
    fn all_kind_names_is_sorted() {
        // Strict `<` between neighbours also rules out adjacent duplicates.
        let out_of_order: Vec<_> =
            NodeKind::ALL_KIND_NAMES.windows(2).filter(|pair| pair[0] >= pair[1]).collect();

        assert!(
            out_of_order.is_empty(),
            "ALL_KIND_NAMES is not in strictly ascending alphabetical order: {out_of_order:?}"
        );
    }

    /// Every recovery kind name must also be a regular kind name, and the
    /// recovery list itself must be duplicate-free.
    #[test]
    fn recovery_kind_names_is_subset_of_all() {
        let all: BTreeSet<&str> = NodeKind::ALL_KIND_NAMES.iter().copied().collect();
        let recovery: BTreeSet<&str> = NodeKind::RECOVERY_KIND_NAMES.iter().copied().collect();

        // No duplicates
        assert_eq!(
            NodeKind::RECOVERY_KIND_NAMES.len(),
            recovery.len(),
            "RECOVERY_KIND_NAMES contains duplicates"
        );

        let not_in_all: Vec<_> = recovery.difference(&all).collect();
        assert!(
            not_in_all.is_empty(),
            "RECOVERY_KIND_NAMES contains entries not in ALL_KIND_NAMES: {not_in_all:?}"
        );
    }
}
2791}