perl_ast/ast.rs
1//! Abstract Syntax Tree definitions for Perl within the parsing and LSP workflow.
2//!
3//! This module defines the comprehensive AST node types that represent parsed Perl code
4//! during the Parse → Index → Navigate → Complete → Analyze stages. The design is optimized
5//! for both direct use in Rust analysis and for generating tree-sitter compatible
6//! S-expressions during large workspace processing operations.
7//!
8//! # LSP Workflow Integration
9//!
10//! The AST structures support Perl tooling workflows by:
11//! - **Parse**: Produced by the parser as the canonical syntax tree
12//! - **Index**: Traversed to build symbol and reference tables
13//! - **Navigate**: Provides locations for definition and reference lookups
14//! - **Complete**: Supplies context for completion, hover, and signature help
15//! - **Analyze**: Feeds semantic analysis, diagnostics, and refactoring
16//!
17//! # Performance Characteristics
18//!
19//! AST structures are optimized for large codebases with:
20//! - Memory-efficient node representation using `Box<Node>` for recursive structures
21//! - Fast pattern matching via enum variants for common Perl constructs
22//! - Location tracking for precise error reporting in large files
23//! - Cheap cloning for parallel analysis tasks
24//!
25//! # Usage Examples
26//!
27//! ## Basic AST Construction
28//!
29//! ```rust
30//! use perl_ast::{Node, NodeKind, SourceLocation};
31//!
32//! // Create a simple variable declaration node
33//! let location = SourceLocation { start: 0, end: 10 };
34//! let node = Node::new(
35//! NodeKind::VariableDeclaration {
36//! declarator: "my".to_string(),
37//! variable: Box::new(Node::new(
38//! NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
39//! location,
40//! )),
41//! attributes: vec![],
42//! initializer: None,
43//! },
44//! location,
45//! );
46//! assert_eq!(node.kind.kind_name(), "VariableDeclaration");
47//! ```
48//!
49//! ## Tree-sitter S-expression Generation
50//!
51//! ```rust
52//! use perl_ast::{Node, NodeKind, SourceLocation};
53//!
54//! let loc = SourceLocation { start: 0, end: 2 };
55//! let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
56//! let program = Node::new(NodeKind::Program { statements: vec![num] }, loc);
57//!
58//! let sexp = program.to_sexp();
59//! assert!(sexp.starts_with("(source_file"));
60//! ```
61//!
62//! ## AST Traversal and Analysis
63//!
64//! ```rust
65//! use perl_ast::{Node, NodeKind, SourceLocation};
66//!
67//! fn count_variables(node: &Node) -> usize {
68//! let mut count = 0;
69//! match &node.kind {
70//! NodeKind::Variable { .. } => count += 1,
71//! NodeKind::Program { statements } => {
72//! for stmt in statements {
73//! count += count_variables(stmt);
74//! }
75//! }
76//! _ => {} // Handle other node types as needed
77//! }
78//! count
79//! }
80//!
81//! let loc = SourceLocation { start: 0, end: 5 };
82//! let var = Node::new(
83//! NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
84//! loc,
85//! );
86//! let program = Node::new(NodeKind::Program { statements: vec![var] }, loc);
87//! assert_eq!(count_variables(&program), 1);
88//! ```
89//!
90//! ## Parsing Integration
91//!
92//! In practice the AST is produced by the parser rather than built by hand
93//! (requires `perl-parser-core`):
94//!
95//! ```rust,ignore
96//! use perl_parser_core::Parser;
97//! use perl_ast::NodeKind;
98//!
99//! let mut parser = Parser::new("my $x = 42;");
100//! let ast = parser.parse().expect("should parse");
101//! assert!(matches!(ast.kind, NodeKind::Program { .. }));
102//! ```
103
104// Re-export SourceLocation from perl-position-tracking for unified span handling
105pub use perl_position_tracking::SourceLocation;
106// Re-export Token and TokenKind from perl-token for AST error nodes
107pub use perl_token::{Token, TokenKind};
108use std::fmt;
109
110/// Core AST node representing any Perl language construct within parsing workflows.
111///
112/// This is the fundamental building block for representing parsed Perl code. Each node
113/// contains both the semantic information (kind) and positional information (location)
114/// necessary for comprehensive script analysis.
115///
116/// # LSP Workflow Role
117///
118/// Nodes flow through tooling stages:
119/// - **Parse**: Created by the parser as it builds the syntax tree
120/// - **Index**: Visited to build symbol and reference tables
121/// - **Navigate**: Used to resolve definitions, references, and call hierarchy
122/// - **Complete**: Provides contextual information for completion and hover
123/// - **Analyze**: Drives semantic analysis and diagnostics
124///
125/// # Memory Optimization
126///
127/// The structure is designed for efficient memory usage during large-scale parsing:
128/// - `SourceLocation` uses compact position encoding for large files
129/// - `NodeKind` enum variants minimize memory overhead for common constructs
130/// - Clone operations are optimized for shared analysis workflows
131///
132/// # Examples
133///
134/// Construct a variable declaration node manually:
135///
136/// ```
137/// use perl_ast::{Node, NodeKind, SourceLocation};
138///
139/// let loc = SourceLocation { start: 0, end: 11 };
140/// let var = Node::new(
141/// NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() },
142/// loc,
143/// );
144/// let decl = Node::new(
145/// NodeKind::VariableDeclaration {
146/// declarator: "my".to_string(),
147/// variable: Box::new(var),
148/// attributes: vec![],
149/// initializer: None,
150/// },
151/// loc,
152/// );
153/// assert_eq!(decl.kind.kind_name(), "VariableDeclaration");
154/// ```
155///
156/// Typically you obtain nodes from the parser rather than constructing them by hand:
157///
158/// ```ignore
159/// use perl_parser::Parser;
160///
161/// let mut parser = Parser::new("my $x = 42;");
162/// let ast = parser.parse()?;
163/// println!("AST: {}", ast.to_sexp());
164/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct Node {
    /// Which Perl construct this node represents, together with that variant's
    /// payload (child nodes, names, operator text, and so on).
    pub kind: NodeKind,
    /// Span of this node in the source text, given as `start`/`end` positions.
    pub location: SourceLocation,
}
172
173impl Node {
174 /// Create a new AST node with the given kind and source location.
175 ///
176 /// # Examples
177 ///
178 /// ```
179 /// use perl_ast::{Node, NodeKind, SourceLocation};
180 ///
181 /// let node = Node::new(
182 /// NodeKind::Number { value: "42".to_string() },
183 /// SourceLocation { start: 0, end: 2 },
184 /// );
185 /// assert_eq!(node.kind.kind_name(), "Number");
186 /// assert_eq!(node.location.start, 0);
187 /// ```
188 pub fn new(kind: NodeKind, location: SourceLocation) -> Self {
189 Node { kind, location }
190 }
191
192 /// Convert the AST to a tree-sitter compatible S-expression.
193 ///
194 /// Produces a parenthesized representation compatible with tree-sitter's
195 /// S-expression format, useful for debugging and snapshot testing.
196 ///
197 /// # Examples
198 ///
199 /// ```
200 /// use perl_ast::{Node, NodeKind, SourceLocation};
201 ///
202 /// let loc = SourceLocation { start: 0, end: 2 };
203 /// let num = Node::new(NodeKind::Number { value: "42".to_string() }, loc);
204 /// let program = Node::new(
205 /// NodeKind::Program { statements: vec![num] },
206 /// loc,
207 /// );
208 /// let sexp = program.to_sexp();
209 /// assert!(sexp.starts_with("(source_file"));
210 /// ```
211 pub fn to_sexp(&self) -> String {
212 match &self.kind {
213 NodeKind::Program { statements } => {
214 let stmts =
215 statements.iter().map(|s| s.to_sexp_inner()).collect::<Vec<_>>().join(" ");
216 format!("(source_file {})", stmts)
217 }
218
219 NodeKind::ExpressionStatement { expression } => {
220 format!("(expression_statement {})", expression.to_sexp())
221 }
222
223 NodeKind::VariableDeclaration { declarator, variable, attributes, initializer } => {
224 let attrs_str = if attributes.is_empty() {
225 String::new()
226 } else {
227 format!(" (attributes {})", attributes.join(" "))
228 };
229 if let Some(init) = initializer {
230 format!(
231 "({}_declaration {}{}{})",
232 declarator,
233 variable.to_sexp(),
234 attrs_str,
235 init.to_sexp()
236 )
237 } else {
238 format!("({}_declaration {}{})", declarator, variable.to_sexp(), attrs_str)
239 }
240 }
241
242 NodeKind::VariableListDeclaration {
243 declarator,
244 variables,
245 attributes,
246 initializer,
247 } => {
248 let vars = variables.iter().map(|v| v.to_sexp()).collect::<Vec<_>>().join(" ");
249 let attrs_str = if attributes.is_empty() {
250 String::new()
251 } else {
252 format!(" (attributes {})", attributes.join(" "))
253 };
254 if let Some(init) = initializer {
255 format!(
256 "({}_declaration ({}){}{})",
257 declarator,
258 vars,
259 attrs_str,
260 init.to_sexp()
261 )
262 } else {
263 format!("({}_declaration ({}){})", declarator, vars, attrs_str)
264 }
265 }
266
267 NodeKind::Variable { sigil, name } => {
268 // Format expected by bless parsing tests: (variable $ name)
269 format!("(variable {} {})", sigil, name)
270 }
271
272 NodeKind::VariableWithAttributes { variable, attributes } => {
273 let attrs = attributes.join(" ");
274 format!("({} (attributes {}))", variable.to_sexp(), attrs)
275 }
276
277 NodeKind::Assignment { lhs, rhs, op } => {
278 format!(
279 "(assignment_{} {} {})",
280 op.replace("=", "assign"),
281 lhs.to_sexp(),
282 rhs.to_sexp()
283 )
284 }
285
286 NodeKind::Binary { op, left, right } => {
287 // Tree-sitter format: (binary_op left right)
288 let op_name = format_binary_operator(op);
289 format!("({} {} {})", op_name, left.to_sexp(), right.to_sexp())
290 }
291
292 NodeKind::Ternary { condition, then_expr, else_expr } => {
293 format!(
294 "(ternary {} {} {})",
295 condition.to_sexp(),
296 then_expr.to_sexp(),
297 else_expr.to_sexp()
298 )
299 }
300
301 NodeKind::Unary { op, operand } => {
302 // Tree-sitter format: (unary_op operand)
303 let op_name = format_unary_operator(op);
304 format!("({} {})", op_name, operand.to_sexp())
305 }
306
307 NodeKind::Diamond => "(diamond)".to_string(),
308
309 NodeKind::Ellipsis => "(ellipsis)".to_string(),
310
311 NodeKind::Undef => "(undef)".to_string(),
312
313 NodeKind::Readline { filehandle } => {
314 if let Some(fh) = filehandle {
315 format!("(readline {})", fh)
316 } else {
317 "(readline)".to_string()
318 }
319 }
320
321 NodeKind::Glob { pattern } => {
322 format!("(glob {})", pattern)
323 }
324 NodeKind::Typeglob { name } => {
325 format!("(typeglob {})", name)
326 }
327
328 NodeKind::Number { value } => {
329 // Format expected by bless parsing tests: (number value)
330 format!("(number {})", value)
331 }
332
333 NodeKind::String { value, interpolated } => {
334 // Escape quotes in string value to prevent S-expression parsing issues
335 let escaped_value = value.replace('\\', "\\\\").replace('"', "\\\"");
336
337 // Format based on interpolation status
338 if *interpolated {
339 format!("(string_interpolated \"{}\")", escaped_value)
340 } else {
341 format!("(string \"{}\")", escaped_value)
342 }
343 }
344
345 NodeKind::Heredoc { delimiter, content, interpolated, indented, command, .. } => {
346 let type_str = if *command {
347 "heredoc_command"
348 } else if *indented {
349 if *interpolated { "heredoc_indented_interpolated" } else { "heredoc_indented" }
350 } else if *interpolated {
351 "heredoc_interpolated"
352 } else {
353 "heredoc"
354 };
355 format!("({} {:?} {:?})", type_str, delimiter, content)
356 }
357
358 NodeKind::ArrayLiteral { elements } => {
359 let elems = elements.iter().map(|e| e.to_sexp()).collect::<Vec<_>>().join(" ");
360 format!("(array {})", elems)
361 }
362
363 NodeKind::HashLiteral { pairs } => {
364 let kvs = pairs
365 .iter()
366 .map(|(k, v)| format!("({} {})", k.to_sexp(), v.to_sexp()))
367 .collect::<Vec<_>>()
368 .join(" ");
369 format!("(hash {})", kvs)
370 }
371
372 NodeKind::Block { statements } => {
373 let stmts = statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ");
374 format!("(block {})", stmts)
375 }
376
377 NodeKind::Eval { block } => {
378 format!("(eval {})", block.to_sexp())
379 }
380
381 NodeKind::Do { block } => {
382 format!("(do {})", block.to_sexp())
383 }
384
385 NodeKind::Defer { block } => {
386 format!("(defer {})", block.to_sexp())
387 }
388
389 NodeKind::Try { body, catch_blocks, finally_block } => {
390 let mut parts = vec![format!("(try {})", body.to_sexp())];
391
392 for (var, block) in catch_blocks {
393 if let Some(v) = var {
394 parts.push(format!("(catch {} {})", v, block.to_sexp()));
395 } else {
396 parts.push(format!("(catch {})", block.to_sexp()));
397 }
398 }
399
400 if let Some(finally) = finally_block {
401 parts.push(format!("(finally {})", finally.to_sexp()));
402 }
403
404 parts.join(" ")
405 }
406
407 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
408 let mut parts =
409 vec![format!("(if {} {})", condition.to_sexp(), then_branch.to_sexp())];
410
411 for (cond, block) in elsif_branches {
412 parts.push(format!("(elsif {} {})", cond.to_sexp(), block.to_sexp()));
413 }
414
415 if let Some(else_block) = else_branch {
416 parts.push(format!("(else {})", else_block.to_sexp()));
417 }
418
419 parts.join(" ")
420 }
421
422 NodeKind::LabeledStatement { label, statement } => {
423 format!("(labeled_statement {} {})", label, statement.to_sexp())
424 }
425
426 NodeKind::While { condition, body, continue_block } => {
427 let mut s = format!("(while {} {})", condition.to_sexp(), body.to_sexp());
428 if let Some(cont) = continue_block {
429 s.push_str(&format!(" (continue {})", cont.to_sexp()));
430 }
431 s
432 }
433 NodeKind::Tie { variable, package, args } => {
434 let mut s = format!("(tie {} {}", variable.to_sexp(), package.to_sexp());
435 for arg in args {
436 s.push_str(&format!(" {}", arg.to_sexp()));
437 }
438 s.push(')');
439 s
440 }
441 NodeKind::Untie { variable } => {
442 format!("(untie {})", variable.to_sexp())
443 }
444 NodeKind::For { init, condition, update, body, continue_block } => {
445 let init_str =
446 init.as_ref().map(|i| i.to_sexp()).unwrap_or_else(|| "()".to_string());
447 let cond_str =
448 condition.as_ref().map(|c| c.to_sexp()).unwrap_or_else(|| "()".to_string());
449 let update_str =
450 update.as_ref().map(|u| u.to_sexp()).unwrap_or_else(|| "()".to_string());
451 let mut result =
452 format!("(for {} {} {} {})", init_str, cond_str, update_str, body.to_sexp());
453 if let Some(cont) = continue_block {
454 result.push_str(&format!(" (continue {})", cont.to_sexp()));
455 }
456 result
457 }
458
459 NodeKind::Foreach { variable, list, body, continue_block } => {
460 let cont = if let Some(cb) = continue_block {
461 format!(" {}", cb.to_sexp())
462 } else {
463 String::new()
464 };
465 format!(
466 "(foreach {} {} {}{})",
467 variable.to_sexp(),
468 list.to_sexp(),
469 body.to_sexp(),
470 cont
471 )
472 }
473
474 NodeKind::Given { expr, body } => {
475 format!("(given {} {})", expr.to_sexp(), body.to_sexp())
476 }
477
478 NodeKind::When { condition, body } => {
479 format!("(when {} {})", condition.to_sexp(), body.to_sexp())
480 }
481
482 NodeKind::Default { body } => {
483 format!("(default {})", body.to_sexp())
484 }
485
486 NodeKind::StatementModifier { statement, modifier, condition } => {
487 format!(
488 "(statement_modifier_{} {} {})",
489 modifier,
490 statement.to_sexp(),
491 condition.to_sexp()
492 )
493 }
494
495 NodeKind::Subroutine { name, prototype, signature, attributes, body, name_span: _ } => {
496 if let Some(sub_name) = name {
497 // Named subroutine - bless test expected format: (sub name () block)
498 let mut parts = vec![sub_name.clone()];
499
500 // Add attributes if present (before prototype/signature)
501 if !attributes.is_empty() {
502 for attr in attributes {
503 parts.push(format!(":{}", attr));
504 }
505 }
506
507 // Add prototype/signature - use () for empty prototype
508 if let Some(proto) = prototype {
509 parts.push(format!("({})", proto.to_sexp()));
510 } else if signature.is_some() {
511 // If there's a signature but no prototype, still show ()
512 parts.push("()".to_string());
513 } else {
514 parts.push("()".to_string());
515 }
516
517 // Add body
518 parts.push(body.to_sexp());
519
520 // Format: (sub name [attrs...] ()(block ...)) - space between name and (), no space between () and block
521 if parts.len() >= 3 && parts[parts.len() - 2] == "()" {
522 let name_and_attrs = parts[0..parts.len() - 2].join(" ");
523 let proto = &parts[parts.len() - 2];
524 let body = &parts[parts.len() - 1];
525 format!("(sub {} {}{})", name_and_attrs, proto, body)
526 } else {
527 format!("(sub {})", parts.join(" "))
528 }
529 } else {
530 // Anonymous subroutine - tree-sitter format
531 let mut parts = Vec::new();
532
533 // Add attributes if present
534 if !attributes.is_empty() {
535 let attrs: Vec<String> = attributes
536 .iter()
537 .map(|_attr| "(attribute (attribute_name))".to_string())
538 .collect();
539 parts.push(format!("(attrlist {})", attrs.join("")));
540 }
541
542 // Add prototype if present
543 if let Some(proto) = prototype {
544 parts.push(proto.to_sexp());
545 }
546
547 // Add signature if present
548 if let Some(sig) = signature {
549 parts.push(sig.to_sexp());
550 }
551
552 // Add body
553 parts.push(body.to_sexp());
554
555 format!("(anonymous_subroutine_expression {})", parts.join(""))
556 }
557 }
558
559 NodeKind::Prototype { content: _ } => "(prototype)".to_string(),
560
561 NodeKind::Signature { parameters } => {
562 let params = parameters.iter().map(|p| p.to_sexp()).collect::<Vec<_>>().join(" ");
563 format!("(signature {})", params)
564 }
565
566 NodeKind::MandatoryParameter { variable } => {
567 format!("(mandatory_parameter {})", variable.to_sexp())
568 }
569
570 NodeKind::OptionalParameter { variable, default_value } => {
571 format!("(optional_parameter {} {})", variable.to_sexp(), default_value.to_sexp())
572 }
573
574 NodeKind::SlurpyParameter { variable } => {
575 format!("(slurpy_parameter {})", variable.to_sexp())
576 }
577
578 NodeKind::NamedParameter { variable } => {
579 format!("(named_parameter {})", variable.to_sexp())
580 }
581
582 NodeKind::Method { name: _, signature, attributes, body } => {
583 let block_contents = match &body.kind {
584 NodeKind::Block { statements } => {
585 statements.iter().map(|s| s.to_sexp()).collect::<Vec<_>>().join(" ")
586 }
587 _ => body.to_sexp(),
588 };
589
590 let mut parts = vec!["(bareword)".to_string()];
591
592 // Add signature if present
593 if let Some(sig) = signature {
594 parts.push(sig.to_sexp());
595 }
596
597 // Add attributes if present
598 if !attributes.is_empty() {
599 let attrs: Vec<String> = attributes
600 .iter()
601 .map(|_attr| "(attribute (attribute_name))".to_string())
602 .collect();
603 parts.push(format!("(attrlist {})", attrs.join("")));
604 }
605
606 parts.push(format!("(block {})", block_contents));
607 format!("(method_declaration_statement {})", parts.join(" "))
608 }
609
610 NodeKind::Return { value } => {
611 if let Some(val) = value {
612 format!("(return {})", val.to_sexp())
613 } else {
614 "(return)".to_string()
615 }
616 }
617
618 NodeKind::LoopControl { op, label } => {
619 if let Some(l) = label {
620 format!("({} {})", op, l)
621 } else {
622 format!("({})", op)
623 }
624 }
625
626 NodeKind::Goto { target } => {
627 format!("(goto {})", target.to_sexp())
628 }
629
630 NodeKind::MethodCall { object, method, args } => {
631 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
632 format!("(method_call {} {} ({}))", object.to_sexp(), method, args_str)
633 }
634
635 NodeKind::FunctionCall { name, args } => {
636 // Special handling for functions that should use call format in tree-sitter tests
637 if matches!(
638 name.as_str(),
639 "bless"
640 | "shift"
641 | "unshift"
642 | "open"
643 | "die"
644 | "warn"
645 | "print"
646 | "printf"
647 | "say"
648 | "push"
649 | "pop"
650 | "map"
651 | "sort"
652 | "grep"
653 | "keys"
654 | "values"
655 | "each"
656 | "defined"
657 | "scalar"
658 | "ref"
659 ) {
660 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
661 if args.is_empty() {
662 format!("(call {} ())", name)
663 } else {
664 format!("(call {} ({}))", name, args_str)
665 }
666 } else {
667 // Tree-sitter format varies by context
668 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
669 if args.is_empty() {
670 "(function_call_expression (function))".to_string()
671 } else {
672 format!("(ambiguous_function_call_expression (function) {})", args_str)
673 }
674 }
675 }
676
677 NodeKind::IndirectCall { method, object, args } => {
678 let args_str = args.iter().map(|a| a.to_sexp()).collect::<Vec<_>>().join(" ");
679 format!("(indirect_call {} {} ({}))", method, object.to_sexp(), args_str)
680 }
681
682 NodeKind::Regex { pattern, replacement, modifiers, has_embedded_code } => {
683 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
684 format!("(regex {:?} {:?} {:?}{})", pattern, replacement, modifiers, risk_marker)
685 }
686
687 NodeKind::Match { expr, pattern, modifiers, has_embedded_code, negated } => {
688 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
689 let op = if *negated { "not_match" } else { "match" };
690 format!(
691 "({} {} (regex {:?} {:?}{}))",
692 op,
693 expr.to_sexp(),
694 pattern,
695 modifiers,
696 risk_marker
697 )
698 }
699
700 NodeKind::Substitution {
701 expr,
702 pattern,
703 replacement,
704 modifiers,
705 has_embedded_code,
706 negated,
707 } => {
708 let risk_marker = if *has_embedded_code { " (risk:code)" } else { "" };
709 let neg_marker = if *negated { " (negated)" } else { "" };
710 format!(
711 "(substitution {} {:?} {:?} {:?}{}{})",
712 expr.to_sexp(),
713 pattern,
714 replacement,
715 modifiers,
716 risk_marker,
717 neg_marker
718 )
719 }
720
721 NodeKind::Transliteration { expr, search, replace, modifiers, negated } => {
722 let neg_marker = if *negated { " (negated)" } else { "" };
723 format!(
724 "(transliteration {} {:?} {:?} {:?}{})",
725 expr.to_sexp(),
726 search,
727 replace,
728 modifiers,
729 neg_marker
730 )
731 }
732
733 NodeKind::Package { name, block, name_span: _ } => {
734 if let Some(blk) = block {
735 format!("(package {} {})", name, blk.to_sexp())
736 } else {
737 format!("(package {})", name)
738 }
739 }
740
741 NodeKind::Use { module, args, has_filter_risk } => {
742 let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
743 if args.is_empty() {
744 format!("(use {}{})", module, risk_marker)
745 } else {
746 let args_str = args.join(" ");
747 format!("(use {} ({}){})", module, args_str, risk_marker)
748 }
749 }
750
751 NodeKind::No { module, args, has_filter_risk } => {
752 let risk_marker = if *has_filter_risk { " (risk:filter)" } else { "" };
753 if args.is_empty() {
754 format!("(no {}{})", module, risk_marker)
755 } else {
756 let args_str = args.join(" ");
757 format!("(no {} ({}){})", module, args_str, risk_marker)
758 }
759 }
760
761 NodeKind::PhaseBlock { phase, phase_span: _, block } => {
762 format!("({} {})", phase, block.to_sexp())
763 }
764
765 NodeKind::DataSection { marker, body } => {
766 if let Some(body_text) = body {
767 format!("(data_section {} \"{}\")", marker, body_text.escape_default())
768 } else {
769 format!("(data_section {})", marker)
770 }
771 }
772
773 NodeKind::Class { name, parents, body } => {
774 if parents.is_empty() {
775 format!("(class {} {})", name, body.to_sexp())
776 } else {
777 format!("(class {} :isa({}) {})", name, parents.join(","), body.to_sexp())
778 }
779 }
780
781 NodeKind::Format { name, body } => {
782 format!("(format {} {:?})", name, body)
783 }
784
785 NodeKind::Identifier { name } => {
786 // Format expected by tests: (identifier name)
787 format!("(identifier {})", name)
788 }
789
790 NodeKind::Error { message, partial, .. } => {
791 if let Some(node) = partial {
792 format!("(ERROR \"{}\" {})", message.escape_default(), node.to_sexp())
793 } else {
794 format!("(ERROR \"{}\")", message.escape_default())
795 }
796 }
797 NodeKind::MissingExpression => "(missing_expression)".to_string(),
798 NodeKind::MissingStatement => "(missing_statement)".to_string(),
799 NodeKind::MissingIdentifier => "(missing_identifier)".to_string(),
800 NodeKind::MissingBlock => "(missing_block)".to_string(),
801 NodeKind::UnknownRest => "(UNKNOWN_REST)".to_string(),
802 }
803 }
804
805 /// Convert the AST to S-expression format that unwraps expression statements in programs
806 pub fn to_sexp_inner(&self) -> String {
807 match &self.kind {
808 NodeKind::ExpressionStatement { expression } => {
809 // Check if this is an anonymous subroutine - if so, keep it wrapped
810 match &expression.kind {
811 NodeKind::Subroutine { name, .. } if name.is_none() => {
812 // Anonymous subroutine should remain wrapped in expression statement
813 self.to_sexp()
814 }
815 _ => {
816 // In the inner format, other expression statements are unwrapped
817 expression.to_sexp()
818 }
819 }
820 }
821 _ => {
822 // For all other node types, use regular to_sexp
823 self.to_sexp()
824 }
825 }
826 }
827
828 /// Call a function on every direct child node of this node.
829 ///
830 /// This enables depth-first traversal for operations like heredoc content attachment.
831 /// The closure receives a mutable reference to each child node.
832 #[inline]
833 pub fn for_each_child_mut<F: FnMut(&mut Node)>(&mut self, mut f: F) {
834 match &mut self.kind {
835 NodeKind::Tie { variable, package, args } => {
836 f(variable);
837 f(package);
838 for arg in args {
839 f(arg);
840 }
841 }
842 NodeKind::Untie { variable } => f(variable),
843
844 // Root program node
845 NodeKind::Program { statements } => {
846 for stmt in statements {
847 f(stmt);
848 }
849 }
850
851 // Statement wrappers
852 NodeKind::ExpressionStatement { expression } => f(expression),
853
854 // Variable declarations
855 NodeKind::VariableDeclaration { variable, initializer, .. } => {
856 f(variable);
857 if let Some(init) = initializer {
858 f(init);
859 }
860 }
861 NodeKind::VariableListDeclaration { variables, initializer, .. } => {
862 for var in variables {
863 f(var);
864 }
865 if let Some(init) = initializer {
866 f(init);
867 }
868 }
869 NodeKind::VariableWithAttributes { variable, .. } => f(variable),
870
871 // Binary operations
872 NodeKind::Binary { left, right, .. } => {
873 f(left);
874 f(right);
875 }
876 NodeKind::Ternary { condition, then_expr, else_expr } => {
877 f(condition);
878 f(then_expr);
879 f(else_expr);
880 }
881 NodeKind::Unary { operand, .. } => f(operand),
882 NodeKind::Assignment { lhs, rhs, .. } => {
883 f(lhs);
884 f(rhs);
885 }
886
887 // Control flow
888 NodeKind::Block { statements } => {
889 for stmt in statements {
890 f(stmt);
891 }
892 }
893 NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
894 f(condition);
895 f(then_branch);
896 for (elsif_cond, elsif_body) in elsif_branches {
897 f(elsif_cond);
898 f(elsif_body);
899 }
900 if let Some(else_body) = else_branch {
901 f(else_body);
902 }
903 }
904 NodeKind::While { condition, body, continue_block, .. } => {
905 f(condition);
906 f(body);
907 if let Some(cont) = continue_block {
908 f(cont);
909 }
910 }
911 NodeKind::For { init, condition, update, body, continue_block, .. } => {
912 if let Some(i) = init {
913 f(i);
914 }
915 if let Some(c) = condition {
916 f(c);
917 }
918 if let Some(u) = update {
919 f(u);
920 }
921 f(body);
922 if let Some(cont) = continue_block {
923 f(cont);
924 }
925 }
926 NodeKind::Foreach { variable, list, body, continue_block } => {
927 f(variable);
928 f(list);
929 f(body);
930 if let Some(cb) = continue_block {
931 f(cb);
932 }
933 }
934 NodeKind::Given { expr, body } => {
935 f(expr);
936 f(body);
937 }
938 NodeKind::When { condition, body } => {
939 f(condition);
940 f(body);
941 }
942 NodeKind::Default { body } => f(body),
943 NodeKind::StatementModifier { statement, condition, .. } => {
944 f(statement);
945 f(condition);
946 }
947 NodeKind::LabeledStatement { statement, .. } => f(statement),
948
949 // Eval and Do blocks
950 NodeKind::Eval { block } => f(block),
951 NodeKind::Do { block } => f(block),
952 NodeKind::Defer { block } => f(block),
953 NodeKind::Try { body, catch_blocks, finally_block } => {
954 f(body);
955 for (_, catch_body) in catch_blocks {
956 f(catch_body);
957 }
958 if let Some(finally) = finally_block {
959 f(finally);
960 }
961 }
962
963 // Function calls
964 NodeKind::FunctionCall { args, .. } => {
965 for arg in args {
966 f(arg);
967 }
968 }
969 NodeKind::MethodCall { object, args, .. } => {
970 f(object);
971 for arg in args {
972 f(arg);
973 }
974 }
975 NodeKind::IndirectCall { object, args, .. } => {
976 f(object);
977 for arg in args {
978 f(arg);
979 }
980 }
981
982 // Functions
983 NodeKind::Subroutine { prototype, signature, body, .. } => {
984 if let Some(proto) = prototype {
985 f(proto);
986 }
987 if let Some(sig) = signature {
988 f(sig);
989 }
990 f(body);
991 }
992 NodeKind::Method { signature, body, .. } => {
993 if let Some(sig) = signature {
994 f(sig);
995 }
996 f(body);
997 }
998 NodeKind::Return { value } => {
999 if let Some(v) = value {
1000 f(v);
1001 }
1002 }
1003 NodeKind::Goto { target } => f(target),
1004 NodeKind::Signature { parameters } => {
1005 for param in parameters {
1006 f(param);
1007 }
1008 }
1009 NodeKind::MandatoryParameter { variable } => f(variable),
1010 NodeKind::OptionalParameter { variable, default_value } => {
1011 f(variable);
1012 f(default_value);
1013 }
1014 NodeKind::SlurpyParameter { variable } => f(variable),
1015 NodeKind::NamedParameter { variable } => f(variable),
1016
1017 // Pattern matching
1018 NodeKind::Match { expr, .. } => f(expr),
1019 NodeKind::Substitution { expr, .. } => f(expr),
1020 NodeKind::Transliteration { expr, .. } => f(expr),
1021
1022 // Containers
1023 NodeKind::ArrayLiteral { elements } => {
1024 for elem in elements {
1025 f(elem);
1026 }
1027 }
1028 NodeKind::HashLiteral { pairs } => {
1029 for (key, value) in pairs {
1030 f(key);
1031 f(value);
1032 }
1033 }
1034
1035 // Package system
1036 NodeKind::Package { block, .. } => {
1037 if let Some(b) = block {
1038 f(b);
1039 }
1040 }
1041 NodeKind::PhaseBlock { block, .. } => f(block),
1042 NodeKind::Class { body, .. } => f(body),
1043
1044 // Error node might have a partial valid tree
1045 NodeKind::Error { partial, .. } => {
1046 if let Some(node) = partial {
1047 f(node);
1048 }
1049 }
1050
1051 // Leaf nodes (no children to traverse)
1052 NodeKind::Variable { .. }
1053 | NodeKind::Identifier { .. }
1054 | NodeKind::Number { .. }
1055 | NodeKind::String { .. }
1056 | NodeKind::Heredoc { .. }
1057 | NodeKind::Regex { .. }
1058 | NodeKind::Readline { .. }
1059 | NodeKind::Glob { .. }
1060 | NodeKind::Typeglob { .. }
1061 | NodeKind::Diamond
1062 | NodeKind::Ellipsis
1063 | NodeKind::Undef
1064 | NodeKind::Use { .. }
1065 | NodeKind::No { .. }
1066 | NodeKind::Prototype { .. }
1067 | NodeKind::DataSection { .. }
1068 | NodeKind::Format { .. }
1069 | NodeKind::LoopControl { .. }
1070 | NodeKind::MissingExpression
1071 | NodeKind::MissingStatement
1072 | NodeKind::MissingIdentifier
1073 | NodeKind::MissingBlock
1074 | NodeKind::UnknownRest => {}
1075 }
1076 }
1077
1078 /// Call a function on every direct child node of this node (immutable version).
1079 ///
1080 /// This enables depth-first traversal for read-only operations like AST analysis.
1081 /// The closure receives an immutable reference to each child node.
1082 #[inline]
1083 pub fn for_each_child<'a, F: FnMut(&'a Node)>(&'a self, mut f: F) {
1084 match &self.kind {
1085 NodeKind::Tie { variable, package, args } => {
1086 f(variable);
1087 f(package);
1088 for arg in args {
1089 f(arg);
1090 }
1091 }
1092 NodeKind::Untie { variable } => f(variable),
1093
1094 // Root program node
1095 NodeKind::Program { statements } => {
1096 for stmt in statements {
1097 f(stmt);
1098 }
1099 }
1100
1101 // Statement wrappers
1102 NodeKind::ExpressionStatement { expression } => f(expression),
1103
1104 // Variable declarations
1105 NodeKind::VariableDeclaration { variable, initializer, .. } => {
1106 f(variable);
1107 if let Some(init) = initializer {
1108 f(init);
1109 }
1110 }
1111 NodeKind::VariableListDeclaration { variables, initializer, .. } => {
1112 for var in variables {
1113 f(var);
1114 }
1115 if let Some(init) = initializer {
1116 f(init);
1117 }
1118 }
1119 NodeKind::VariableWithAttributes { variable, .. } => f(variable),
1120
1121 // Binary operations
1122 NodeKind::Binary { left, right, .. } => {
1123 f(left);
1124 f(right);
1125 }
1126 NodeKind::Ternary { condition, then_expr, else_expr } => {
1127 f(condition);
1128 f(then_expr);
1129 f(else_expr);
1130 }
1131 NodeKind::Unary { operand, .. } => f(operand),
1132 NodeKind::Assignment { lhs, rhs, .. } => {
1133 f(lhs);
1134 f(rhs);
1135 }
1136
1137 // Control flow
1138 NodeKind::Block { statements } => {
1139 for stmt in statements {
1140 f(stmt);
1141 }
1142 }
1143 NodeKind::If { condition, then_branch, elsif_branches, else_branch, .. } => {
1144 f(condition);
1145 f(then_branch);
1146 for (elsif_cond, elsif_body) in elsif_branches {
1147 f(elsif_cond);
1148 f(elsif_body);
1149 }
1150 if let Some(else_body) = else_branch {
1151 f(else_body);
1152 }
1153 }
1154 NodeKind::While { condition, body, continue_block, .. } => {
1155 f(condition);
1156 f(body);
1157 if let Some(cont) = continue_block {
1158 f(cont);
1159 }
1160 }
1161 NodeKind::For { init, condition, update, body, continue_block, .. } => {
1162 if let Some(i) = init {
1163 f(i);
1164 }
1165 if let Some(c) = condition {
1166 f(c);
1167 }
1168 if let Some(u) = update {
1169 f(u);
1170 }
1171 f(body);
1172 if let Some(cont) = continue_block {
1173 f(cont);
1174 }
1175 }
1176 NodeKind::Foreach { variable, list, body, continue_block } => {
1177 f(variable);
1178 f(list);
1179 f(body);
1180 if let Some(cb) = continue_block {
1181 f(cb);
1182 }
1183 }
1184 NodeKind::Given { expr, body } => {
1185 f(expr);
1186 f(body);
1187 }
1188 NodeKind::When { condition, body } => {
1189 f(condition);
1190 f(body);
1191 }
1192 NodeKind::Default { body } => f(body),
1193 NodeKind::StatementModifier { statement, condition, .. } => {
1194 f(statement);
1195 f(condition);
1196 }
1197 NodeKind::LabeledStatement { statement, .. } => f(statement),
1198
1199 // Eval and Do blocks
1200 NodeKind::Eval { block } => f(block),
1201 NodeKind::Do { block } => f(block),
1202 NodeKind::Defer { block } => f(block),
1203 NodeKind::Try { body, catch_blocks, finally_block } => {
1204 f(body);
1205 for (_, catch_body) in catch_blocks {
1206 f(catch_body);
1207 }
1208 if let Some(finally) = finally_block {
1209 f(finally);
1210 }
1211 }
1212
1213 // Function calls
1214 NodeKind::FunctionCall { args, .. } => {
1215 for arg in args {
1216 f(arg);
1217 }
1218 }
1219 NodeKind::MethodCall { object, args, .. } => {
1220 f(object);
1221 for arg in args {
1222 f(arg);
1223 }
1224 }
1225 NodeKind::IndirectCall { object, args, .. } => {
1226 f(object);
1227 for arg in args {
1228 f(arg);
1229 }
1230 }
1231
1232 // Functions
1233 NodeKind::Subroutine { prototype, signature, body, .. } => {
1234 if let Some(proto) = prototype {
1235 f(proto);
1236 }
1237 if let Some(sig) = signature {
1238 f(sig);
1239 }
1240 f(body);
1241 }
1242 NodeKind::Method { signature, body, .. } => {
1243 if let Some(sig) = signature {
1244 f(sig);
1245 }
1246 f(body);
1247 }
1248 NodeKind::Return { value } => {
1249 if let Some(v) = value {
1250 f(v);
1251 }
1252 }
1253 NodeKind::Goto { target } => f(target),
1254 NodeKind::Signature { parameters } => {
1255 for param in parameters {
1256 f(param);
1257 }
1258 }
1259 NodeKind::MandatoryParameter { variable } => f(variable),
1260 NodeKind::OptionalParameter { variable, default_value } => {
1261 f(variable);
1262 f(default_value);
1263 }
1264 NodeKind::SlurpyParameter { variable } => f(variable),
1265 NodeKind::NamedParameter { variable } => f(variable),
1266
1267 // Pattern matching
1268 NodeKind::Match { expr, .. } => f(expr),
1269 NodeKind::Substitution { expr, .. } => f(expr),
1270 NodeKind::Transliteration { expr, .. } => f(expr),
1271
1272 // Containers
1273 NodeKind::ArrayLiteral { elements } => {
1274 for elem in elements {
1275 f(elem);
1276 }
1277 }
1278 NodeKind::HashLiteral { pairs } => {
1279 for (key, value) in pairs {
1280 f(key);
1281 f(value);
1282 }
1283 }
1284
1285 // Package system
1286 NodeKind::Package { block, .. } => {
1287 if let Some(b) = block {
1288 f(b);
1289 }
1290 }
1291 NodeKind::PhaseBlock { block, .. } => f(block),
1292 NodeKind::Class { body, .. } => f(body),
1293
1294 // Error node might have a partial valid tree
1295 NodeKind::Error { partial, .. } => {
1296 if let Some(node) = partial {
1297 f(node);
1298 }
1299 }
1300
1301 // Leaf nodes (no children to traverse)
1302 NodeKind::Variable { .. }
1303 | NodeKind::Identifier { .. }
1304 | NodeKind::Number { .. }
1305 | NodeKind::String { .. }
1306 | NodeKind::Heredoc { .. }
1307 | NodeKind::Regex { .. }
1308 | NodeKind::Readline { .. }
1309 | NodeKind::Glob { .. }
1310 | NodeKind::Typeglob { .. }
1311 | NodeKind::Diamond
1312 | NodeKind::Ellipsis
1313 | NodeKind::Undef
1314 | NodeKind::Use { .. }
1315 | NodeKind::No { .. }
1316 | NodeKind::Prototype { .. }
1317 | NodeKind::DataSection { .. }
1318 | NodeKind::Format { .. }
1319 | NodeKind::LoopControl { .. }
1320 | NodeKind::MissingExpression
1321 | NodeKind::MissingStatement
1322 | NodeKind::MissingIdentifier
1323 | NodeKind::MissingBlock
1324 | NodeKind::UnknownRest => {}
1325 }
1326 }
1327
1328 /// Count the total number of nodes in this subtree (inclusive).
1329 ///
1330 /// # Examples
1331 ///
1332 /// ```
1333 /// use perl_ast::{Node, NodeKind, SourceLocation};
1334 ///
1335 /// let loc = SourceLocation { start: 0, end: 1 };
1336 /// let leaf = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1337 /// assert_eq!(leaf.count_nodes(), 1);
1338 ///
1339 /// let program = Node::new(
1340 /// NodeKind::Program { statements: vec![leaf] },
1341 /// loc,
1342 /// );
1343 /// assert_eq!(program.count_nodes(), 2);
1344 /// ```
1345 pub fn count_nodes(&self) -> usize {
1346 let mut count = 1;
1347 self.for_each_child(|child| {
1348 count += child.count_nodes();
1349 });
1350 count
1351 }
1352
1353 /// Collect direct child nodes into a vector for convenience APIs.
1354 ///
1355 /// # Examples
1356 ///
1357 /// ```
1358 /// use perl_ast::{Node, NodeKind, SourceLocation};
1359 ///
1360 /// let loc = SourceLocation { start: 0, end: 1 };
1361 /// let stmt = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1362 /// let program = Node::new(
1363 /// NodeKind::Program { statements: vec![stmt] },
1364 /// loc,
1365 /// );
1366 /// assert_eq!(program.children().len(), 1);
1367 /// ```
1368 #[inline]
1369 pub fn children(&self) -> Vec<&Node> {
1370 let mut children = Vec::new();
1371 self.for_each_child(|child| children.push(child));
1372 children
1373 }
1374
1375 /// Count direct child nodes without allocating an intermediate vector.
1376 ///
1377 /// This is more efficient than `children().len()` when callers only need
1378 /// cardinality.
1379 #[inline]
1380 pub fn child_count(&self) -> usize {
1381 let mut count = 0;
1382 self.for_each_child(|_| count += 1);
1383 count
1384 }
1385
1386 /// Get the first direct child node, if any.
1387 ///
1388 /// Optimized to avoid allocating the children vector.
1389 #[inline]
1390 pub fn first_child(&self) -> Option<&Node> {
1391 let mut result = None;
1392 self.for_each_child(|child| {
1393 if result.is_none() {
1394 result = Some(child);
1395 }
1396 });
1397 result
1398 }
1399
1400 /// Returns `true` when this node's source span contains `offset`.
1401 ///
1402 /// The start position is inclusive and the end position is exclusive.
1403 #[inline]
1404 pub fn contains_offset(&self, offset: usize) -> bool {
1405 self.location.start <= offset && offset < self.location.end
1406 }
1407
1408 /// Returns the byte length of this node's source span.
1409 ///
1410 /// Uses saturating subtraction so malformed spans never underflow.
1411 #[inline]
1412 pub fn span_len(&self) -> usize {
1413 self.location.end.saturating_sub(self.location.start)
1414 }
1415
1416 /// Get the last direct child node, if any.
1417 ///
1418 /// Optimized to avoid allocating the children vector.
1419 ///
1420 /// # Examples
1421 ///
1422 /// ```
1423 /// use perl_ast::{Node, NodeKind, SourceLocation};
1424 ///
1425 /// let loc = SourceLocation { start: 0, end: 1 };
1426 /// let first = Node::new(NodeKind::Number { value: "1".to_string() }, loc);
1427 /// let second = Node::new(NodeKind::Number { value: "2".to_string() }, loc);
1428 /// let program = Node::new(
1429 /// NodeKind::Program { statements: vec![first, second] },
1430 /// loc,
1431 /// );
1432 ///
1433 /// assert_eq!(program.last_child().map(|n| n.kind.kind_name()), Some("Number"));
1434 /// assert_eq!(Node::new(NodeKind::Block { statements: vec![] }, loc).last_child(), None);
1435 /// ```
1436 #[inline]
1437 pub fn last_child(&self) -> Option<&Node> {
1438 let mut result = None;
1439 self.for_each_child(|child| {
1440 result = Some(child);
1441 });
1442 result
1443 }
1444}
1445
/// Enumeration of the Perl language constructs the parser can represent in the AST.
///
/// Each variant carries the data and child nodes for one kind of construct. Child
/// nodes are stored as `Box<Node>`, `Option<Box<Node>>` or `Vec<Node>`; everything
/// else (names, operators, literal text) is stored as plain `String`s and flags.
///
/// # LSP Workflow Integration
///
/// Node kinds are processed differently across workflow stages:
/// - **Parse**: All variants are produced by the parser
/// - **Index**: Symbol-bearing variants feed workspace indexing
/// - **Navigate**: Call and reference variants support navigation features
/// - **Complete**: Expression variants provide completion context
/// - **Analyze**: Semantic variants drive diagnostics and refactoring
///
/// # Examples
///
/// Pattern-match on node kinds to extract semantic information:
///
/// ```
/// use perl_ast::{Node, NodeKind, SourceLocation};
///
/// let loc = SourceLocation { start: 0, end: 5 };
/// let node = Node::new(
///     NodeKind::Variable { sigil: "$".to_string(), name: "foo".to_string() },
///     loc,
/// );
///
/// assert!(matches!(
///     &node.kind,
///     NodeKind::Variable { sigil, name } if sigil == "$" && name == "foo"
/// ));
/// ```
///
/// Use [`kind_name()`](NodeKind::kind_name) for debugging and diagnostics:
///
/// ```
/// use perl_ast::NodeKind;
///
/// let kind = NodeKind::Number { value: "99".to_string() };
/// assert_eq!(kind.kind_name(), "Number");
///
/// let kind = NodeKind::Variable { sigil: "@".to_string(), name: "list".to_string() };
/// assert_eq!(kind.kind_name(), "Variable");
/// ```
///
/// # Performance Considerations
///
/// - Recursive children are held behind `Box`, which keeps the enum finitely sized
/// - Child lists are stored in `Vec`s
/// - `Clone` is derived, so cloning a variant clones each of its fields
/// - `PartialEq` is derived, so equality compares variants field by field
#[derive(Debug, Clone, PartialEq)]
pub enum NodeKind {
    /// Top-level program containing all statements in a Perl script
    ///
    /// This is the root node for parsed Perl source and holds every top-level
    /// statement.
    Program {
        /// All top-level statements in the Perl script
        statements: Vec<Node>,
    },

    /// Statement wrapper for expressions that appear at statement level
    ///
    /// Distinguishes an expression used as a statement from an expression nested
    /// inside another construct.
    ExpressionStatement {
        /// The expression being used as a statement
        expression: Box<Node>,
    },

    /// Variable declaration with a scope declarator
    ///
    /// Represents declarations like `my $var`, `our $global`, `local $dynamic`, etc.
    VariableDeclaration {
        /// Scope declarator: "my", "our", "local", "state"
        declarator: String,
        /// The variable being declared
        variable: Box<Node>,
        /// Variable attributes (e.g., ":shared", ":locked")
        attributes: Vec<String>,
        /// Optional initializer expression
        initializer: Option<Box<Node>>,
    },

    /// Multiple variable declaration in a single statement
    ///
    /// Handles constructs like `my ($x, $y) = @values`.
    VariableListDeclaration {
        /// Scope declarator for all variables in the list
        declarator: String,
        /// All variables being declared in the list
        variables: Vec<Node>,
        /// Attributes applied to the variable list
        attributes: Vec<String>,
        /// Optional initializer for the entire variable list
        initializer: Option<Box<Node>>,
    },

    /// Perl variable reference (scalar, array, hash, etc.)
    Variable {
        /// Variable sigil indicating type: $, @, %, &, *
        sigil: String, // $, @, %, &, *
        /// Variable name without sigil
        name: String,
    },

    /// Variable together with the attributes applied to it
    VariableWithAttributes {
        /// The base variable node
        variable: Box<Node>,
        /// List of attribute names applied to the variable
        attributes: Vec<String>,
    },

    /// Assignment expression: `lhs op rhs`
    Assignment {
        /// Left-hand side of assignment
        lhs: Box<Node>,
        /// Right-hand side of assignment
        rhs: Box<Node>,
        /// Assignment operator: =, +=, -=, etc.
        op: String, // =, +=, -=, etc.
    },

    // Expressions
    /// Binary operation: `left op right`
    Binary {
        /// Binary operator
        op: String,
        /// Left operand
        left: Box<Node>,
        /// Right operand
        right: Box<Node>,
    },

    /// Ternary conditional expression: `condition ? then_expr : else_expr`
    Ternary {
        /// Condition to evaluate
        condition: Box<Node>,
        /// Expression when condition is true
        then_expr: Box<Node>,
        /// Expression when condition is false
        else_expr: Box<Node>,
    },

    /// Unary operation applying `op` to a single operand
    Unary {
        /// Unary operator
        op: String,
        /// Operand to apply operator to
        operand: Box<Node>,
    },

    // I/O operations
    /// Diamond operator for input: `<>`
    Diamond, // <>

    /// Ellipsis operator: `...`
    Ellipsis, // ...

    /// Undef value: `undef`
    Undef, // undef

    /// Readline operation on a filehandle
    Readline {
        /// Optional filehandle: `<STDIN>`, `<$fh>`, etc.
        filehandle: Option<String>, // <STDIN>, <$fh>, etc.
    },

    /// File glob expression such as `<*.txt>`
    Glob {
        /// Pattern string for file matching
        pattern: String, // <*.txt>
    },

    /// Typeglob expression: `*foo` or `*main::bar`
    ///
    /// Provides access to all symbol table entries for a given name.
    Typeglob {
        /// Name of the symbol (including package qualification)
        name: String,
    },

    /// Numeric literal in Perl code (integer, float, hex, octal, binary)
    ///
    /// Represents all numeric literal forms: `42`, `3.14`, `0x1A`, `0o755`, `0b1010`.
    Number {
        /// String representation preserving original format
        value: String,
    },

    /// String literal with optional interpolation
    ///
    /// Handles both single-quoted (`'literal'`) and double-quoted (`"$interpolated"`) strings.
    String {
        /// String content (after quote processing)
        value: String,
        /// Whether the string supports variable interpolation
        interpolated: bool,
    },

    /// Heredoc string literal for multi-line content
    ///
    /// Supports all heredoc forms: `<<EOF`, `<<'EOF'`, `<<"EOF"`, `<<~EOF` (indented).
    Heredoc {
        /// Delimiter marking heredoc boundaries
        delimiter: String,
        /// Content between delimiters
        content: String,
        /// Whether content supports variable interpolation
        interpolated: bool,
        /// Whether leading whitespace is stripped (<<~ form)
        indented: bool,
        /// Whether this is a command execution heredoc (<<`EOF`)
        command: bool,
        /// Body span for breakpoint detection (populated by drain_pending_heredocs)
        body_span: Option<SourceLocation>,
    },

    /// Array literal expression: `(1, 2, 3)` or `[1, 2, 3]`
    ArrayLiteral {
        /// Elements in the array
        elements: Vec<Node>,
    },

    /// Hash literal expression: `(key => 'value')` or `{key => 'value'}`
    HashLiteral {
        /// Key-value pairs in the hash
        pairs: Vec<(Node, Node)>,
    },

    /// Block of statements: `{ ... }`
    ///
    /// Used for control structures, subroutine bodies, and bare blocks.
    Block {
        /// Statements within the block
        statements: Vec<Node>,
    },

    /// Eval block for exception handling: `eval { ... }`
    Eval {
        /// Block to evaluate with exception trapping
        block: Box<Node>,
    },

    /// Do block for file inclusion or expression evaluation: `do { ... }` or `do "file"`
    Do {
        /// Block to execute or file expression
        block: Box<Node>,
    },

    /// Defer block for deferred cleanup on scope exit (`defer` feature, introduced as
    /// experimental in Perl 5.36)
    Defer {
        /// Block to execute on scope exit
        block: Box<Node>,
    },

    /// Try-catch-finally for modern exception handling (Syntax::Keyword::Try style)
    Try {
        /// Try block body
        body: Box<Node>,
        /// Catch blocks: (optional exception variable, handler block)
        catch_blocks: Vec<(Option<String>, Box<Node>)>,
        /// Optional finally block
        finally_block: Option<Box<Node>>,
    },

    /// If-elsif-else conditional statement
    If {
        /// Condition expression
        condition: Box<Node>,
        /// Then branch block
        then_branch: Box<Node>,
        /// Elsif branches: (condition, block) pairs
        elsif_branches: Vec<(Box<Node>, Box<Node>)>,
        /// Optional else branch
        else_branch: Option<Box<Node>>,
    },

    /// Statement with a label for loop control: `LABEL: while (...)`
    LabeledStatement {
        /// Label name (e.g., "OUTER", "LINE")
        label: String,
        /// Labeled statement (typically a loop)
        statement: Box<Node>,
    },

    /// While loop: `while (condition) { ... }`
    While {
        /// Loop condition
        condition: Box<Node>,
        /// Loop body
        body: Box<Node>,
        /// Optional continue block
        continue_block: Option<Box<Node>>,
    },

    /// Tie operation for binding variables to objects: `tie %hash, 'Package', @args`
    Tie {
        /// Variable being tied
        variable: Box<Node>,
        /// Class/package name to tie to
        package: Box<Node>,
        /// Arguments passed to TIE* method
        args: Vec<Node>,
    },

    /// Untie operation for unbinding variables: `untie %hash`
    Untie {
        /// Variable being untied
        variable: Box<Node>,
    },

    /// C-style for loop: `for (init; cond; update) { ... }`
    For {
        /// Initialization expression
        init: Option<Box<Node>>,
        /// Loop condition
        condition: Option<Box<Node>>,
        /// Update expression
        update: Option<Box<Node>>,
        /// Loop body
        body: Box<Node>,
        /// Optional continue block
        continue_block: Option<Box<Node>>,
    },

    /// Foreach loop: `foreach my $item (@list) { ... }`
    Foreach {
        /// Iterator variable
        variable: Box<Node>,
        /// List to iterate
        list: Box<Node>,
        /// Loop body
        body: Box<Node>,
        /// Optional continue block
        continue_block: Option<Box<Node>>,
    },

    /// Given statement for switch-like matching (Perl 5.10+)
    Given {
        /// Expression to match against
        expr: Box<Node>,
        /// Body containing when/default blocks
        body: Box<Node>,
    },

    /// When clause in given/switch: `when ($pattern) { ... }`
    When {
        /// Pattern to match
        condition: Box<Node>,
        /// Handler block
        body: Box<Node>,
    },

    /// Default clause in given/switch: `default { ... }`
    Default {
        /// Handler block for unmatched cases
        body: Box<Node>,
    },

    /// Statement modifier syntax: `print "ok" if $condition`
    StatementModifier {
        /// Statement to conditionally execute
        statement: Box<Node>,
        /// Modifier keyword: if, unless, while, until, for, foreach
        modifier: String,
        /// Modifier condition
        condition: Box<Node>,
    },

    // Functions
    /// Subroutine declaration (function) including name, prototype, signature and body.
    Subroutine {
        /// Name of the subroutine
        ///
        /// The source span of this name is stored separately in `name_span`.
        // NOTE(review): `None` presumably denotes an anonymous `sub { ... }` — confirm
        // against the parser.
        name: Option<String>,

        /// Source location span of the subroutine name
        ///
        /// Intended for precise editor interactions such as go-to-definition,
        /// hover and rename, which need the span of the name rather than of the
        /// whole declaration.
        name_span: Option<SourceLocation>,

        /// Optional prototype node (e.g. `($;@)`).
        prototype: Option<Box<Node>>,
        /// Optional signature node (Perl 5.20+ feature).
        signature: Option<Box<Node>>,
        /// Attributes attached to the subroutine (`:lvalue`, etc.).
        attributes: Vec<String>,
        /// The body block of the subroutine.
        body: Box<Node>,
    },

    /// Subroutine prototype specification: `sub foo ($;@) { ... }`
    Prototype {
        /// Prototype string defining argument behavior
        content: String,
    },

    /// Subroutine signature (Perl 5.20+): `sub foo ($x, $y = 0) { ... }`
    Signature {
        /// List of signature parameters
        parameters: Vec<Node>,
    },

    /// Mandatory signature parameter: `$x` in `sub foo ($x) { }`
    MandatoryParameter {
        /// Variable being bound
        variable: Box<Node>,
    },

    /// Optional signature parameter with default: `$y = 0` in `sub foo ($y = 0) { }`
    OptionalParameter {
        /// Variable being bound
        variable: Box<Node>,
        /// Default value expression
        default_value: Box<Node>,
    },

    /// Slurpy parameter collecting remaining args: `@rest` or `%opts` in signature
    SlurpyParameter {
        /// Array or hash variable to receive remaining arguments
        variable: Box<Node>,
    },

    /// Named parameter placeholder in signature (future Perl feature)
    NamedParameter {
        /// Variable for named parameter binding
        variable: Box<Node>,
    },

    /// Method declaration (Perl 5.38+ with `use feature 'class'`)
    Method {
        /// Method name
        name: String,
        /// Optional signature
        signature: Option<Box<Node>>,
        /// Method attributes (e.g., `:lvalue`)
        attributes: Vec<String>,
        /// Method body
        body: Box<Node>,
    },

    /// Return statement: `return;` or `return $value;`
    Return {
        /// Optional return value
        value: Option<Box<Node>>,
    },

    /// Loop control statement: `next`, `last`, or `redo`
    LoopControl {
        /// Control keyword: "next", "last", or "redo"
        op: String,
        /// Optional label: `next LABEL`
        label: Option<String>,
    },

    /// Goto statement: `goto LABEL`, `goto &sub`, or `goto $expr`
    Goto {
        /// The target of the goto (label identifier, sub reference, or expression)
        target: Box<Node>,
    },

    /// Method call: `$obj->method(@args)` or `$obj->method`
    MethodCall {
        /// Object or class expression
        object: Box<Node>,
        /// Method name being called
        method: String,
        /// Method arguments
        args: Vec<Node>,
    },

    /// Function call: `foo(@args)` or `foo()`
    FunctionCall {
        /// Function name (may be qualified: `Package::func`)
        name: String,
        /// Function arguments
        args: Vec<Node>,
    },

    /// Indirect object call (legacy syntax): `new Class @args`
    IndirectCall {
        /// Method name
        method: String,
        /// Object or class
        object: Box<Node>,
        /// Arguments
        args: Vec<Node>,
    },

    /// Regex literal: `/pattern/modifiers` or `qr/pattern/modifiers`
    Regex {
        /// Regular expression pattern
        pattern: String,
        /// Replacement string (for s/// when parsed as regex)
        replacement: Option<String>,
        /// Regex modifiers (i, m, s, x, g, etc.)
        modifiers: String,
        /// Whether the regex contains embedded code `(?{...})`
        has_embedded_code: bool,
    },

    /// Match operation: `$str =~ /pattern/modifiers` or `$str !~ /pattern/modifiers`
    Match {
        /// Expression to match against
        expr: Box<Node>,
        /// Pattern to match
        pattern: String,
        /// Match modifiers
        modifiers: String,
        /// Whether the regex contains embedded code `(?{...})`
        has_embedded_code: bool,
        /// Whether the binding operator was `!~` (negated match)
        negated: bool,
    },

    /// Substitution operation: `$str =~ s/pattern/replacement/modifiers`
    Substitution {
        /// Expression to substitute in
        expr: Box<Node>,
        /// Pattern to find
        pattern: String,
        /// Replacement string
        replacement: String,
        /// Substitution modifiers (g, e, r, etc.)
        modifiers: String,
        /// Whether the regex contains embedded code `(?{...})`
        has_embedded_code: bool,
        /// Whether the binding operator was `!~` (negated match)
        negated: bool,
    },

    /// Transliteration operation: `$str =~ tr/search/replace/` or `y///`
    Transliteration {
        /// Expression to transliterate
        expr: Box<Node>,
        /// Characters to search for
        search: String,
        /// Replacement characters
        replace: String,
        /// Transliteration modifiers (c, d, s, r)
        modifiers: String,
        /// Whether the binding operator was `!~` (negated match)
        negated: bool,
    },

    // Package system
    /// Package declaration (e.g. `package Foo;`) and optional inline block form.
    Package {
        /// Name of the package
        ///
        /// The source span of this name is stored separately in `name_span`.
        name: String,

        /// Source location span of the package name
        ///
        /// Intended for precise editor interactions such as go-to-definition
        /// and hover, which need the span of the name rather than of the whole
        /// declaration.
        name_span: SourceLocation,

        /// Optional inline block for `package Foo { ... }` declarations.
        block: Option<Box<Node>>,
    },

    /// Use statement for module loading: `use Module qw(imports);`
    Use {
        /// Module name to load
        module: String,
        /// Import arguments (symbols to import)
        args: Vec<String>,
        /// Whether this module is a known source filter (security risk)
        has_filter_risk: bool,
    },

    /// No statement for disabling features: `no strict;`
    No {
        /// Module/pragma name to disable
        module: String,
        /// Arguments for the no statement
        args: Vec<String>,
        /// Whether this module is a known source filter (security risk)
        has_filter_risk: bool,
    },

    /// Phase block for compile/runtime hooks: `BEGIN`, `END`, `CHECK`, `INIT`, `UNITCHECK`
    PhaseBlock {
        /// Phase name: BEGIN, END, CHECK, INIT, UNITCHECK
        phase: String,
        /// Source location span of the phase block name for precise navigation
        phase_span: Option<SourceLocation>,
        /// Block to execute during the specified phase
        block: Box<Node>,
    },

    /// Data section marker: `__DATA__` or `__END__`
    DataSection {
        /// Section marker (__DATA__ or __END__)
        marker: String,
        /// Content following the marker (if any)
        body: Option<String>,
    },

    /// Class declaration (Perl 5.38+ with `use feature 'class'`)
    Class {
        /// Class name
        name: String,
        /// Parent class names from `:isa(Parent)` attributes
        parents: Vec<String>,
        /// Class body containing methods and attributes
        body: Box<Node>,
    },

    /// Format declaration for legacy report generation
    Format {
        /// Format name (defaults to filehandle name)
        name: String,
        /// Format specification body
        body: String,
    },

    /// Bare identifier (bareword or package-qualified name)
    Identifier {
        /// Identifier string
        name: String,
    },

    /// Parse error placeholder with error message and recovery context
    Error {
        /// Error description
        message: String,
        /// Expected token types (if any)
        expected: Vec<TokenKind>,
        /// The token actually found (if any)
        found: Option<Token>,
        /// Partial AST node parsed before error (if any)
        partial: Option<Box<Node>>,
    },

    /// Missing expression where one was expected
    MissingExpression,
    /// Missing statement where one was expected
    MissingStatement,
    /// Missing identifier where one was expected
    MissingIdentifier,
    /// Missing block where one was expected
    MissingBlock,

    /// Lexer budget exceeded marker preserving partial parse results
    ///
    /// Used when recursion or token limits are hit to preserve already-parsed content.
    UnknownRest,
}
2127
2128impl NodeKind {
2129 /// Get the name of this `NodeKind` as a static string.
2130 ///
2131 /// Useful for diagnostics, logging, and human-readable AST dumps.
2132 ///
2133 /// # Examples
2134 ///
2135 /// ```
2136 /// use perl_ast::NodeKind;
2137 ///
2138 /// let kind = NodeKind::Variable { sigil: "$".to_string(), name: "x".to_string() };
2139 /// assert_eq!(kind.kind_name(), "Variable");
2140 ///
2141 /// let kind = NodeKind::Program { statements: vec![] };
2142 /// assert_eq!(kind.kind_name(), "Program");
2143 /// ```
2144 pub fn kind_name(&self) -> &'static str {
2145 match self {
2146 NodeKind::Program { .. } => "Program",
2147 NodeKind::ExpressionStatement { .. } => "ExpressionStatement",
2148 NodeKind::VariableDeclaration { .. } => "VariableDeclaration",
2149 NodeKind::VariableListDeclaration { .. } => "VariableListDeclaration",
2150 NodeKind::Variable { .. } => "Variable",
2151 NodeKind::VariableWithAttributes { .. } => "VariableWithAttributes",
2152 NodeKind::Assignment { .. } => "Assignment",
2153 NodeKind::Binary { .. } => "Binary",
2154 NodeKind::Ternary { .. } => "Ternary",
2155 NodeKind::Unary { .. } => "Unary",
2156 NodeKind::Diamond => "Diamond",
2157 NodeKind::Ellipsis => "Ellipsis",
2158 NodeKind::Undef => "Undef",
2159 NodeKind::Readline { .. } => "Readline",
2160 NodeKind::Glob { .. } => "Glob",
2161 NodeKind::Typeglob { .. } => "Typeglob",
2162 NodeKind::Number { .. } => "Number",
2163 NodeKind::String { .. } => "String",
2164 NodeKind::Heredoc { .. } => "Heredoc",
2165 NodeKind::ArrayLiteral { .. } => "ArrayLiteral",
2166 NodeKind::HashLiteral { .. } => "HashLiteral",
2167 NodeKind::Block { .. } => "Block",
2168 NodeKind::Eval { .. } => "Eval",
2169 NodeKind::Do { .. } => "Do",
2170 NodeKind::Defer { .. } => "Defer",
2171 NodeKind::Try { .. } => "Try",
2172 NodeKind::If { .. } => "If",
2173 NodeKind::LabeledStatement { .. } => "LabeledStatement",
2174 NodeKind::While { .. } => "While",
2175 NodeKind::Tie { .. } => "Tie",
2176 NodeKind::Untie { .. } => "Untie",
2177 NodeKind::For { .. } => "For",
2178 NodeKind::Foreach { .. } => "Foreach",
2179 NodeKind::Given { .. } => "Given",
2180 NodeKind::When { .. } => "When",
2181 NodeKind::Default { .. } => "Default",
2182 NodeKind::StatementModifier { .. } => "StatementModifier",
2183 NodeKind::Subroutine { .. } => "Subroutine",
2184 NodeKind::Prototype { .. } => "Prototype",
2185 NodeKind::Signature { .. } => "Signature",
2186 NodeKind::MandatoryParameter { .. } => "MandatoryParameter",
2187 NodeKind::OptionalParameter { .. } => "OptionalParameter",
2188 NodeKind::SlurpyParameter { .. } => "SlurpyParameter",
2189 NodeKind::NamedParameter { .. } => "NamedParameter",
2190 NodeKind::Method { .. } => "Method",
2191 NodeKind::Return { .. } => "Return",
2192 NodeKind::LoopControl { .. } => "LoopControl",
2193 NodeKind::Goto { .. } => "Goto",
2194 NodeKind::MethodCall { .. } => "MethodCall",
2195 NodeKind::FunctionCall { .. } => "FunctionCall",
2196 NodeKind::IndirectCall { .. } => "IndirectCall",
2197 NodeKind::Regex { .. } => "Regex",
2198 NodeKind::Match { .. } => "Match",
2199 NodeKind::Substitution { .. } => "Substitution",
2200 NodeKind::Transliteration { .. } => "Transliteration",
2201 NodeKind::Package { .. } => "Package",
2202 NodeKind::Use { .. } => "Use",
2203 NodeKind::No { .. } => "No",
2204 NodeKind::PhaseBlock { .. } => "PhaseBlock",
2205 NodeKind::DataSection { .. } => "DataSection",
2206 NodeKind::Class { .. } => "Class",
2207 NodeKind::Format { .. } => "Format",
2208 NodeKind::Identifier { .. } => "Identifier",
2209 NodeKind::Error { .. } => "Error",
2210 NodeKind::MissingExpression => "MissingExpression",
2211 NodeKind::MissingStatement => "MissingStatement",
2212 NodeKind::MissingIdentifier => "MissingIdentifier",
2213 NodeKind::MissingBlock => "MissingBlock",
2214 NodeKind::UnknownRest => "UnknownRest",
2215 }
2216 }
2217
2218 /// Canonical list of **all** `kind_name()` strings, in alphabetical order.
2219 ///
2220 /// Every consumer that needs the full set of NodeKind names should reference
2221 /// this constant instead of maintaining a hand-written copy.
2222 pub const ALL_KIND_NAMES: &[&'static str] = &[
2223 "ArrayLiteral",
2224 "Assignment",
2225 "Binary",
2226 "Block",
2227 "Class",
2228 "DataSection",
2229 "Default",
2230 "Defer",
2231 "Diamond",
2232 "Do",
2233 "Ellipsis",
2234 "Error",
2235 "Eval",
2236 "ExpressionStatement",
2237 "For",
2238 "Foreach",
2239 "Format",
2240 "FunctionCall",
2241 "Given",
2242 "Glob",
2243 "Goto",
2244 "HashLiteral",
2245 "Heredoc",
2246 "Identifier",
2247 "If",
2248 "IndirectCall",
2249 "LabeledStatement",
2250 "LoopControl",
2251 "MandatoryParameter",
2252 "Match",
2253 "Method",
2254 "MethodCall",
2255 "MissingBlock",
2256 "MissingExpression",
2257 "MissingIdentifier",
2258 "MissingStatement",
2259 "NamedParameter",
2260 "No",
2261 "Number",
2262 "OptionalParameter",
2263 "Package",
2264 "PhaseBlock",
2265 "Program",
2266 "Prototype",
2267 "Readline",
2268 "Regex",
2269 "Return",
2270 "Signature",
2271 "SlurpyParameter",
2272 "StatementModifier",
2273 "String",
2274 "Subroutine",
2275 "Substitution",
2276 "Ternary",
2277 "Tie",
2278 "Transliteration",
2279 "Try",
2280 "Typeglob",
2281 "Unary",
2282 "Undef",
2283 "UnknownRest",
2284 "Untie",
2285 "Use",
2286 "Variable",
2287 "VariableDeclaration",
2288 "VariableListDeclaration",
2289 "VariableWithAttributes",
2290 "When",
2291 "While",
2292 ];
2293
2294 /// Subset of `ALL_KIND_NAMES` that represent synthetic/recovery nodes.
2295 ///
2296 /// These kinds are only produced by `parse_with_recovery()` on malformed
2297 /// input and should not be expected in clean parses.
2298 pub const RECOVERY_KIND_NAMES: &[&'static str] = &[
2299 "Error",
2300 "MissingBlock",
2301 "MissingExpression",
2302 "MissingIdentifier",
2303 "MissingStatement",
2304 "UnknownRest",
2305 ];
2306}
2307
2308impl fmt::Display for NodeKind {
2309 /// Formats as the canonical `kind_name()` string.
2310 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2311 f.write_str(self.kind_name())
2312 }
2313}
2314
2315impl fmt::Display for Node {
2316 /// Formats as the tree-sitter compatible S-expression.
2317 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2318 f.write_str(&self.to_sexp())
2319 }
2320}
2321
/// Builds the S-expression label for a unary operator.
///
/// Three operators are spelled out as words:
///
/// - `!` and `not` both become `unary_not`
/// - `~` becomes `unary_complement`
/// - `\` becomes `unary_ref`
///
/// Every other operator (arithmetic sign, `++`/`--`, file tests such as `-f`,
/// postfix dereferences such as `->@*`, `defined`, and anything unrecognised)
/// is rendered as `unary_` followed by the operator text, with each space
/// replaced by an underscore.
fn format_unary_operator(op: &str) -> String {
    let spelled = match op {
        "!" | "not" => "not".to_owned(),
        "~" => "complement".to_owned(),
        "\\" => "ref".to_owned(),
        // Spaces are replaced so the label stays a single token.
        other => other.replace(' ', "_"),
    };

    format!("unary_{spelled}")
}
2386
/// Builds the S-expression label for a binary operator.
///
/// The two arrow subscripts have dedicated names: `->{}` becomes
/// `arrow_hash_deref` and `->[]` becomes `arrow_array_deref`.
///
/// Every other operator (arithmetic, comparison, logical, bitwise, binding,
/// range, assignment, `->`, plain `{}`/`[]` subscripts, and anything
/// unrecognised) is rendered as `binary_` followed by the operator text, with
/// each space replaced by an underscore.
fn format_binary_operator(op: &str) -> String {
    match op {
        "->{}" => String::from("arrow_hash_deref"),
        "->[]" => String::from("arrow_array_deref"),
        // Spaces are replaced so the label stays a single token.
        other => format!("binary_{}", other.replace(' ', "_")),
    }
}
2486
2487// SourceLocation is now provided by perl-position-tracking crate
2488// See the re-export at the top of this file
2489
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    /// Compile-time reminder that keeps `all_kind_names_from_variants` complete.
    ///
    /// Building a list of values only fails to compile when a variant is
    /// removed or its fields change; it cannot notice a variant that was
    /// *added*. This match has no wildcard arm, so a new `NodeKind` variant
    /// stops the test module from compiling until it is named here. When that
    /// happens, also add a dummy instance of the new variant to the list in
    /// `all_kind_names_from_variants`.
    fn exhaustiveness_reminder(kind: &NodeKind) {
        match kind {
            NodeKind::Program { .. }
            | NodeKind::ExpressionStatement { .. }
            | NodeKind::VariableDeclaration { .. }
            | NodeKind::VariableListDeclaration { .. }
            | NodeKind::Variable { .. }
            | NodeKind::VariableWithAttributes { .. }
            | NodeKind::Assignment { .. }
            | NodeKind::Binary { .. }
            | NodeKind::Ternary { .. }
            | NodeKind::Unary { .. }
            | NodeKind::Diamond
            | NodeKind::Ellipsis
            | NodeKind::Undef
            | NodeKind::Readline { .. }
            | NodeKind::Glob { .. }
            | NodeKind::Typeglob { .. }
            | NodeKind::Number { .. }
            | NodeKind::String { .. }
            | NodeKind::Heredoc { .. }
            | NodeKind::ArrayLiteral { .. }
            | NodeKind::HashLiteral { .. }
            | NodeKind::Block { .. }
            | NodeKind::Eval { .. }
            | NodeKind::Do { .. }
            | NodeKind::Defer { .. }
            | NodeKind::Try { .. }
            | NodeKind::If { .. }
            | NodeKind::LabeledStatement { .. }
            | NodeKind::While { .. }
            | NodeKind::Tie { .. }
            | NodeKind::Untie { .. }
            | NodeKind::For { .. }
            | NodeKind::Foreach { .. }
            | NodeKind::Given { .. }
            | NodeKind::When { .. }
            | NodeKind::Default { .. }
            | NodeKind::StatementModifier { .. }
            | NodeKind::Subroutine { .. }
            | NodeKind::Prototype { .. }
            | NodeKind::Signature { .. }
            | NodeKind::MandatoryParameter { .. }
            | NodeKind::OptionalParameter { .. }
            | NodeKind::SlurpyParameter { .. }
            | NodeKind::NamedParameter { .. }
            | NodeKind::Method { .. }
            | NodeKind::Return { .. }
            | NodeKind::LoopControl { .. }
            | NodeKind::Goto { .. }
            | NodeKind::MethodCall { .. }
            | NodeKind::FunctionCall { .. }
            | NodeKind::IndirectCall { .. }
            | NodeKind::Regex { .. }
            | NodeKind::Match { .. }
            | NodeKind::Substitution { .. }
            | NodeKind::Transliteration { .. }
            | NodeKind::Package { .. }
            | NodeKind::Use { .. }
            | NodeKind::No { .. }
            | NodeKind::PhaseBlock { .. }
            | NodeKind::DataSection { .. }
            | NodeKind::Class { .. }
            | NodeKind::Format { .. }
            | NodeKind::Identifier { .. }
            | NodeKind::Error { .. }
            | NodeKind::MissingExpression
            | NodeKind::MissingStatement
            | NodeKind::MissingIdentifier
            | NodeKind::MissingBlock
            | NodeKind::UnknownRest => {}
        }
    }

    /// Build a dummy instance of every `NodeKind` variant and collect the
    /// `kind_name()` of each.
    ///
    /// Removing a variant or changing its fields breaks the constructors
    /// below; adding a variant breaks `exhaustiveness_reminder`. Either way
    /// the compiler points at this module when the enum changes.
    fn all_kind_names_from_variants() -> BTreeSet<&'static str> {
        let loc = SourceLocation { start: 0, end: 0 };
        let dummy_node = || Node::new(NodeKind::Undef, loc);

        let variants: Vec<NodeKind> = vec![
            NodeKind::Program { statements: vec![] },
            NodeKind::ExpressionStatement { expression: Box::new(dummy_node()) },
            NodeKind::VariableDeclaration {
                declarator: String::new(),
                variable: Box::new(dummy_node()),
                attributes: vec![],
                initializer: None,
            },
            NodeKind::VariableListDeclaration {
                declarator: String::new(),
                variables: vec![],
                attributes: vec![],
                initializer: None,
            },
            NodeKind::Variable { sigil: String::new(), name: String::new() },
            NodeKind::VariableWithAttributes {
                variable: Box::new(dummy_node()),
                attributes: vec![],
            },
            NodeKind::Assignment {
                lhs: Box::new(dummy_node()),
                rhs: Box::new(dummy_node()),
                op: String::new(),
            },
            NodeKind::Binary {
                op: String::new(),
                left: Box::new(dummy_node()),
                right: Box::new(dummy_node()),
            },
            NodeKind::Ternary {
                condition: Box::new(dummy_node()),
                then_expr: Box::new(dummy_node()),
                else_expr: Box::new(dummy_node()),
            },
            NodeKind::Unary { op: String::new(), operand: Box::new(dummy_node()) },
            NodeKind::Diamond,
            NodeKind::Ellipsis,
            NodeKind::Undef,
            NodeKind::Readline { filehandle: None },
            NodeKind::Glob { pattern: String::new() },
            NodeKind::Typeglob { name: String::new() },
            NodeKind::Number { value: String::new() },
            NodeKind::String { value: String::new(), interpolated: false },
            NodeKind::Heredoc {
                delimiter: String::new(),
                content: String::new(),
                interpolated: false,
                indented: false,
                command: false,
                body_span: None,
            },
            NodeKind::ArrayLiteral { elements: vec![] },
            NodeKind::HashLiteral { pairs: vec![] },
            NodeKind::Block { statements: vec![] },
            NodeKind::Eval { block: Box::new(dummy_node()) },
            NodeKind::Do { block: Box::new(dummy_node()) },
            NodeKind::Defer { block: Box::new(dummy_node()) },
            NodeKind::Try {
                body: Box::new(dummy_node()),
                catch_blocks: vec![],
                finally_block: None,
            },
            NodeKind::If {
                condition: Box::new(dummy_node()),
                then_branch: Box::new(dummy_node()),
                elsif_branches: vec![],
                else_branch: None,
            },
            NodeKind::LabeledStatement { label: String::new(), statement: Box::new(dummy_node()) },
            NodeKind::While {
                condition: Box::new(dummy_node()),
                body: Box::new(dummy_node()),
                continue_block: None,
            },
            NodeKind::Tie {
                variable: Box::new(dummy_node()),
                package: Box::new(dummy_node()),
                args: vec![],
            },
            NodeKind::Untie { variable: Box::new(dummy_node()) },
            NodeKind::For {
                init: None,
                condition: None,
                update: None,
                body: Box::new(dummy_node()),
                continue_block: None,
            },
            NodeKind::Foreach {
                variable: Box::new(dummy_node()),
                list: Box::new(dummy_node()),
                body: Box::new(dummy_node()),
                continue_block: None,
            },
            NodeKind::Given { expr: Box::new(dummy_node()), body: Box::new(dummy_node()) },
            NodeKind::When { condition: Box::new(dummy_node()), body: Box::new(dummy_node()) },
            NodeKind::Default { body: Box::new(dummy_node()) },
            NodeKind::StatementModifier {
                statement: Box::new(dummy_node()),
                modifier: String::new(),
                condition: Box::new(dummy_node()),
            },
            NodeKind::Subroutine {
                name: None,
                name_span: None,
                prototype: None,
                signature: None,
                attributes: vec![],
                body: Box::new(dummy_node()),
            },
            NodeKind::Prototype { content: String::new() },
            NodeKind::Signature { parameters: vec![] },
            NodeKind::MandatoryParameter { variable: Box::new(dummy_node()) },
            NodeKind::OptionalParameter {
                variable: Box::new(dummy_node()),
                default_value: Box::new(dummy_node()),
            },
            NodeKind::SlurpyParameter { variable: Box::new(dummy_node()) },
            NodeKind::NamedParameter { variable: Box::new(dummy_node()) },
            NodeKind::Method {
                name: String::new(),
                signature: None,
                attributes: vec![],
                body: Box::new(dummy_node()),
            },
            NodeKind::Return { value: None },
            NodeKind::LoopControl { op: String::new(), label: None },
            NodeKind::Goto { target: Box::new(dummy_node()) },
            NodeKind::MethodCall {
                object: Box::new(dummy_node()),
                method: String::new(),
                args: vec![],
            },
            NodeKind::FunctionCall { name: String::new(), args: vec![] },
            NodeKind::IndirectCall {
                method: String::new(),
                object: Box::new(dummy_node()),
                args: vec![],
            },
            NodeKind::Regex {
                pattern: String::new(),
                replacement: None,
                modifiers: String::new(),
                has_embedded_code: false,
            },
            NodeKind::Match {
                expr: Box::new(dummy_node()),
                pattern: String::new(),
                modifiers: String::new(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Substitution {
                expr: Box::new(dummy_node()),
                pattern: String::new(),
                replacement: String::new(),
                modifiers: String::new(),
                has_embedded_code: false,
                negated: false,
            },
            NodeKind::Transliteration {
                expr: Box::new(dummy_node()),
                search: String::new(),
                replace: String::new(),
                modifiers: String::new(),
                negated: false,
            },
            NodeKind::Package { name: String::new(), name_span: loc, block: None },
            NodeKind::Use { module: String::new(), args: vec![], has_filter_risk: false },
            NodeKind::No { module: String::new(), args: vec![], has_filter_risk: false },
            NodeKind::PhaseBlock {
                phase: String::new(),
                phase_span: None,
                block: Box::new(dummy_node()),
            },
            NodeKind::DataSection { marker: String::new(), body: None },
            NodeKind::Class { name: String::new(), parents: vec![], body: Box::new(dummy_node()) },
            NodeKind::Format { name: String::new(), body: String::new() },
            NodeKind::Identifier { name: String::new() },
            NodeKind::Error {
                message: String::new(),
                expected: vec![],
                found: None,
                partial: None,
            },
            NodeKind::MissingExpression,
            NodeKind::MissingStatement,
            NodeKind::MissingIdentifier,
            NodeKind::MissingBlock,
            NodeKind::UnknownRest,
        ];

        variants
            .iter()
            .map(|kind| {
                // Does nothing at runtime; it exists for its exhaustive match.
                exhaustiveness_reminder(kind);
                kind.kind_name()
            })
            .collect()
    }

    /// `ALL_KIND_NAMES` must contain exactly the names produced by the enum's
    /// variants: nothing missing, nothing extra, nothing repeated.
    #[test]
    fn all_kind_names_is_consistent_with_kind_name() {
        let from_enum = all_kind_names_from_variants();
        let from_const: BTreeSet<&str> = NodeKind::ALL_KIND_NAMES.iter().copied().collect();

        // Check for duplicates in the const array
        assert_eq!(
            NodeKind::ALL_KIND_NAMES.len(),
            from_const.len(),
            "ALL_KIND_NAMES contains duplicates"
        );

        let only_in_enum: Vec<_> = from_enum.difference(&from_const).collect();
        let only_in_const: Vec<_> = from_const.difference(&from_enum).collect();

        assert!(
            only_in_enum.is_empty() && only_in_const.is_empty(),
            "ALL_KIND_NAMES is out of sync with NodeKind variants:\n  \
             in enum but not in ALL_KIND_NAMES: {only_in_enum:?}\n  \
             in ALL_KIND_NAMES but not in enum: {only_in_const:?}"
        );
    }

    /// `ALL_KIND_NAMES` is documented as alphabetical; verify that each entry
    /// sorts strictly after the one before it.
    #[test]
    fn all_kind_names_is_sorted() {
        let names = NodeKind::ALL_KIND_NAMES;
        let out_of_order: Vec<_> =
            names.iter().zip(names.iter().skip(1)).filter(|(prev, next)| prev >= next).collect();

        assert!(
            out_of_order.is_empty(),
            "ALL_KIND_NAMES is not in strictly ascending order: {out_of_order:?}"
        );
    }

    /// Every recovery kind name must also appear in `ALL_KIND_NAMES`, and the
    /// recovery list must not repeat an entry.
    #[test]
    fn recovery_kind_names_is_subset_of_all() {
        let all: BTreeSet<&str> = NodeKind::ALL_KIND_NAMES.iter().copied().collect();
        let recovery: BTreeSet<&str> = NodeKind::RECOVERY_KIND_NAMES.iter().copied().collect();

        // No duplicates
        assert_eq!(
            NodeKind::RECOVERY_KIND_NAMES.len(),
            recovery.len(),
            "RECOVERY_KIND_NAMES contains duplicates"
        );

        let not_in_all: Vec<_> = recovery.difference(&all).collect();
        assert!(
            not_in_all.is_empty(),
            "RECOVERY_KIND_NAMES contains entries not in ALL_KIND_NAMES: {not_in_all:?}"
        );
    }
}