1use crate::pratt_parser::PrattParser;
7use pest::{
8 Parser,
9 iterators::{Pair, Pairs},
10};
11use pest_derive::Parser;
12use regex::Regex;
13use stacker;
14use std::sync::{Arc, OnceLock};
15
/// Parser type whose implementation is generated by `pest_derive` from the
/// grammar file `grammar.pest`.
///
/// The `Rule` values matched on throughout this file identify rules of that grammar.
#[derive(Parser)]
#[grammar = "grammar.pest"]
pub struct PerlParser;
19
/// A node of the syntax tree built from parsed Perl source.
///
/// Text payloads are `Arc<str>`, so cloning a node shares its string data;
/// child nodes held in `Box`/`Vec` are still cloned recursively.
#[derive(Debug, Clone, PartialEq)]
pub enum AstNode {
    // ----- Program structure -----
    /// A sequence of top-level nodes.
    Program(Vec<AstNode>),
    /// A single node wrapped as a statement (used for expression statements).
    Statement(Box<AstNode>),
    /// The statements of a brace-delimited block.
    Block(Vec<AstNode>),

    // ----- Declarations -----
    /// Variable declaration.
    VariableDeclaration {
        /// Source text of the declarator that introduced the variables.
        scope: Arc<str>,
        /// The declared variables.
        variables: Vec<AstNode>,
        /// Initializer expression, when one is present.
        initializer: Option<Box<AstNode>>,
    },
    /// Named subroutine declaration.
    SubDeclaration {
        /// Subroutine name.
        name: Arc<str>,
        /// Source text of the prototype or of the signature, whichever was written.
        prototype: Option<Arc<str>>,
        /// Source text of each attribute.
        attributes: Vec<Arc<str>>,
        /// Body of the subroutine.
        body: Box<AstNode>,
    },
    /// Format declaration.
    FormatDeclaration {
        /// Format name.
        name: Arc<str>,
        /// Source text of each line of the format.
        format_lines: Vec<Arc<str>>,
    },
    /// Package declaration.
    PackageDeclaration {
        /// Package name.
        name: Arc<str>,
        /// Version text, if given.
        version: Option<Arc<str>>,
        /// Block attached to the declaration, if any.
        block: Option<Box<AstNode>>,
    },
    /// `use` statement.
    UseStatement {
        /// Module name.
        module: Arc<str>,
        /// Version text, if given.
        version: Option<Arc<str>>,
        /// Imported names.
        import_list: Vec<Arc<str>>,
    },
    /// `require` statement.
    RequireStatement {
        /// Module name.
        module: Arc<str>,
    },

    // ----- Control flow -----
    /// `if` statement; also used for the `EXPR if COND` statement modifier.
    IfStatement {
        /// Condition of the `if`.
        condition: Box<AstNode>,
        /// Node evaluated when the condition holds.
        then_block: Box<AstNode>,
        /// `(condition, block)` pairs, one per `elsif`.
        elsif_clauses: Vec<(AstNode, AstNode)>,
        /// Block of the `else` clause, if any.
        else_block: Option<Box<AstNode>>,
    },
    /// `unless` statement; also used for the `EXPR unless COND` statement modifier.
    UnlessStatement {
        /// Condition of the `unless`.
        condition: Box<AstNode>,
        /// Node evaluated when the condition does not hold.
        block: Box<AstNode>,
        /// Block of the `else` clause, if any.
        else_block: Option<Box<AstNode>>,
    },
    /// `given` statement.
    GivenStatement {
        /// The expression being examined.
        expression: Box<AstNode>,
        /// `(condition, block)` pairs, one per `when` clause.
        when_clauses: Vec<(AstNode, AstNode)>,
        /// Block of the `default` clause, if any.
        default_block: Option<Box<AstNode>>,
    },
    /// `while` loop; also used for the `EXPR while COND` statement modifier.
    WhileStatement {
        /// Loop label, if any.
        label: Option<Arc<str>>,
        /// Loop condition.
        condition: Box<AstNode>,
        /// Loop body.
        block: Box<AstNode>,
    },
    /// `until` loop; also used for the `EXPR until COND` statement modifier.
    UntilStatement {
        /// Loop label, if any.
        label: Option<Arc<str>>,
        /// Loop condition.
        condition: Box<AstNode>,
        /// Loop body.
        block: Box<AstNode>,
    },
    /// `for` loop with optional init/condition/update parts.
    ///
    /// Also used for the `EXPR for LIST` / `EXPR foreach LIST` statement
    /// modifiers, in which case only `condition` and `block` are populated.
    ForStatement {
        /// Loop label, if any.
        label: Option<Arc<str>>,
        /// Initialization part.
        init: Option<Box<AstNode>>,
        /// Condition part.
        condition: Option<Box<AstNode>>,
        /// Update part.
        update: Option<Box<AstNode>>,
        /// Loop body.
        block: Box<AstNode>,
    },
    /// `foreach` loop over a list.
    ForeachStatement {
        /// Loop label, if any.
        label: Option<Arc<str>>,
        /// Loop variable, if one was written.
        variable: Option<Box<AstNode>>,
        /// The list being iterated.
        list: Box<AstNode>,
        /// Loop body.
        block: Box<AstNode>,
    },

    // ----- Expressions -----
    /// Binary operation.
    BinaryOp {
        /// Operator text.
        op: Arc<str>,
        /// Left operand.
        left: Box<AstNode>,
        /// Right operand.
        right: Box<AstNode>,
    },
    /// Unary operation (including file-test operators).
    UnaryOp {
        /// Operator text.
        op: Arc<str>,
        /// The operand.
        operand: Box<AstNode>,
    },
    /// Conditional (`?:`) expression.
    TernaryOp {
        /// Condition.
        condition: Box<AstNode>,
        /// Value when the condition holds.
        true_expr: Box<AstNode>,
        /// Value otherwise.
        false_expr: Box<AstNode>,
    },
    /// Postfix dereference applied to `expr`.
    PostfixDereference {
        /// Expression being dereferenced.
        expr: Box<AstNode>,
        /// Dereference text with any leading `->` removed.
        deref_type: Arc<str>,
    },
    /// Dereference of `expr`.
    Dereference {
        /// Expression being dereferenced.
        expr: Box<AstNode>,
        /// Kind of dereference.
        deref_type: Arc<str>,
    },
    /// Access to a slot of a typeglob.
    TypeglobSlotAccess {
        /// The typeglob expression.
        typeglob: Box<AstNode>,
        /// Source text of the slot.
        slot: Arc<str>,
    },
    /// Assignment.
    Assignment {
        /// Assignment target.
        target: Box<AstNode>,
        /// Assignment operator text, e.g. `=`.
        op: Arc<str>,
        /// Assigned value.
        value: Box<AstNode>,
    },
    /// Function call.
    FunctionCall {
        /// The callee.
        function: Box<AstNode>,
        /// Call arguments.
        args: Vec<AstNode>,
    },
    /// Method call; class-method calls use an `Identifier` as `object`.
    MethodCall {
        /// Invocant.
        object: Box<AstNode>,
        /// Method name.
        method: Arc<str>,
        /// Call arguments.
        args: Vec<AstNode>,
    },
    /// Call of a builtin list operator.
    BuiltinListOp {
        /// Operator name.
        name: Arc<str>,
        /// Arguments.
        args: Vec<AstNode>,
    },
    /// Subscript of an array-valued expression.
    ArrayAccess {
        /// Expression being indexed.
        array: Box<AstNode>,
        /// Index expression.
        index: Box<AstNode>,
    },
    /// Subscript of a hash-valued expression.
    HashAccess {
        /// Expression being indexed.
        hash: Box<AstNode>,
        /// Key expression.
        key: Box<AstNode>,
    },

    // ----- Variables (payload is the variable's source text) -----
    /// Scalar variable.
    ScalarVariable(Arc<str>),
    /// Array variable.
    ArrayVariable(Arc<str>),
    /// Hash variable.
    HashVariable(Arc<str>),
    /// Typeglob variable.
    TypeglobVariable(Arc<str>),

    // ----- References and named element access -----
    /// Reference to a scalar.
    ScalarReference(Arc<str>),
    /// Reference to an array.
    ArrayReference(Arc<str>),
    /// Reference to a hash.
    HashReference(Arc<str>),
    /// Reference to a subroutine.
    SubroutineReference(Arc<str>),
    /// Reference to a glob.
    GlobReference(Arc<str>),
    /// Element of a named array.
    ArrayElement {
        /// Array name.
        array: Arc<str>,
        /// Index expression.
        index: Box<AstNode>,
    },
    /// Element of a named hash.
    HashElement {
        /// Hash name.
        hash: Arc<str>,
        /// Key expression.
        key: Box<AstNode>,
    },

    // ----- Literals -----
    /// Numeric literal, kept as its source text.
    Number(Arc<str>),
    /// String literal or literal string fragment.
    String(Arc<str>),
    /// Identifier or qualified name.
    Identifier(Arc<str>),
    /// Special literal, kept as its source text.
    SpecialLiteral(Arc<str>),
    /// Bareword.
    Bareword(Arc<str>),
    /// Placeholder for an absent expression.
    EmptyExpression,
    /// Regular-expression match.
    Regex {
        /// Pattern text.
        pattern: Arc<str>,
        /// Flags text.
        flags: Arc<str>,
        /// Names of the pattern's named capture groups.
        named_groups: Vec<Arc<str>>,
    },
    /// Substitution.
    Substitution {
        /// Pattern text.
        pattern: Arc<str>,
        /// Replacement text.
        replacement: Arc<str>,
        /// Flags text.
        flags: Arc<str>,
    },
    /// Transliteration.
    Transliteration {
        /// Characters to search for.
        search_list: Arc<str>,
        /// Replacement characters.
        replace_list: Arc<str>,
        /// Flags text.
        flags: Arc<str>,
    },

    // ----- Simple statements -----
    /// `return` statement.
    ReturnStatement {
        /// Returned value, if any.
        value: Option<Box<AstNode>>,
    },
    /// `last` statement.
    LastStatement {
        /// Target loop label, if any.
        label: Option<Arc<str>>,
    },
    /// `next` statement.
    NextStatement {
        /// Target loop label, if any.
        label: Option<Arc<str>>,
    },
    /// `tie` statement.
    TieStatement {
        /// Variable being tied.
        variable: Box<AstNode>,
        /// Class the variable is tied to.
        class: Box<AstNode>,
        /// Remaining arguments.
        args: Vec<AstNode>,
    },
    /// `untie` statement.
    UntieStatement {
        /// Variable being untied.
        variable: Box<AstNode>,
    },
    /// `tied` expression.
    TiedExpression {
        /// Variable being queried.
        variable: Box<AstNode>,
    },

    // ----- Miscellaneous -----
    /// Comment text.
    Comment(Arc<str>),
    /// Statement label.
    Label(Arc<str>),
    /// A block carrying a label.
    LabeledBlock {
        /// The label.
        label: Arc<str>,
        /// The labeled block.
        block: Box<AstNode>,
    },
    /// Anonymous subroutine.
    AnonymousSub {
        /// Prototype text, if any.
        prototype: Option<Arc<str>>,
        /// Body of the subroutine.
        body: Box<AstNode>,
    },
    /// List of expressions.
    List(Vec<AstNode>),
    /// Anonymous array constructor.
    ArrayRef(Vec<AstNode>),
    /// Anonymous hash constructor.
    HashRef(Vec<AstNode>),

    // ----- Phase blocks -----
    /// `BEGIN` block.
    BeginBlock(Box<AstNode>),
    /// `END` block.
    EndBlock(Box<AstNode>),
    /// `CHECK` block.
    CheckBlock(Box<AstNode>),
    /// `INIT` block.
    InitBlock(Box<AstNode>),
    /// `UNITCHECK` block.
    UnitcheckBlock(Box<AstNode>),

    // ----- Quote-like operators -----
    /// `qw` word list.
    QwList(Vec<Arc<str>>),
    /// `qq` string.
    QqString(Arc<str>),
    /// `qx` command string.
    QxString(Arc<str>),
    /// `qr` compiled regular expression.
    QrRegex {
        /// Pattern text.
        pattern: Arc<str>,
        /// Flags text.
        flags: Arc<str>,
        /// Names of the pattern's named capture groups.
        named_groups: Vec<Arc<str>>,
    },
    /// Double-quoted string split into literal and interpolated parts.
    InterpolatedString(Vec<AstNode>),

    // ----- Here-documents -----
    /// Here-document.
    Heredoc {
        /// Terminator marker.
        marker: Arc<str>,
        /// Whether the indented form was used.
        indented: bool,
        /// Whether the marker was quoted.
        quoted: bool,
        /// Body text.
        content: Arc<str>,
    },

    // ----- I/O -----
    /// Glob expression, holding its inner source text.
    Glob(Arc<str>),
    /// Readline expression.
    Readline {
        /// Filehandle text, if one was written.
        filehandle: Option<Arc<str>>,
    },

    // ----- do / eval / goto -----
    /// `do` block.
    DoBlock(Box<AstNode>),
    /// `eval` with a block.
    EvalBlock(Box<AstNode>),
    /// `eval` with an expression.
    EvalString(Box<AstNode>),
    /// `goto` statement.
    GotoStatement {
        /// Jump target.
        target: Arc<str>,
    },

    // ----- Trailing sections and documentation -----
    /// `__DATA__` section text.
    DataSection(Arc<str>),
    /// `__END__` section text.
    EndSection(Arc<str>),
    /// POD documentation text.
    Pod(Arc<str>),

    // ----- try/catch and defer -----
    /// `try`/`catch`/`finally` construct.
    TryCatch {
        /// The `try` block.
        try_block: Box<AstNode>,
        /// One `(optional name, block)` pair per `catch` clause.
        catch_clauses: Vec<(Option<Arc<str>>, AstNode)>,
        /// The `finally` block, if any.
        finally_block: Option<Box<AstNode>>,
    },
    /// `defer` block.
    DeferStatement(Box<AstNode>),

    // ----- Error recovery -----
    /// Placeholder for input that could not be turned into a proper node.
    ErrorNode {
        /// Description of the problem.
        message: Arc<str>,
        /// The offending text.
        content: Arc<str>,
    },

    // ----- Class syntax -----
    /// `class` declaration.
    ClassDeclaration {
        /// Class name.
        name: Arc<str>,
        /// Version text, if given.
        version: Option<Arc<str>>,
        /// Superclass name, if given.
        superclass: Option<Arc<str>>,
        /// Members of the class.
        body: Vec<AstNode>,
    },
    /// `field` declaration.
    FieldDeclaration {
        /// Field name.
        name: Arc<str>,
        /// Source text of each attribute.
        attributes: Vec<Arc<str>>,
        /// Default-value expression, if any.
        default: Option<Box<AstNode>>,
    },
    /// `method` declaration.
    MethodDeclaration {
        /// Method name.
        name: Arc<str>,
        /// Signature text, if any.
        signature: Option<Arc<str>>,
        /// Source text of each attribute.
        attributes: Vec<Arc<str>>,
        /// Body of the method.
        body: Box<AstNode>,
    },
    /// `role` declaration.
    RoleDeclaration {
        /// Role name.
        name: Arc<str>,
        /// Body of the role.
        body: Box<AstNode>,
    },
}
318
/// Turns Perl source text into an [`AstNode`] tree on top of the
/// pest-generated [`PerlParser`].
pub struct PureRustPerlParser {
    // Constructed in `new`; no code visible in this part of the file reads it,
    // and the leading underscore silences the unused-field lint.
    // NOTE(review): confirm whether it is used further down the file.
    _pratt_parser: PrattParser,
}
323
324impl PureRustPerlParser {
325 pub fn new() -> Self {
326 Self { _pratt_parser: PrattParser::new() }
327 }
328
329 #[inline(always)]
330 pub fn parse(&mut self, source: &str) -> Result<AstNode, Box<dyn std::error::Error>> {
331 let normalized = Self::normalize_source(source);
332
333 match <PerlParser as Parser<Rule>>::parse(Rule::program, &normalized) {
334 Ok(pairs) => self.build_ast(pairs),
335 Err(e) => {
336 self.parse_with_recovery(&normalized, e)
338 }
339 }
340 }
341
342 fn normalize_source(source: &str) -> String {
343 static LOOP_DECL_RE: OnceLock<Option<Regex>> = OnceLock::new();
344 static SIMPLE_SCALAR_DEREF_RE: OnceLock<Option<Regex>> = OnceLock::new();
345 static ASSIGN_BITNOT_RE: OnceLock<Option<Regex>> = OnceLock::new();
346
347 let Some(loop_decl_re) = LOOP_DECL_RE
350 .get_or_init(|| {
351 Regex::new(
352 r"\b(?P<kw>for(?:each)?)\s+(?:my|our|local|state)\s+(?P<var>\$[A-Za-z_][A-Za-z0-9_:]*)\s*(?P<paren>\()",
353 )
354 .ok()
355 })
356 .as_ref()
357 else {
358 return source.to_string();
359 };
360 let Some(scalar_deref_re) = SIMPLE_SCALAR_DEREF_RE
361 .get_or_init(|| Regex::new(r"\$\$(?P<name>[A-Za-z_][A-Za-z0-9_:]*)").ok())
362 .as_ref()
363 else {
364 return source.to_string();
365 };
366 let Some(assign_bitnot_re) = ASSIGN_BITNOT_RE
367 .get_or_init(|| Regex::new(r"=\s+~\s*(?P<expr>\$[A-Za-z_][A-Za-z0-9_:]*)").ok())
368 .as_ref()
369 else {
370 return source.to_string();
371 };
372
373 let normalized_loops = loop_decl_re.replace_all(source, "$kw $var $paren").into_owned();
374 let normalized_derefs = scalar_deref_re
375 .replace_all(&normalized_loops, |caps: ®ex::Captures<'_>| {
376 let variable = format!("${}", &caps["name"]);
377 format!("${{{}}}", variable)
378 })
379 .into_owned();
380
381 assign_bitnot_re
382 .replace_all(&normalized_derefs, |caps: ®ex::Captures<'_>| {
383 format!("= bitnot({})", &caps["expr"])
384 })
385 .into_owned()
386 }
387
388 fn parse_with_recovery(
389 &mut self,
390 source: &str,
391 original_error: pest::error::Error<Rule>,
392 ) -> Result<AstNode, Box<dyn std::error::Error>> {
393 let mut statements = Vec::new();
394 let lines: Vec<&str> = source.lines().collect();
395 let mut current_block = String::new();
396 let mut brace_count: i32 = 0;
397 let mut in_single_quote = false;
398 let mut in_double_quote = false;
399
400 for line in lines {
401 current_block.push_str(line);
402 current_block.push('\n');
403
404 {
406 let mut escaped = false;
407 for ch in line.chars() {
408 if escaped {
409 escaped = false;
410 continue;
411 }
412 if ch == '\\' {
413 escaped = true;
414 continue;
415 }
416 match ch {
417 '\'' if !in_double_quote => in_single_quote = !in_single_quote,
418 '"' if !in_single_quote => in_double_quote = !in_double_quote,
419 '{' if !in_single_quote && !in_double_quote => brace_count += 1,
420 '}' if !in_single_quote && !in_double_quote => brace_count -= 1,
421 _ => {}
422 }
423 }
424 }
425
426 if (brace_count == 0 && (line.trim().ends_with(';') || line.trim().ends_with('}')))
428 || line.trim().is_empty()
429 {
430 let trimmed = current_block.trim();
431 if !trimmed.is_empty() && !trimmed.starts_with('#') {
432 let with_semi = if !trimmed.ends_with(';') && !trimmed.ends_with('}') {
434 format!("{};", trimmed)
435 } else {
436 trimmed.to_string()
437 };
438
439 if let Ok(pairs) =
440 <PerlParser as Parser<Rule>>::parse(Rule::statements, &with_semi)
441 {
442 for pair in pairs {
443 for inner_pair in pair.into_inner() {
444 if let Some(node) = self.build_node(inner_pair).unwrap_or(None) {
445 statements.push(node);
446 }
447 }
448 }
449 current_block.clear();
450 in_single_quote = false;
451 in_double_quote = false;
452 } else {
453 current_block.clear();
455 in_single_quote = false;
456 in_double_quote = false;
457 }
458 } else if trimmed.starts_with('#') {
459 statements.push(AstNode::Comment(Arc::from(trimmed)));
461 current_block.clear();
462 }
463 }
464 }
465
466 if statements.is_empty() {
467 Err(Box::new(original_error))
468 } else {
469 Ok(AstNode::Program(statements))
470 }
471 }
472
473 fn extract_named_groups(&self, pattern: &str) -> Vec<Arc<str>> {
474 let mut groups = Vec::new();
475 let chars: Vec<char> = pattern.chars().collect();
476 let mut i = 0;
477
478 while i < chars.len() {
479 if i + 3 < chars.len() && chars[i..i + 3] == ['(', '?', '<'] {
480 i += 3;
482 let mut name = String::new();
483 while i < chars.len() && chars[i] != '>' {
484 name.push(chars[i]);
485 i += 1;
486 }
487 if !name.is_empty() {
488 groups.push(Arc::from(name));
489 }
490 }
491 i += 1;
492 }
493
494 groups
495 }
496
497 pub fn build_ast(&mut self, pairs: Pairs<Rule>) -> Result<AstNode, Box<dyn std::error::Error>> {
498 let mut nodes = Vec::new();
499 for pair in pairs {
500 if let Some(node) = self.build_node(pair)? {
501 nodes.push(node);
502 }
503 }
504 if nodes.len() == 1 {
505 nodes.pop().ok_or_else(|| "Empty nodes".into())
506 } else {
507 Ok(AstNode::Program(nodes))
508 }
509 }
510
511 #[inline]
516 pub fn build_node(
517 &mut self,
518 pair: Pair<Rule>,
519 ) -> Result<Option<AstNode>, Box<dyn std::error::Error>> {
520 const STACK_RED_ZONE: usize = 512 * 1024; const STACK_SIZE: usize = 8 * 1024 * 1024; stacker::maybe_grow(STACK_RED_ZONE, STACK_SIZE, || self.build_node_impl(pair))
524 }
525
526 #[inline]
528 fn build_node_impl(
529 &mut self,
530 pair: Pair<Rule>,
531 ) -> Result<Option<AstNode>, Box<dyn std::error::Error>> {
532 match pair.as_rule() {
533 Rule::simple_assignment => {
535 let mut inner = pair.into_inner();
536 let var_pair = inner.next().ok_or("Missing variable in simple assignment")?;
537 let var = self.build_node(var_pair)?.ok_or("Failed to build variable node")?;
538 let val_pair = inner.next().ok_or("Missing value in simple assignment")?;
539 let value = self.build_node(val_pair)?.ok_or("Failed to build value node")?;
540 Ok(Some(AstNode::Assignment {
541 target: Box::new(var),
542 op: Arc::from("="),
543 value: Box::new(value),
544 }))
545 }
546 Rule::simple_method_call => {
547 let text = pair.as_str();
549 let parts: Vec<&str> = text.split("->").collect();
551 if parts.len() == 2 {
552 let var_part = parts[0].trim();
553 let method_part = parts[1].trim();
554
555 let method_name = method_part.split('(').next().unwrap_or("").trim();
557
558 let object = if var_part.starts_with('$') {
560 AstNode::ScalarVariable(Arc::from(var_part))
561 } else {
562 AstNode::Identifier(Arc::from(var_part))
563 };
564
565 Ok(Some(AstNode::MethodCall {
566 object: Box::new(object),
567 method: Arc::from(method_name),
568 args: vec![],
569 }))
570 } else {
571 Ok(None)
572 }
573 }
574 Rule::simple_function_call => {
575 let text = pair.as_str();
577 let parts: Vec<&str> = text.split('(').collect();
579 if parts.len() >= 2 {
580 let func_name = parts[0].trim();
581 let func = AstNode::Identifier(Arc::from(func_name));
582
583 let args = if let Some(arg_part) = parts.get(1) {
585 let arg_text = arg_part.trim_end_matches(");").trim_end_matches(';').trim();
586 if !arg_text.is_empty() {
587 let arg = if arg_text.starts_with('$') {
588 AstNode::ScalarVariable(Arc::from(arg_text))
589 } else if arg_text.starts_with('"') || arg_text.starts_with('\'') {
590 AstNode::String(Arc::from(arg_text))
591 } else if arg_text.chars().all(|c| c.is_numeric() || c == '.') {
592 AstNode::Number(Arc::from(arg_text))
593 } else {
594 AstNode::Identifier(Arc::from(arg_text))
595 };
596 vec![arg]
597 } else {
598 vec![]
599 }
600 } else {
601 vec![]
602 };
603
604 Ok(Some(AstNode::FunctionCall { function: Box::new(func), args }))
605 } else {
606 Ok(None)
607 }
608 }
609 Rule::program => {
610 let mut statements = Vec::new();
611 for inner in pair.into_inner() {
612 if let Some(node) = self.build_node(inner)? {
613 statements.push(node);
614 }
615 }
616 Ok(Some(AstNode::Program(statements)))
617 }
618 Rule::statements => {
619 let mut statements = Vec::new();
620 for inner in pair.into_inner() {
621 if let Some(node) = self.build_node(inner)? {
622 statements.push(node);
623 }
624 }
625 Ok(Some(AstNode::Program(statements)))
626 }
627 Rule::statement => {
628 let inner = pair.into_inner().next().ok_or("Empty statement")?;
629 self.build_node(inner)
630 }
631 Rule::modified_statement => {
632 let mut inner = pair.into_inner();
633 let expr_pair = inner.next().ok_or("Missing expression in modified statement")?;
634 let expr = self.build_node(expr_pair)?;
635 let modifier = inner.next().ok_or("Missing modifier in modified statement")?;
636
637 let modifier_str = modifier.as_str();
639 let (modifier_type, condition_expr) =
640 if let Some(rest) = modifier_str.strip_prefix("if ") {
641 ("if", rest)
642 } else if let Some(rest) = modifier_str.strip_prefix("unless ") {
643 ("unless", rest)
644 } else if let Some(rest) = modifier_str.strip_prefix("while ") {
645 ("while", rest)
646 } else if let Some(rest) = modifier_str.strip_prefix("until ") {
647 ("until", rest)
648 } else if let Some(rest) = modifier_str.strip_prefix("for ") {
649 ("for", rest)
650 } else if let Some(rest) = modifier_str.strip_prefix("foreach ") {
651 ("foreach", rest)
652 } else {
653 return Ok(expr);
654 };
655
656 let condition = Some(AstNode::Identifier(Arc::from(condition_expr.trim())));
659
660 if let (Some(expr), Some(condition)) = (expr, condition) {
661 match modifier_type {
662 "if" => Ok(Some(AstNode::IfStatement {
663 condition: Box::new(condition),
664 then_block: Box::new(expr),
665 elsif_clauses: Vec::new(),
666 else_block: None,
667 })),
668 "unless" => Ok(Some(AstNode::UnlessStatement {
669 condition: Box::new(condition),
670 block: Box::new(expr),
671 else_block: None,
672 })),
673 "while" => Ok(Some(AstNode::WhileStatement {
674 label: None,
675 condition: Box::new(condition),
676 block: Box::new(expr),
677 })),
678 "until" => Ok(Some(AstNode::UntilStatement {
679 label: None,
680 condition: Box::new(condition),
681 block: Box::new(expr),
682 })),
683 "for" | "foreach" => Ok(Some(AstNode::ForStatement {
684 init: None,
685 condition: Some(Box::new(condition)),
686 update: None,
687 block: Box::new(expr),
688 label: None,
689 })),
690 _ => Ok(Some(AstNode::Statement(Box::new(expr)))),
691 }
692 } else {
693 Ok(None)
694 }
695 }
696 Rule::expression_statement => {
697 let inner = pair.into_inner().next().ok_or("Empty expression statement")?;
698 if let Some(expr) = self.build_node(inner)? {
699 Ok(Some(AstNode::Statement(Box::new(expr))))
700 } else {
701 Ok(None)
702 }
703 }
704 Rule::declaration_statement => {
705 let inner = pair.into_inner().next().ok_or("Empty declaration statement")?;
706 self.build_node(inner)
707 }
708 Rule::variable_declaration => {
709 let mut inner = pair.into_inner();
710 let scope_pair = inner.next().ok_or("Missing scope in variable declaration")?;
711 let scope = Arc::from(scope_pair.as_str());
712 let mut variables = Vec::new();
713 let mut initializer = None;
714
715 for p in inner {
716 match p.as_rule() {
717 Rule::variable_list => {
718 for var in p.into_inner() {
719 if let Some(v) = self.build_node(var)? {
720 variables.push(v);
721 }
722 }
723 }
724 Rule::expression => {
725 initializer = self.build_node(p)?.map(Box::new);
726 }
727 _ => {}
728 }
729 }
730
731 Ok(Some(AstNode::VariableDeclaration { scope, variables, initializer }))
732 }
733 Rule::sub_declaration => {
734 let inner = pair.into_inner();
735 let mut _sub_modifier = None;
736 let mut name = Arc::from("");
737 let mut prototype = None;
738 let mut attributes = Vec::new();
739 let mut body = None;
740
741 for p in inner {
742 match p.as_rule() {
743 Rule::sub_modifier => {
744 _sub_modifier = Some(p.as_str().to_string());
745 }
746 Rule::identifier => {
747 name = Arc::from(p.as_str());
748 }
749 Rule::prototype => {
750 prototype = Some(Arc::from(p.as_str()));
751 }
752 Rule::signature => {
753 prototype = Some(Arc::from(p.as_str()));
754 }
755 Rule::attributes => {
756 for attr in p.into_inner() {
757 attributes.push(Arc::from(attr.as_str()));
758 }
759 }
760 Rule::block => {
761 body = self.build_node(p)?.map(Box::new);
762 }
763 _ => {}
764 }
765 }
766
767 Ok(Some(AstNode::SubDeclaration {
768 name,
769 prototype,
770 attributes,
771 body: body.unwrap_or_else(|| Box::new(AstNode::Block(vec![]))),
772 }))
773 }
774 Rule::glob => {
775 let inner = pair.into_inner().next().ok_or("Empty glob")?; Ok(Some(AstNode::Glob(Arc::from(inner.as_str()))))
777 }
778 Rule::readline => {
779 let mut filehandle = None;
780 for p in pair.into_inner() {
781 if p.as_rule() == Rule::filehandle {
782 filehandle = Some(Arc::from(p.as_str()));
783 }
784 }
785 Ok(Some(AstNode::Readline { filehandle }))
786 }
787 Rule::format_declaration => {
788 let inner = pair.into_inner();
789 let mut name = Arc::from("");
790 let mut format_lines = Vec::new();
791
792 for p in inner {
793 match p.as_rule() {
794 Rule::format_name => {
795 name = Arc::from(p.as_str());
796 }
797 Rule::format_lines => {
798 for line in p.into_inner() {
799 if line.as_rule() == Rule::format_line {
800 format_lines.push(Arc::from(line.as_str()));
801 }
802 }
803 }
804 _ => {}
805 }
806 }
807
808 Ok(Some(AstNode::FormatDeclaration { name, format_lines }))
809 }
810 Rule::if_statement => {
811 let mut inner = pair.into_inner();
812 let cond_pair = inner.next().ok_or("Missing condition in if statement")?;
814 let condition =
815 Box::new(self.build_node(cond_pair)?.ok_or("Failed to build condition node")?);
816 let block_pair = inner.next().ok_or("Missing block in if statement")?;
818 let then_block =
819 Box::new(self.build_node(block_pair)?.ok_or("Failed to build block node")?);
820 let mut elsif_clauses = Vec::new();
821 let mut else_block = None;
822
823 for p in inner {
824 match p.as_rule() {
825 Rule::elsif_clause => {
826 let mut elsif_inner = p.into_inner();
827 let cond_pair =
829 elsif_inner.next().ok_or("Missing condition in elsif")?;
830 let cond = self
831 .build_node(cond_pair)?
832 .ok_or("Failed to build elsif condition")?;
833 let block_pair = elsif_inner.next().ok_or("Missing block in elsif")?;
835 let block = self
836 .build_node(block_pair)?
837 .ok_or("Failed to build elsif block")?;
838 elsif_clauses.push((cond, block));
839 }
840 Rule::else_clause => {
841 let mut else_inner = p.into_inner();
842 let else_pair = else_inner.next().ok_or("Missing block in else")?;
844 else_block = self.build_node(else_pair)?.map(Box::new);
845 }
846 _ => {}
847 }
848 }
849
850 Ok(Some(AstNode::IfStatement { condition, then_block, elsif_clauses, else_block }))
851 }
852 Rule::tie_statement => {
853 let mut inner = pair.into_inner();
854 let var_pair = inner.next().ok_or("Missing variable in tie statement")?;
855 let variable =
856 Box::new(self.build_node(var_pair)?.ok_or("Failed to build variable node")?);
857 let class_pair = inner.next().ok_or("Missing class in tie statement")?;
858 let class =
859 Box::new(self.build_node(class_pair)?.ok_or("Failed to build class node")?);
860 let mut args = Vec::new();
861
862 for arg in inner {
863 if let Some(node) = self.build_node(arg)? {
864 args.push(node);
865 }
866 }
867
868 Ok(Some(AstNode::TieStatement { variable, class, args }))
869 }
870 Rule::untie_statement => {
871 let mut inner = pair.into_inner();
872 let var_pair = inner.next().ok_or("Missing variable in untie statement")?;
873 let variable =
874 Box::new(self.build_node(var_pair)?.ok_or("Failed to build variable node")?);
875
876 Ok(Some(AstNode::UntieStatement { variable }))
877 }
878 Rule::tied_statement => {
879 let mut inner = pair.into_inner();
880 let var_pair = inner.next().ok_or("Missing variable in tied statement")?;
881 let variable =
882 Box::new(self.build_node(var_pair)?.ok_or("Failed to build variable node")?);
883
884 Ok(Some(AstNode::TiedExpression { variable }))
885 }
886 Rule::given_statement => {
887 let mut inner = pair.into_inner();
888 let expr_pair = inner.next().ok_or("Missing expression in given statement")?;
889 let expression =
890 Box::new(self.build_node(expr_pair)?.ok_or("Failed to build expression node")?);
891 let given_block = inner.next().ok_or("Missing block in given statement")?;
892
893 let mut when_clauses = Vec::new();
894 let mut default_block = None;
895
896 for p in given_block.into_inner() {
897 match p.as_rule() {
898 Rule::when_clause => {
899 let mut when_inner = p.into_inner();
900 let when_cond_pair =
901 when_inner.next().ok_or("Missing condition in when clause")?;
902 let cond_inner_pair =
903 when_cond_pair.into_inner().next().ok_or("Empty when condition")?;
904 let cond = self
905 .build_node(cond_inner_pair)?
906 .ok_or("Failed to build when condition")?;
907 let block_pair =
908 when_inner.next().ok_or("Missing block in when clause")?;
909 let block =
910 self.build_node(block_pair)?.ok_or("Failed to build when block")?;
911 when_clauses.push((cond, block));
912 }
913 Rule::default_clause => {
914 let mut default_inner = p.into_inner();
915 let default_pair =
916 default_inner.next().ok_or("Missing block in default clause")?;
917 default_block = Some(Box::new(
918 self.build_node(default_pair)?
919 .ok_or("Failed to build default block")?,
920 ));
921 }
922 _ => {}
923 }
924 }
925
926 Ok(Some(AstNode::GivenStatement { expression, when_clauses, default_block }))
927 }
928 Rule::block => {
929 let mut statements = Vec::new();
930 for inner in pair.into_inner() {
931 if inner.as_rule() == Rule::statements {
932 for stmt in inner.into_inner() {
933 if let Some(node) = self.build_node(stmt)? {
934 statements.push(node);
935 }
936 }
937 }
938 }
939 Ok(Some(AstNode::Block(statements)))
940 }
941 Rule::anonymous_sub => {
942 let inner = pair.into_inner();
943 let mut prototype = None;
944 let mut attributes: Vec<Arc<str>> = Vec::new();
945 let mut body = None;
946
947 for p in inner {
948 match p.as_rule() {
949 Rule::prototype => {
950 prototype = Some(Arc::from(p.as_str()));
951 }
952 Rule::attributes => {
953 for attr in p.into_inner() {
954 attributes.push(Arc::from(attr.as_str()));
955 }
956 }
957 Rule::block => {
958 body = self.build_node(p)?.map(Box::new);
959 }
960 _ => {}
961 }
962 }
963
964 Ok(Some(AstNode::AnonymousSub {
965 prototype,
966 body: body.unwrap_or_else(|| Box::new(AstNode::Block(vec![]))),
967 }))
968 }
969 Rule::expression => self.build_expression(pair),
970 Rule::logical_or_expression => {
971 self.build_binary_expression(pair, Rule::logical_or_expression)
972 }
973 Rule::logical_xor_expression => {
974 self.build_binary_expression(pair, Rule::logical_xor_expression)
975 }
976 Rule::defined_or_expression => {
977 self.build_binary_expression(pair, Rule::defined_or_expression)
978 }
979 Rule::logical_and_expression => {
980 self.build_binary_expression(pair, Rule::logical_and_expression)
981 }
982 Rule::equality_expression => {
983 self.build_binary_expression(pair, Rule::equality_expression)
984 }
985 Rule::relational_expression => {
986 self.build_binary_expression(pair, Rule::relational_expression)
987 }
988 Rule::isa_expression => self.build_binary_expression(pair, Rule::isa_expression),
989 Rule::bitwise_expression => {
990 self.build_binary_expression(pair, Rule::bitwise_expression)
991 }
992 Rule::bitwise_string_expression => {
993 self.build_binary_expression(pair, Rule::bitwise_string_expression)
994 }
995 Rule::range_expression => self.build_binary_expression(pair, Rule::range_expression),
996 Rule::additive_expression => {
997 self.build_binary_expression(pair, Rule::additive_expression)
998 }
999 Rule::multiplicative_expression => {
1000 self.build_binary_expression(pair, Rule::multiplicative_expression)
1001 }
1002 Rule::exponential_expression => {
1003 self.build_binary_expression(pair, Rule::exponential_expression)
1004 }
1005 Rule::assignment_expression => {
1006 let mut inner = pair.into_inner();
1007 if let (Some(target_pair), Some(op_pair), Some(value_pair)) =
1008 (inner.next(), inner.next(), inner.next())
1009 {
1010 let target =
1011 Box::new(self.build_node(target_pair)?.unwrap_or(AstNode::EmptyExpression));
1012 let op_str = op_pair.as_str();
1013 let op = if op_str == "_DIV_=" {
1014 Arc::from("/=")
1015 } else if op_str.contains("_DIV_") {
1016 Arc::from(op_str.replace("_DIV_", "/"))
1017 } else {
1018 Arc::from(op_str)
1019 };
1020 let value =
1021 Box::new(self.build_node(value_pair)?.unwrap_or(AstNode::EmptyExpression));
1022 Ok(Some(AstNode::Assignment { target, op, value }))
1023 } else {
1024 Ok(None)
1025 }
1026 }
1027 Rule::unary_expression => {
1028 let mut inner = pair.into_inner();
1029 let first = inner.next().ok_or("Empty unary expression")?;
1030
1031 match first.as_rule() {
1033 Rule::postfix_expression | Rule::reference => {
1034 self.build_node(first)
1036 }
1037 Rule::file_test_operator => {
1038 let op = Arc::from(first.as_str());
1040 if let Some(next_pair) = inner.next() {
1041 if let Some(operand_node) = self.build_node(next_pair)? {
1042 let operand = Box::new(operand_node);
1043 Ok(Some(AstNode::UnaryOp { op, operand }))
1044 } else {
1045 Ok(None)
1046 }
1047 } else {
1048 Ok(None)
1049 }
1050 }
1051 _ => {
1052 let op = Arc::from(first.as_str());
1054 if let Some(next_pair) = inner.next() {
1055 if let Some(operand_node) = self.build_node(next_pair)? {
1056 let operand = Box::new(operand_node);
1057 Ok(Some(AstNode::UnaryOp { op, operand }))
1058 } else {
1059 Ok(None)
1060 }
1061 } else {
1062 Ok(None)
1063 }
1064 }
1065 }
1066 }
1067 Rule::postfix_expression => {
1068 let mut inner = pair.into_inner();
1069 let expr_pair = inner.next().ok_or("Empty postfix expression")?;
1070 let mut expr =
1071 self.build_node(expr_pair)?.ok_or("Failed to build base expression")?;
1072
1073 for postfix_op in inner {
1074 if postfix_op.as_rule() == Rule::postfix_dereference {
1075 let op_inner =
1076 postfix_op.into_inner().next().ok_or("Empty postfix dereference")?;
1077 match op_inner.as_rule() {
1078 Rule::postfix_dereference => {
1079 let deref_str = op_inner.as_str();
1080 let deref_type = deref_str.strip_prefix("->").unwrap_or(deref_str);
1082 expr = AstNode::PostfixDereference {
1083 expr: Box::new(expr),
1084 deref_type: Arc::from(deref_type),
1085 };
1086 }
1087 Rule::method_call => {
1088 let method_inner = op_inner.into_inner();
1089 let mut method = Arc::from("");
1090 let mut args = Vec::new();
1091
1092 for p in method_inner {
1093 match p.as_rule() {
1094 Rule::method_name => {
1095 method = Arc::from(p.as_str());
1096 }
1097 Rule::function_args => {
1098 if let Some(arg_list) = p.into_inner().next() {
1099 args = self.parse_arg_list(arg_list)?;
1100 }
1101 }
1102 _ => {}
1103 }
1104 }
1105
1106 expr = AstNode::MethodCall { object: Box::new(expr), method, args };
1107 }
1108 Rule::typeglob_slot_access => {
1109 let slot_pair = op_inner
1110 .into_inner()
1111 .next()
1112 .ok_or("Empty typeglob slot access")?;
1113 let slot = Arc::from(slot_pair.as_str());
1114 expr =
1115 AstNode::TypeglobSlotAccess { typeglob: Box::new(expr), slot };
1116 }
1117 Rule::array_access => {
1118 let index_pair =
1119 op_inner.into_inner().next().ok_or("Empty array access")?;
1120 let index_expr = self
1121 .build_node(index_pair)?
1122 .ok_or("Failed to build array index node")?;
1123 expr = AstNode::ArrayAccess {
1124 array: Box::new(expr),
1125 index: Box::new(index_expr),
1126 };
1127 }
1128 Rule::hash_access => {
1129 let key_pair =
1130 op_inner.into_inner().next().ok_or("Empty hash access")?;
1131 let key_expr = self
1132 .build_node(key_pair)?
1133 .ok_or("Failed to build hash key node")?;
1134 expr = AstNode::HashAccess {
1135 hash: Box::new(expr),
1136 key: Box::new(key_expr),
1137 };
1138 }
1139 Rule::function_call => {
1140 let args = if let Some(args_pair) = op_inner.into_inner().next() {
1141 self.parse_arg_list(args_pair)?
1142 } else {
1143 Vec::new()
1144 };
1145 expr = AstNode::FunctionCall { function: Box::new(expr), args };
1146 }
1147 _ => {}
1148 }
1149 }
1150 }
1151
1152 Ok(Some(expr))
1153 }
1154 Rule::scalar_variable => Ok(Some(AstNode::ScalarVariable(Arc::from(pair.as_str())))),
1155 Rule::array_variable => Ok(Some(AstNode::ArrayVariable(Arc::from(pair.as_str())))),
1156 Rule::hash_variable => Ok(Some(AstNode::HashVariable(Arc::from(pair.as_str())))),
1157 Rule::typeglob_variable => {
1158 Ok(Some(AstNode::TypeglobVariable(Arc::from(pair.as_str()))))
1159 }
1160 Rule::number => Ok(Some(AstNode::Number(Arc::from(pair.as_str())))),
1161 Rule::identifier => Ok(Some(AstNode::Identifier(Arc::from(pair.as_str())))),
1162 Rule::qualified_name => Ok(Some(AstNode::Identifier(Arc::from(pair.as_str())))),
1163 Rule::qualified_name_or_identifier => {
1164 Ok(Some(AstNode::Identifier(Arc::from(pair.as_str()))))
1165 }
1166 Rule::class_method_call => {
1167 let inner = pair.into_inner();
1168 let mut parts = Vec::new();
1169 let mut args = Vec::new();
1170
1171 for p in inner {
1172 match p.as_rule() {
1173 Rule::identifier => {
1174 parts.push(p.as_str());
1175 }
1176 Rule::method_name => {
1177 parts.push(p.as_str());
1178 }
1179 Rule::function_args => {
1180 if let Some(arg_list) = p.into_inner().next() {
1181 for arg_pair in arg_list.into_inner() {
1182 if let Ok(Some(arg)) = self.build_node(arg_pair) {
1183 args.push(arg);
1184 }
1185 }
1186 }
1187 }
1188 _ => {}
1189 }
1190 }
1191
1192 if parts.len() >= 2 {
1194 let method_name = Arc::from(parts.pop().ok_or("Empty parts list")?);
1195 let class_name = parts.join("::");
1196 let class_node = AstNode::Identifier(Arc::from(class_name));
1197
1198 Ok(Some(AstNode::MethodCall {
1199 object: Box::new(class_node),
1200 method: method_name,
1201 args,
1202 }))
1203 } else {
1204 Ok(Some(AstNode::Identifier(Arc::from(""))))
1206 }
1207 }
1208 Rule::user_function_call => {
1209 let inner = pair.into_inner();
1210 let mut name = Arc::from("");
1211 let mut args = Vec::new();
1212
1213 for p in inner {
1214 match p.as_rule() {
1215 Rule::identifier => {
1216 name = Arc::from(p.as_str());
1217 }
1218 Rule::list_op_args => {
1219 for arg_pair in p.into_inner() {
1220 if arg_pair.as_rule() == Rule::list_op_arg {
1221 for expr_pair in arg_pair.into_inner() {
1222 if let Ok(Some(arg)) = self.build_node(expr_pair) {
1223 args.push(arg);
1224 }
1225 }
1226 }
1227 }
1228 }
1229 _ => {}
1230 }
1231 }
1232
1233 Ok(Some(AstNode::FunctionCall {
1234 function: Box::new(AstNode::Identifier(name)),
1235 args,
1236 }))
1237 }
1238 Rule::builtin_list_op => {
1239 let inner = pair.into_inner();
1240 let mut name = Arc::from("");
1241 let mut args = Vec::new();
1242
1243 for p in inner {
1244 match p.as_rule() {
1245 Rule::builtin_list_op_name => {
1246 name = Arc::from(p.as_str().trim());
1247 }
1248 Rule::list_op_args => {
1249 for arg_pair in p.into_inner() {
1250 if arg_pair.as_rule() == Rule::list_op_arg {
1251 for expr_pair in arg_pair.into_inner() {
1252 if let Ok(Some(arg)) = self.build_node(expr_pair) {
1253 args.push(arg);
1254 }
1255 }
1256 }
1257 }
1258 }
1259 _ => {}
1260 }
1261 }
1262
1263 Ok(Some(AstNode::BuiltinListOp { name, args }))
1264 }
1265 Rule::special_literal => Ok(Some(AstNode::SpecialLiteral(Arc::from(pair.as_str())))),
1266 Rule::string => {
1267 let inner_pairs: Vec<_> = pair.into_inner().collect();
1269 if let Some(inner) = inner_pairs.into_iter().next() {
1270 self.build_node(inner)
1271 } else {
1272 Ok(Some(AstNode::String(Arc::from(""))))
1273 }
1274 }
1275 Rule::single_quoted_string => Ok(Some(AstNode::String(Arc::from(pair.as_str())))),
1276 Rule::double_quoted_string => {
1277 let mut parts = Vec::new();
1279
1280 if let Some(content_pair) = pair.into_inner().next() {
1281 for part in content_pair.into_inner() {
1283 for inner in part.into_inner() {
1285 match inner.as_rule() {
1286 Rule::double_string_chars => {
1287 parts.push(AstNode::String(Arc::from(inner.as_str())));
1288 }
1289 Rule::interpolation => {
1290 if let Ok(Some(interp_node)) = self.build_node(inner) {
1291 parts.push(interp_node);
1292 }
1293 }
1294 _ => {}
1295 }
1296 }
1297 }
1298
1299 if parts.is_empty() {
1300 Ok(Some(AstNode::String(Arc::from(""))))
1301 } else if parts.len() == 1 && matches!(parts[0], AstNode::String(_)) {
1302 Ok(Some(parts.into_iter().next().ok_or("Empty parts list")?))
1303 } else {
1304 Ok(Some(AstNode::InterpolatedString(parts)))
1305 }
1306 } else {
1307 Ok(Some(AstNode::String(Arc::from(""))))
1309 }
1310 }
1311 Rule::q_string => {
1312 let content = pair.as_str();
1315 if content.contains("__HEREDOC__") {
1316 if let Some(start_idx) = content.find("{__HEREDOC__")
1318 && let Some(end_idx) = content.rfind("__HEREDOC__}")
1319 {
1320 let heredoc_content = &content[start_idx + 12..end_idx];
1321 return Ok(Some(AstNode::String(Arc::from(heredoc_content))));
1322 }
1323 }
1324 Ok(Some(AstNode::String(Arc::from(content))))
1325 }
1326 Rule::qq_string => {
1327 let content = pair.as_str();
1330 if content.contains("__HEREDOC__") {
1331 if let Some(start_idx) = content.find("{__HEREDOC__")
1333 && let Some(end_idx) = content.rfind("__HEREDOC__}")
1334 {
1335 let heredoc_content = &content[start_idx + 12..end_idx];
1336 return Ok(Some(AstNode::QqString(Arc::from(heredoc_content))));
1337 }
1338 }
1339 Ok(Some(AstNode::QqString(Arc::from(content))))
1340 }
1341 Rule::qx_string => {
1342 Ok(Some(AstNode::QxString(Arc::from(pair.as_str()))))
1344 }
1345 Rule::backtick_string => {
1346 Ok(Some(AstNode::QxString(Arc::from(pair.as_str()))))
1348 }
1349 Rule::heredoc_placeholder => {
1350 Ok(Some(AstNode::String(Arc::from(pair.as_str()))))
1352 }
1353 Rule::heredoc => {
1354 let inner = pair.into_inner();
1355 let mut indented = false;
1356 let mut marker = Arc::from("");
1357 let mut quoted = false;
1358
1359 for p in inner {
1360 match p.as_rule() {
1361 Rule::heredoc_indented => {
1362 indented = true;
1363 }
1364 Rule::heredoc_delimiter => {
1365 let delimiter_str = p.as_str();
1366 let delimiter_inner = p.into_inner().next();
1367 if let Some(d) = delimiter_inner {
1368 match d.as_rule() {
1369 Rule::heredoc_single_quoted => {
1370 quoted = true;
1371 marker = Arc::from(d.as_str().trim_matches('\''));
1372 }
1373 Rule::heredoc_double_quoted => {
1374 marker = Arc::from(d.as_str().trim_matches('"'));
1375 }
1376 Rule::heredoc_backtick => {
1377 marker = Arc::from(d.as_str().trim_matches('`'));
1378 }
1379 Rule::heredoc_escaped => {
1380 marker = Arc::from(d.as_str().trim_start_matches('\\'));
1381 }
1382 Rule::bare_heredoc_delimiter => {
1383 marker = Arc::from(d.as_str());
1384 }
1385 _ => {}
1386 }
1387 } else {
1388 marker = Arc::from(delimiter_str);
1389 }
1390 }
1391 _ => {}
1392 }
1393 }
1394
1395 Ok(Some(AstNode::Heredoc {
1399 marker,
1400 indented,
1401 quoted,
1402 content: Arc::from(""), }))
1404 }
1405 Rule::list => {
1406 let mut elements = Vec::new();
1407 for inner in pair.into_inner() {
1408 if inner.as_rule() == Rule::list_elements {
1409 for elem in inner.into_inner() {
1410 if let Some(node) = self.build_node(elem)? {
1411 elements.push(node);
1412 }
1413 }
1414 }
1415 }
1416 Ok(Some(AstNode::List(elements)))
1417 }
1418 Rule::array_ref => {
1419 let mut elements = Vec::new();
1420 for inner in pair.into_inner() {
1421 if inner.as_rule() == Rule::list_elements {
1422 for elem in inner.into_inner() {
1423 if let Some(node) = self.build_node(elem)? {
1424 elements.push(node);
1425 }
1426 }
1427 }
1428 }
1429 Ok(Some(AstNode::ArrayRef(elements)))
1430 }
1431 Rule::hash_ref => {
1432 let mut elements = Vec::new();
1433 for inner in pair.into_inner() {
1434 if inner.as_rule() == Rule::hash_elements {
1435 for elem in inner.into_inner() {
1436 if let Some(node) = self.build_node(elem)? {
1437 elements.push(node);
1438 }
1439 }
1440 }
1441 }
1442 Ok(Some(AstNode::HashRef(elements)))
1443 }
1444 Rule::begin_block => {
1445 let inner = pair.into_inner().next().ok_or("Empty begin block")?; let block = self.build_node(inner)?.map(Box::new);
1447 Ok(block.map(AstNode::BeginBlock))
1448 }
1449 Rule::end_block => {
1450 let inner = pair.into_inner().next().ok_or("Empty end block")?; let block = self.build_node(inner)?.map(Box::new);
1452 Ok(block.map(AstNode::EndBlock))
1453 }
1454 Rule::check_block => {
1455 let inner = pair.into_inner().next().ok_or("Empty check block")?; let block = self.build_node(inner)?.map(Box::new);
1457 Ok(block.map(AstNode::CheckBlock))
1458 }
1459 Rule::init_block => {
1460 let inner = pair.into_inner().next().ok_or("Empty init block")?; let block = self.build_node(inner)?.map(Box::new);
1462 Ok(block.map(AstNode::InitBlock))
1463 }
1464 Rule::unitcheck_block => {
1465 let inner = pair.into_inner().next().ok_or("Empty unitcheck block")?; let block = self.build_node(inner)?.map(Box::new);
1467 Ok(block.map(AstNode::UnitcheckBlock))
1468 }
1469 Rule::qw_list => {
1470 let mut words = Vec::new();
1471 for inner in pair.into_inner() {
1472 match inner.as_rule() {
1473 Rule::qw_paren_items
1474 | Rule::qw_bracket_items
1475 | Rule::qw_brace_items
1476 | Rule::qw_angle_items
1477 | Rule::qw_delimited_items => {
1478 let content = inner.as_str();
1480 words.extend(content.split_whitespace().map(Arc::from));
1481 }
1482 _ => {}
1483 }
1484 }
1485 Ok(Some(AstNode::QwList(words)))
1486 }
1487 Rule::do_block => {
1488 let inner = pair.into_inner().next().ok_or("Empty do block")?;
1489 let expr = self.build_node(inner)?.map(Box::new);
1490 Ok(expr.map(AstNode::DoBlock))
1491 }
1492 Rule::eval_statement => {
1493 let inner = pair.into_inner().next().ok_or("Empty eval statement")?;
1494 let expr = self.build_node(inner)?;
1495 Ok(expr.map(|e| match e {
1496 AstNode::Block(_) => AstNode::EvalBlock(Box::new(e)),
1497 _ => AstNode::EvalString(Box::new(e)),
1498 }))
1499 }
1500 Rule::goto_statement => {
1501 let inner = pair.into_inner().next().ok_or("Empty goto statement")?;
1502 let target = match inner.as_rule() {
1503 Rule::goto_target => Arc::from(inner.as_str()),
1504 _ => {
1505 if let Some(expr) = self.build_node(inner)? {
1507 Arc::from(format!("{:?}", expr)) } else {
1509 Arc::from("")
1510 }
1511 }
1512 };
1513 Ok(Some(AstNode::GotoStatement { target }))
1514 }
1515 Rule::try_catch_statement => {
1516 let mut inner = pair.into_inner();
1517 let try_pair = inner.next().ok_or("Missing try block")?;
1518 let try_block =
1519 Box::new(self.build_node(try_pair)?.ok_or("Failed to build try block")?);
1520 let mut catch_clauses = Vec::new();
1521 let mut finally_block = None;
1522
1523 for p in inner {
1524 match p.as_rule() {
1525 Rule::catch_clause => {
1526 let catch_inner = p.into_inner();
1527 let mut param = None;
1528 let mut block = None;
1529
1530 for cp in catch_inner {
1531 match cp.as_rule() {
1532 Rule::catch_parameter => {
1533 param = Some(Arc::from(cp.as_str()));
1534 }
1535 Rule::block => {
1536 block = Some(
1537 self.build_node(cp)?
1538 .ok_or("Failed to build catch block")?,
1539 );
1540 }
1541 _ => {}
1542 }
1543 }
1544
1545 if let Some(b) = block {
1546 catch_clauses.push((param, b));
1547 }
1548 }
1549 Rule::finally_clause => {
1550 let mut finally_inner = p.into_inner();
1551 if let Some(block_pair) = finally_inner.next() {
1552 finally_block = Some(Box::new(
1553 self.build_node(block_pair)?
1554 .ok_or("Failed to build finally block")?,
1555 ));
1556 }
1557 }
1558 _ => {}
1559 }
1560 }
1561
1562 Ok(Some(AstNode::TryCatch { try_block, catch_clauses, finally_block }))
1563 }
1564 Rule::defer_statement => {
1565 let mut inner = pair.into_inner();
1566 let block_pair = inner.next().ok_or("Missing block in defer statement")?;
1567 let block =
1568 Box::new(self.build_node(block_pair)?.ok_or("Failed to build defer block")?);
1569 Ok(Some(AstNode::DeferStatement(block)))
1570 }
1571 Rule::class_declaration => {
1572 let mut inner = pair.into_inner();
1573 let name_pair = inner.next().ok_or("Missing class name")?;
1574 let name = Arc::from(name_pair.as_str());
1575 let mut version = None;
1576 let mut superclass = None;
1577 let mut body = Vec::new();
1578
1579 for p in inner {
1580 match p.as_rule() {
1581 Rule::version => {
1582 version = Some(Arc::from(p.as_str()));
1583 }
1584 Rule::superclass => {
1585 let super_pair =
1586 p.into_inner().next().ok_or("Missing superclass name")?;
1587 superclass = Some(Arc::from(super_pair.as_str()));
1588 }
1589 Rule::class_body => {
1590 for member in p.into_inner() {
1591 if let Some(node) = self.build_node(member)? {
1592 body.push(node);
1593 }
1594 }
1595 }
1596 _ => {}
1597 }
1598 }
1599
1600 Ok(Some(AstNode::ClassDeclaration { name, version, superclass, body }))
1601 }
1602 Rule::method_declaration => {
1603 let mut inner = pair.into_inner();
1604 let name_pair = inner.next().ok_or("Missing method name")?;
1605 let name = Arc::from(name_pair.as_str());
1606 let mut signature = None;
1607 let mut attributes = Vec::new();
1608 let mut body = None;
1609
1610 for p in inner {
1611 match p.as_rule() {
1612 Rule::signature => {
1613 signature = Some(Arc::from(p.as_str()));
1614 }
1615 Rule::attributes => {
1616 for attr in p.into_inner() {
1617 attributes.push(Arc::from(attr.as_str()));
1618 }
1619 }
1620 Rule::block => {
1621 body = Some(Box::new(
1622 self.build_node(p)?.ok_or("Failed to build method body")?,
1623 ));
1624 }
1625 _ => {}
1626 }
1627 }
1628
1629 Ok(Some(AstNode::MethodDeclaration {
1630 name,
1631 signature,
1632 attributes,
1633 body: body.unwrap_or_else(|| Box::new(AstNode::Block(vec![]))),
1634 }))
1635 }
1636 Rule::field_declaration => {
1637 let mut inner = pair.into_inner();
1638 let name_pair = inner.next().ok_or("Missing field name")?;
1639 let name = Arc::from(name_pair.as_str());
1640 let mut attributes = Vec::new();
1641 let mut default = None;
1642
1643 for p in inner {
1644 match p.as_rule() {
1645 Rule::field_attributes => {
1646 for attr in p.into_inner() {
1647 attributes.push(Arc::from(attr.as_str()));
1648 }
1649 }
1650 Rule::default_value => {
1651 if let Some(expr) = p.into_inner().next() {
1652 default = Some(Box::new(
1653 self.build_node(expr)?
1654 .ok_or("Failed to build field default value")?,
1655 ));
1656 }
1657 }
1658 _ => {}
1659 }
1660 }
1661
1662 Ok(Some(AstNode::FieldDeclaration { name, attributes, default }))
1663 }
1664 Rule::return_statement => {
1665 let mut inner = pair.into_inner();
1666 if let Some(expr_pair) = inner.next() {
1668 if expr_pair.as_rule() != Rule::semicolon {
1669 let expr = self.build_node(expr_pair)?;
1670 Ok(Some(AstNode::ReturnStatement { value: expr.map(Box::new) }))
1671 } else {
1672 Ok(Some(AstNode::ReturnStatement { value: None }))
1673 }
1674 } else {
1675 Ok(Some(AstNode::ReturnStatement { value: None }))
1676 }
1677 }
1678 Rule::pod_section => Ok(Some(AstNode::Pod(Arc::from(pair.as_str())))),
1679 Rule::data_section => Ok(Some(AstNode::DataSection(Arc::from(pair.as_str())))),
1680 Rule::end_section => Ok(Some(AstNode::EndSection(Arc::from(pair.as_str())))),
1681 Rule::labeled_block => {
1682 let mut inner = pair.into_inner();
1683 let label_pair = inner.next().ok_or("Missing label in labeled block")?;
1684 let label = Arc::from(label_pair.as_str().trim_end_matches(':'));
1685 let block_pair = inner.next().ok_or("Missing block in labeled block")?;
1686 let block = self.build_node(block_pair)?.map(Box::new);
1687 Ok(block.map(|b| AstNode::LabeledBlock { label, block: b }))
1688 }
1689 Rule::comment => Ok(Some(AstNode::Comment(Arc::from(pair.as_str())))),
1690 Rule::semicolon | Rule::WHITESPACE => Ok(None),
1691 Rule::standalone_expression => {
1692 let inner = pair.into_inner().next().ok_or("Empty standalone expression")?;
1693 self.build_node(inner)
1694 }
1695 Rule::regex => {
1696 let mut inner = pair.into_inner();
1697 if let Some(first) = inner.next() {
1698 match first.as_rule() {
1699 Rule::match_regex => {
1700 let mut match_inner = first.into_inner();
1701 let pattern = match_inner
1702 .next()
1703 .map(|p| Arc::from(p.as_str()))
1704 .unwrap_or_else(|| Arc::from(""));
1705 let flags = match_inner
1706 .next()
1707 .map(|p| Arc::from(p.as_str()))
1708 .unwrap_or_else(|| Arc::from(""));
1709
1710 let named_groups = self.extract_named_groups(&pattern);
1712
1713 Ok(Some(AstNode::Regex { pattern, flags, named_groups }))
1714 }
1715 _ => {
1716 let pattern = Arc::from(first.as_str());
1717 let flags = inner
1718 .next()
1719 .map(|p| Arc::from(p.as_str()))
1720 .unwrap_or_else(|| Arc::from(""));
1721 let named_groups = self.extract_named_groups(&pattern);
1722 Ok(Some(AstNode::Regex { pattern, flags, named_groups }))
1723 }
1724 }
1725 } else {
1726 Ok(Some(AstNode::Regex {
1727 pattern: Arc::from(""),
1728 flags: Arc::from(""),
1729 named_groups: Vec::new(),
1730 }))
1731 }
1732 }
1733 Rule::substitution => {
1734 let inner = pair.into_inner();
1735 let mut pattern = Arc::from("");
1736 let mut replacement = Arc::from("");
1737 let mut flags = Arc::from("");
1738
1739 for p in inner {
1740 match p.as_rule() {
1741 Rule::sub_pattern => {
1742 pattern = Arc::from(p.as_str());
1743 }
1744 Rule::replacement => {
1745 replacement = Arc::from(p.as_str());
1746 }
1747 Rule::regex_flags => {
1748 flags = Arc::from(p.as_str());
1749 }
1750 _ => {}
1751 }
1752 }
1753
1754 Ok(Some(AstNode::Substitution { pattern, replacement, flags }))
1755 }
1756 Rule::transliteration => {
1757 let inner = pair.into_inner();
1758 let mut search_list = Arc::from("");
1759 let mut replace_list = Arc::from("");
1760 let mut flags = Arc::from("");
1761
1762 for p in inner {
1763 match p.as_rule() {
1764 Rule::search_list => {
1765 search_list = Arc::from(p.as_str());
1766 }
1767 Rule::replace_list => {
1768 replace_list = Arc::from(p.as_str());
1769 }
1770 Rule::trans_flags => {
1771 flags = Arc::from(p.as_str());
1772 }
1773 _ => {}
1774 }
1775 }
1776
1777 Ok(Some(AstNode::Transliteration { search_list, replace_list, flags }))
1778 }
1779 Rule::while_statement => {
1780 let inner = pair.into_inner();
1781 let mut label = None;
1782 let mut condition = None;
1783 let mut block = None;
1784
1785 for p in inner {
1786 match p.as_rule() {
1787 Rule::label => {
1788 label = Some(Arc::from(p.as_str().trim_end_matches(':')));
1789 }
1790 Rule::expression => {
1791 condition = Some(Box::new(
1792 self.build_node(p)?.unwrap_or(AstNode::EmptyExpression),
1793 ));
1794 }
1795 Rule::block => {
1796 block = Some(Box::new(
1797 self.build_node(p)?.unwrap_or(AstNode::EmptyExpression),
1798 ));
1799 }
1800 _ => {}
1801 }
1802 }
1803
1804 Ok(Some(AstNode::WhileStatement {
1805 label,
1806 condition: condition.unwrap_or_else(|| Box::new(AstNode::EmptyExpression)),
1807 block: block.unwrap_or_else(|| Box::new(AstNode::EmptyExpression)),
1808 }))
1809 }
1810 Rule::until_statement => {
1811 let inner = pair.into_inner();
1812 let mut label = None;
1813 let mut condition = None;
1814 let mut block = None;
1815
1816 for p in inner {
1817 match p.as_rule() {
1818 Rule::label => {
1819 label = Some(Arc::from(p.as_str().trim_end_matches(':')));
1820 }
1821 Rule::expression => {
1822 condition = Some(Box::new(
1823 self.build_node(p)?.unwrap_or(AstNode::EmptyExpression),
1824 ));
1825 }
1826 Rule::block => {
1827 block = Some(Box::new(
1828 self.build_node(p)?.unwrap_or(AstNode::EmptyExpression),
1829 ));
1830 }
1831 _ => {}
1832 }
1833 }
1834
1835 Ok(Some(AstNode::UntilStatement {
1836 label,
1837 condition: condition.unwrap_or_else(|| Box::new(AstNode::EmptyExpression)),
1838 block: block.unwrap_or_else(|| Box::new(AstNode::EmptyExpression)),
1839 }))
1840 }
1841 Rule::unless_statement => {
1842 let mut inner = pair.into_inner();
1843 let cond_pair = inner.next().ok_or("Missing condition in unless statement")?;
1844 let condition =
1845 Box::new(self.build_node(cond_pair)?.unwrap_or(AstNode::EmptyExpression));
1846 let block_pair = inner.next().ok_or("Missing block in unless statement")?;
1847 let block =
1848 Box::new(self.build_node(block_pair)?.unwrap_or(AstNode::EmptyExpression));
1849 let mut else_block = None;
1850
1851 if let Some(else_clause) = inner.next()
1853 && else_clause.as_rule() == Rule::else_clause
1854 {
1855 let mut else_inner = else_clause.into_inner();
1856 if let Some(else_block_pair) = else_inner.next() {
1857 else_block = Some(Box::new(
1858 self.build_node(else_block_pair)?.unwrap_or(AstNode::EmptyExpression),
1859 ));
1860 }
1861 }
1862
1863 Ok(Some(AstNode::UnlessStatement { condition, block, else_block }))
1864 }
1865 Rule::foreach_statement => {
1866 let inner = pair.into_inner();
1867 let mut label = None;
1868 let mut variable = None;
1869 let mut list = None;
1870 let mut block = None;
1871 let mut declarator = None;
1872
1873 for p in inner {
1874 match p.as_rule() {
1875 Rule::label => {
1876 label = Some(Arc::from(p.as_str().trim_end_matches(':')));
1877 }
1878 Rule::loop_variable_declarator => {
1879 declarator = Some(Arc::from(p.as_str()));
1880 }
1881 Rule::variable => {
1882 variable = Some(Box::new(
1883 self.build_node(p)?.unwrap_or(AstNode::EmptyExpression),
1884 ));
1885 }
1886 Rule::expression => {
1887 list = Some(Box::new(
1888 self.build_node(p)?.unwrap_or(AstNode::EmptyExpression),
1889 ));
1890 }
1891 Rule::block => {
1892 block = Some(Box::new(
1893 self.build_node(p)?.unwrap_or(AstNode::EmptyExpression),
1894 ));
1895 }
1896 _ => {}
1897 }
1898 }
1899
1900 let final_variable = if let Some(decl) = declarator {
1901 variable.map(|var| {
1902 Box::new(AstNode::VariableDeclaration {
1903 scope: decl,
1904 variables: vec![*var],
1905 initializer: None,
1906 })
1907 })
1908 } else {
1909 variable
1910 };
1911
1912 Ok(Some(AstNode::ForeachStatement {
1913 label,
1914 variable: final_variable,
1915 list: list.unwrap_or_else(|| Box::new(AstNode::EmptyExpression)),
1916 block: block.unwrap_or_else(|| Box::new(AstNode::EmptyExpression)),
1917 }))
1918 }
1919 Rule::for_statement => {
1920 let inner = pair.into_inner();
1921 let mut label = None;
1922 let mut init = None;
1923 let mut condition = None;
1924 let mut update = None;
1925 let mut block = None;
1926 let mut variable = None;
1927 let mut list = None;
1928 let mut declarator = None;
1929
1930 let is_c_style = inner.clone().any(|p| p.as_rule() == Rule::for_init);
1932
1933 for p in inner {
1935 match p.as_rule() {
1936 Rule::label => {
1937 label = Some(Arc::from(p.as_str().trim_end_matches(':')));
1938 }
1939 Rule::for_init => {
1940 init = self.build_node(p)?.map(Box::new);
1941 }
1942 Rule::assignment_expression => {
1943 if init.is_none() && condition.is_none() {
1945 init = self.build_node(p)?.map(Box::new);
1946 }
1947 }
1948 Rule::expression => {
1949 if is_c_style {
1950 if condition.is_none() {
1952 condition = self.build_node(p)?.map(Box::new);
1953 } else if update.is_none() {
1954 update = self.build_node(p)?.map(Box::new);
1955 }
1956 } else {
1957 list = Some(Box::new(
1959 self.build_node(p)?.unwrap_or(AstNode::EmptyExpression),
1960 ));
1961 }
1962 }
1963 Rule::variable => {
1964 variable = Some(Box::new(
1965 self.build_node(p)?.unwrap_or(AstNode::EmptyExpression),
1966 ));
1967 }
1968 Rule::loop_variable_declarator => {
1969 declarator = Some(Arc::from(p.as_str()));
1970 }
1971 Rule::block => {
1972 block = self.build_node(p)?.map(Box::new);
1973 }
1974 _ => {}
1975 }
1976 }
1977
1978 if is_c_style || init.is_some() || condition.is_some() || update.is_some() {
1979 Ok(Some(AstNode::ForStatement {
1981 label,
1982 init,
1983 condition,
1984 update,
1985 block: block.unwrap_or_else(|| Box::new(AstNode::Block(vec![]))),
1986 }))
1987 } else {
1988 let final_variable = if let Some(decl) = declarator {
1991 variable.map(|var| {
1992 Box::new(AstNode::VariableDeclaration {
1993 scope: decl,
1994 variables: vec![*var],
1995 initializer: None,
1996 })
1997 })
1998 } else {
1999 variable
2000 };
2001
2002 Ok(Some(AstNode::ForeachStatement {
2003 label,
2004 variable: final_variable,
2005 list: list.unwrap_or_else(|| Box::new(AstNode::EmptyExpression)),
2006 block: block.unwrap_or_else(|| Box::new(AstNode::Block(vec![]))),
2007 }))
2008 }
2009 }
2010 Rule::package_declaration => {
2011 let inner = pair.into_inner();
2012 let mut name = Arc::from("");
2013 let mut version = None;
2014 let mut block = None;
2015
2016 for p in inner {
2017 match p.as_rule() {
2018 Rule::qualified_name => name = Arc::from(p.as_str()),
2019 Rule::version => version = Some(Arc::from(p.as_str())),
2020 Rule::block => block = self.build_node(p)?.map(Box::new),
2021 Rule::semicolon => {}
2022 _ => {}
2023 }
2024 }
2025 Ok(Some(AstNode::PackageDeclaration { name, version, block }))
2026 }
2027 Rule::use_statement => {
2028 let inner = pair.into_inner();
2029
2030 let mut module = Arc::from("");
2031 let mut version = None;
2032 let mut import_list = Vec::new();
2033
2034 for p in inner {
2035 match p.as_rule() {
2036 Rule::module_name => module = Arc::from(p.as_str()),
2037 Rule::version => version = Some(Arc::from(p.as_str())),
2038 Rule::import_list => {
2039 for item in p.into_inner() {
2040 if item.as_rule() == Rule::import_items {
2041 for import_item in item.into_inner() {
2042 if import_item.as_rule() == Rule::import_item {
2043 import_list.push(Arc::from(import_item.as_str()));
2044 }
2045 }
2046 }
2047 }
2048 }
2049 _ => {}
2050 }
2051 }
2052
2053 Ok(Some(AstNode::UseStatement { module, version, import_list }))
2054 }
2055 Rule::require_statement => {
2056 let inner = pair.into_inner();
2057
2058 let mut module = Arc::from("");
2059
2060 for p in inner {
2061 match p.as_rule() {
2062 Rule::module_name => module = Arc::from(p.as_str()),
2063 Rule::expression => {
2064 if let Some(expr) = self.build_node(p)? {
2066 module = Arc::from(Self::node_to_sexp(&expr));
2067 }
2068 }
2069 _ => {}
2070 }
2071 }
2072
2073 Ok(Some(AstNode::RequireStatement { module }))
2074 }
2075 Rule::interpolation => {
2076 let inner = pair.into_inner().next().ok_or("Empty interpolation")?;
2078 self.build_node(inner)
2079 }
2080 Rule::complex_scalar_interpolation => {
2081 let inner = pair.into_inner().next().ok_or("Empty complex scalar interpolation")?;
2083 self.build_node(inner)
2084 }
2085 Rule::complex_array_interpolation => {
2086 let inner = pair.into_inner().next().ok_or("Empty complex array interpolation")?;
2088 self.build_node(inner)
2089 }
2090 Rule::reference => {
2091 let inner = pair.into_inner().next().ok_or("Empty reference")?;
2093 self.build_node(inner)
2094 }
2095 Rule::dereference => {
2096 let inner = pair.into_inner().next().ok_or("Empty dereference")?;
2097 self.build_node(inner)
2098 }
2099 Rule::scalar_reference => {
2100 Ok(Some(AstNode::ScalarReference(Arc::from(pair.as_str()))))
2102 }
2103 Rule::array_reference => {
2104 Ok(Some(AstNode::ArrayReference(Arc::from(pair.as_str()))))
2106 }
2107 Rule::hash_reference => {
2108 Ok(Some(AstNode::HashReference(Arc::from(pair.as_str()))))
2110 }
2111 Rule::subroutine_reference => {
2112 Ok(Some(AstNode::SubroutineReference(Arc::from(pair.as_str()))))
2114 }
2115 Rule::glob_reference => {
2116 Ok(Some(AstNode::GlobReference(Arc::from(pair.as_str()))))
2118 }
2119 Rule::scalar_dereference => self.build_dereference_node(pair, "$"),
2120 Rule::array_dereference => self.build_dereference_node(pair, "@"),
2121 Rule::hash_dereference => self.build_dereference_node(pair, "%"),
2122 Rule::code_dereference => self.build_dereference_node(pair, "&"),
2123 Rule::glob_dereference => self.build_dereference_node(pair, "*"),
2124 Rule::primary_expression => {
2125 let inner: Vec<_> = pair.into_inner().collect();
2127 if inner.is_empty() {
2128 Ok(None)
2129 } else if inner.len() == 1 {
2130 let first = inner.into_iter().next().ok_or("Expected exactly one element")?;
2131 match first.as_rule() {
2133 Rule::expression => {
2134 self.build_node(first)
2136 }
2137 _ => self.build_node(first),
2138 }
2139 } else {
2140 let mut result = None;
2142 for p in inner {
2143 if p.as_str() == "(" || p.as_str() == ")" {
2144 continue;
2146 }
2147 if let Some(node) = self.build_node(p)? {
2148 result = Some(node);
2149 }
2150 }
2151 Ok(result)
2152 }
2153 }
2154 _ => {
2155 let inner: Vec<_> = pair.into_inner().collect();
2157 if inner.is_empty() {
2158 Ok(None)
2159 } else if inner.len() == 1 {
2160 let first = inner.into_iter().next().ok_or("Expected exactly one element")?;
2161 self.build_node(first)
2162 } else {
2163 let mut nodes = Vec::new();
2164 for p in inner {
2165 if let Some(node) = self.build_node(p)? {
2166 nodes.push(node);
2167 }
2168 }
2169 if nodes.is_empty() {
2170 Ok(None)
2171 } else if nodes.len() == 1 {
2172 Ok(nodes.into_iter().next())
2173 } else {
2174 Ok(Some(AstNode::List(nodes)))
2175 }
2176 }
2177 }
2178 }
2179 }
2180
2181 fn build_expression(
2182 &mut self,
2183 pair: Pair<Rule>,
2184 ) -> Result<Option<AstNode>, Box<dyn std::error::Error>> {
2185 let inner = pair.into_inner().next().ok_or("Empty expression")?;
2186 match inner.as_rule() {
2187 Rule::assignment_expression => self.build_node(inner),
2188 Rule::ternary_expression => self.build_ternary_expression(inner),
2189 _ => self.build_node(inner),
2190 }
2191 }
2192
2193 #[inline]
2194 fn build_ternary_expression(
2195 &mut self,
2196 pair: Pair<Rule>,
2197 ) -> Result<Option<AstNode>, Box<dyn std::error::Error>> {
2198 let inner: Vec<_> = pair.into_inner().collect();
2199 if inner.len() == 1 {
2200 let first = inner.into_iter().next().ok_or("Expected exactly one element")?;
2202 self.build_node(first)
2203 } else if inner.len() == 3 {
2204 let condition = Box::new(
2205 self.build_node(inner[0].clone())?.ok_or("Failed to build ternary condition")?,
2206 );
2207 let then_expr = Box::new(
2208 self.build_node(inner[1].clone())?
2209 .ok_or("Failed to build ternary then expression")?,
2210 );
2211 let else_expr = Box::new(
2212 self.build_node(inner[2].clone())?
2213 .ok_or("Failed to build ternary else expression")?,
2214 );
2215 Ok(Some(AstNode::TernaryOp { condition, true_expr: then_expr, false_expr: else_expr }))
2216 } else {
2217 let first = inner.into_iter().next().ok_or("Expected exactly one element")?;
2218 self.build_node(first)
2219 }
2220 }
2221
2222 #[inline]
2223 fn build_binary_expression(
2224 &mut self,
2225 pair: Pair<Rule>,
2226 _op_rule: Rule,
2227 ) -> Result<Option<AstNode>, Box<dyn std::error::Error>> {
2228 let inner: Vec<_> = pair.into_inner().collect();
2229 if inner.len() == 1 {
2230 let first = inner.into_iter().next().ok_or("Expected exactly one element")?;
2231 self.build_node(first)
2232 } else if inner.len() >= 3 {
2233 self.build_binary_expr_with_precedence(inner)
2235 } else {
2236 let first = inner.into_iter().next().ok_or("Expected exactly one element")?;
2237 self.build_node(first)
2238 }
2239 }
2240
2241 fn build_binary_expr_with_precedence(
2242 &mut self,
2243 pairs: Vec<Pair<Rule>>,
2244 ) -> Result<Option<AstNode>, Box<dyn std::error::Error>> {
2245 if pairs.is_empty() {
2246 return Ok(None);
2247 }
2248
2249 let mut result = self.build_node(pairs[0].clone())?.unwrap_or(AstNode::EmptyExpression);
2251 let mut i = 1;
2252
2253 while i < pairs.len() - 1 {
2254 let op_str = pairs[i].as_str();
2256 let op = Arc::from(if op_str == "_DIV_" { "/" } else { op_str });
2257 let right = self.build_node(pairs[i + 1].clone())?.unwrap_or(AstNode::EmptyExpression);
2259
2260 result = AstNode::BinaryOp { op, left: Box::new(result), right: Box::new(right) };
2261
2262 i += 2;
2263 }
2264
2265 Ok(Some(result))
2266 }
2267
2268 fn _apply_precedence(&self, left: AstNode, op: Arc<str>, right: AstNode, _prec: u8) -> AstNode {
2269 AstNode::BinaryOp { op, left: Box::new(left), right: Box::new(right) }
2271 }
2272
2273 fn parse_arg_list(
2274 &mut self,
2275 pair: Pair<Rule>,
2276 ) -> Result<Vec<AstNode>, Box<dyn std::error::Error>> {
2277 let mut args = Vec::new();
2278 for arg in pair.into_inner() {
2279 if let Some(node) = self.build_node(arg)? {
2280 args.push(node);
2281 }
2282 }
2283 Ok(args)
2284 }
2285
2286 fn build_dereference_node(
2287 &mut self,
2288 pair: Pair<Rule>,
2289 deref_type: &'static str,
2290 ) -> Result<Option<AstNode>, Box<dyn std::error::Error>> {
2291 let mut inner = pair.into_inner();
2292 let expr = if let Some(inner_pair) = inner.next() {
2293 match inner_pair.as_rule() {
2294 Rule::expression => {
2295 Box::new(self.build_node(inner_pair)?.unwrap_or(AstNode::EmptyExpression))
2296 }
2297 Rule::variable_name => {
2298 let variable = Arc::<str>::from(format!("${}", inner_pair.as_str()));
2299 Box::new(AstNode::ScalarVariable(variable))
2300 }
2301 _ => Box::new(self.build_node(inner_pair)?.unwrap_or(AstNode::EmptyExpression)),
2302 }
2303 } else {
2304 Box::new(AstNode::EmptyExpression)
2305 };
2306
2307 Ok(Some(AstNode::Dereference { expr, deref_type: Arc::from(deref_type) }))
2308 }
2309
2310 pub fn to_sexp(&self, node: &AstNode) -> String {
2311 Self::node_to_sexp(node)
2312 }
2313
2314 pub fn node_to_sexp(node: &AstNode) -> String {
2315 match node {
2316 AstNode::Program(children) => {
2317 let mut flat_children = vec![];
2319 for c in children {
2320 let sexp = Self::node_to_sexp(c);
2321 if sexp.starts_with("(source_file ") {
2322 let inner = sexp.trim_start_matches("(source_file ").trim_end_matches(")");
2323 flat_children.push(inner.to_string());
2324 } else {
2325 flat_children.push(sexp);
2326 }
2327 }
2328 if flat_children.is_empty() {
2329 "(source_file)".to_string()
2330 } else {
2331 format!("(source_file {})", flat_children.join(" "))
2332 }
2333 }
2334 AstNode::Statement(expr) => Self::node_to_sexp(expr),
2335 AstNode::Block(statements) => {
2336 let stmt_sexps: Vec<String> = statements.iter().map(Self::node_to_sexp).collect();
2337 format!("(block {})", stmt_sexps.join(" "))
2338 }
2339 AstNode::VariableDeclaration { scope, variables, initializer } => {
2340 let var_sexps: Vec<String> = variables.iter().map(Self::node_to_sexp).collect();
2341 if let Some(init) = initializer {
2342 format!(
2343 "(variable_declaration {} {} = {})",
2344 scope,
2345 var_sexps.join(" "),
2346 Self::node_to_sexp(init)
2347 )
2348 } else {
2349 format!("(variable_declaration {} {})", scope, var_sexps.join(" "))
2350 }
2351 }
2352 AstNode::SubDeclaration { name, prototype, body, .. } => {
2353 let mut parts = vec![format!("(identifier {})", name)];
2354 if let Some(proto) = prototype {
2355 parts.push(format!("(signature {})", proto));
2356 }
2357 parts.push(Self::node_to_sexp(body));
2358 format!("(subroutine {})", parts.join(" "))
2359 }
2360 AstNode::AnonymousSub { body, .. } => {
2361 format!("(anonymous_subroutine {})", Self::node_to_sexp(body))
2362 }
2363 AstNode::FormatDeclaration { name, format_lines } => {
2364 let lines_sexp = format_lines
2365 .iter()
2366 .map(|line| format!("(format_line \"{}\")", line.replace("\"", "\\\"")))
2367 .collect::<Vec<_>>()
2368 .join(" ");
2369 if name.is_empty() {
2370 format!("(format_declaration {})", lines_sexp)
2371 } else {
2372 format!("(format_declaration (identifier {}) {})", name, lines_sexp)
2373 }
2374 }
2375 AstNode::IfStatement { condition, then_block, .. } => {
2376 format!(
2377 "(if_statement {} {})",
2378 Self::node_to_sexp(condition),
2379 Self::node_to_sexp(then_block)
2380 )
2381 }
2382 AstNode::GivenStatement { expression, when_clauses, default_block } => {
2383 let mut result = format!("(given_statement {}", Self::node_to_sexp(expression));
2384 for (cond, block) in when_clauses {
2385 result.push_str(&format!(
2386 " (when_clause {} {})",
2387 Self::node_to_sexp(cond),
2388 Self::node_to_sexp(block)
2389 ));
2390 }
2391 if let Some(default) = default_block {
2392 result.push_str(&format!(" (default_clause {})", Self::node_to_sexp(default)));
2393 }
2394 result.push(')');
2395 result
2396 }
2397 AstNode::TieStatement { variable, class, args } => {
2398 let args_str = args.iter().map(Self::node_to_sexp).collect::<Vec<_>>().join(" ");
2399 format!(
2400 "(tie_statement {} {} {})",
2401 Self::node_to_sexp(variable),
2402 Self::node_to_sexp(class),
2403 args_str
2404 )
2405 }
2406 AstNode::UntieStatement { variable } => {
2407 format!("(untie_statement {})", Self::node_to_sexp(variable))
2408 }
2409 AstNode::TiedExpression { variable } => {
2410 format!("(tied_expression {})", Self::node_to_sexp(variable))
2411 }
2412 AstNode::PostfixDereference { expr, deref_type } => {
2413 format!("(postfix_deref {} {})", Self::node_to_sexp(expr), deref_type)
2414 }
2415 AstNode::Dereference { expr, deref_type } => {
2416 format!("(dereference {} {})", Self::node_to_sexp(expr), deref_type)
2417 }
2418 AstNode::TypeglobSlotAccess { typeglob, slot } => {
2419 format!("(typeglob_slot_access {} {})", Self::node_to_sexp(typeglob), slot)
2420 }
2421 AstNode::ArrayAccess { array, index } => {
2422 format!(
2423 "(array_access {} {})",
2424 Self::node_to_sexp(array),
2425 Self::node_to_sexp(index)
2426 )
2427 }
2428 AstNode::HashAccess { hash, key } => {
2429 format!("(hash_access {} {})", Self::node_to_sexp(hash), Self::node_to_sexp(key))
2430 }
2431 AstNode::MethodCall { object, method, args } => {
2432 let args_str = if args.is_empty() {
2433 "( )".to_string()
2434 } else {
2435 format!(
2436 "( {} )",
2437 args.iter().map(Self::node_to_sexp).collect::<Vec<_>>().join(" ")
2438 )
2439 };
2440 format!(
2441 "(method_call_expression {} -> (method {}) {})",
2442 Self::node_to_sexp(object),
2443 method,
2444 args_str
2445 )
2446 }
2447 AstNode::Assignment { target, op, value } => {
2448 format!(
2449 "(assignment {} ({}) {})",
2450 Self::node_to_sexp(target),
2451 op,
2452 Self::node_to_sexp(value)
2453 )
2454 }
2455 AstNode::FunctionCall { function, args } => {
2456 let args_str = if args.is_empty() {
2457 "".to_string()
2458 } else {
2459 format!(
2460 " {}",
2461 args.iter().map(Self::node_to_sexp).collect::<Vec<_>>().join(" ")
2462 )
2463 };
2464 format!("(function_call {}{})", Self::node_to_sexp(function), args_str)
2465 }
2466 AstNode::BuiltinListOp { name, args } => {
2467 let args_str = if args.is_empty() {
2468 "".to_string()
2469 } else {
2470 format!(
2471 " {}",
2472 args.iter().map(Self::node_to_sexp).collect::<Vec<_>>().join(" ")
2473 )
2474 };
2475 format!("(function_call (identifier {}){})", name, args_str)
2476 }
2477 AstNode::BinaryOp { op, left, right } => {
2478 format!(
2479 "(binary_expression {} ({}) {})",
2480 Self::node_to_sexp(left),
2481 op,
2482 Self::node_to_sexp(right)
2483 )
2484 }
2485 AstNode::ScalarVariable(name) => {
2486 format!("(scalar_variable {})", name)
2487 }
2488 AstNode::ArrayVariable(name) => {
2489 format!("(array_variable {})", name)
2490 }
2491 AstNode::HashVariable(name) => {
2492 format!("(hash_variable {})", name)
2493 }
2494 AstNode::TypeglobVariable(name) => {
2495 format!("(typeglob_variable {})", name)
2496 }
2497 AstNode::ScalarReference(name) => {
2498 format!("(scalar_reference {})", name)
2499 }
2500 AstNode::ArrayReference(name) => {
2501 format!("(array_reference {})", name)
2502 }
2503 AstNode::HashReference(name) => {
2504 format!("(hash_reference {})", name)
2505 }
2506 AstNode::SubroutineReference(name) => {
2507 format!("(subroutine_reference {})", name)
2508 }
2509 AstNode::GlobReference(name) => {
2510 format!("(glob_reference {})", name)
2511 }
2512 AstNode::Number(value) => {
2513 format!("(number {})", value)
2514 }
2515 AstNode::String(value) => {
2516 format!("(string_literal {})", value)
2517 }
2518 AstNode::Bareword(value) => {
2519 format!("(bareword {})", value)
2520 }
2521 AstNode::Regex { pattern, flags, .. } => {
2522 if flags.is_empty() {
2523 format!("(regex /{}/ )", pattern)
2524 } else {
2525 format!("(regex /{}/{} )", pattern, flags)
2526 }
2527 }
2528 AstNode::InterpolatedString(parts) => {
2529 let parts_str = parts.iter().map(Self::node_to_sexp).collect::<Vec<_>>().join(" ");
2530 format!("(interpolated_string {})", parts_str)
2531 }
2532 AstNode::Identifier(name) => {
2533 format!("(identifier {})", name)
2534 }
2535 AstNode::SpecialLiteral(name) => {
2536 format!("(special_literal {})", name)
2537 }
2538 AstNode::EmptyExpression => "(empty_expression)".to_string(),
2539 AstNode::Comment(content) => {
2540 format!("(comment {})", content)
2541 }
2542 AstNode::List(items) => {
2543 let item_sexps: Vec<String> = items.iter().map(Self::node_to_sexp).collect();
2544 item_sexps.join(" ")
2545 }
2546 AstNode::ArrayRef(items) => {
2547 let item_sexps: Vec<String> = items.iter().map(Self::node_to_sexp).collect();
2548 format!("(array_ref {})", item_sexps.join(" "))
2549 }
2550 AstNode::HashRef(items) => {
2551 let item_sexps: Vec<String> = items.iter().map(Self::node_to_sexp).collect();
2552 format!("(hash_ref {})", item_sexps.join(" "))
2553 }
2554 AstNode::WhileStatement { label, condition, block } => {
2555 let label_str =
2556 if let Some(l) = label { format!(" (label {})", l) } else { String::new() };
2557 format!(
2558 "(while_statement{} {} {})",
2559 label_str,
2560 Self::node_to_sexp(condition),
2561 Self::node_to_sexp(block)
2562 )
2563 }
2564 AstNode::UntilStatement { label, condition, block } => {
2565 let label_str =
2566 if let Some(l) = label { format!(" (label {})", l) } else { String::new() };
2567 format!(
2568 "(until_statement{} {} {})",
2569 label_str,
2570 Self::node_to_sexp(condition),
2571 Self::node_to_sexp(block)
2572 )
2573 }
2574 AstNode::UnlessStatement { condition, block, else_block } => {
2575 let else_str = if let Some(e) = else_block {
2576 format!(" (else {})", Self::node_to_sexp(e))
2577 } else {
2578 String::new()
2579 };
2580 format!(
2581 "(unless_statement {} {}{}",
2582 Self::node_to_sexp(condition),
2583 Self::node_to_sexp(block),
2584 else_str
2585 )
2586 }
2587 AstNode::ForStatement { init, condition, update, block, .. } => {
2588 let mut parts = vec![];
2589 if let Some(i) = init {
2590 parts.push(format!("(init {})", Self::node_to_sexp(i)));
2591 }
2592 if let Some(c) = condition {
2593 parts.push(format!("(condition {})", Self::node_to_sexp(c)));
2594 }
2595 if let Some(u) = update {
2596 parts.push(format!("(update {})", Self::node_to_sexp(u)));
2597 }
2598 parts.push(format!("(body {})", Self::node_to_sexp(block)));
2599 format!("(for_statement {})", parts.join(" "))
2600 }
2601 AstNode::ForeachStatement { label, variable, list, block } => {
2602 let label_str =
2603 if let Some(l) = label { format!(" (label {})", l) } else { String::new() };
2604 let var_str = if let Some(v) = variable {
2605 format!(" (variable {})", Self::node_to_sexp(v))
2606 } else {
2607 String::new()
2608 };
2609 format!(
2610 "(foreach_statement{}{} {} {})",
2611 label_str,
2612 var_str,
2613 Self::node_to_sexp(list),
2614 Self::node_to_sexp(block)
2615 )
2616 }
2617 AstNode::PackageDeclaration { name, version, block } => {
2618 let mut parts = vec![format!("(name {})", name)];
2619 if let Some(v) = version {
2620 parts.push(format!("(version {})", v));
2621 }
2622 if let Some(b) = block {
2623 parts.push(format!("(body {})", Self::node_to_sexp(b)));
2624 }
2625 format!("(package_declaration {})", parts.join(" "))
2626 }
2627 AstNode::UseStatement { module, version, import_list } => {
2628 let mut parts = vec![format!("use (package {})", module)];
2629 if let Some(v) = version {
2630 parts.push(format!("(version {})", v));
2631 }
2632 if !import_list.is_empty() {
2633 parts.push(format!("(import_list {})", import_list.join(" ")));
2634 }
2635 parts.push(";".to_string());
2636 format!("(use_statement {})", parts.join(" "))
2637 }
2638 AstNode::RequireStatement { module } => {
2639 format!("(require_statement require (package {}) ;)", module)
2640 }
2641 AstNode::Substitution { pattern, replacement, flags } => {
2642 if flags.is_empty() {
2643 format!("(substitution s/{}/{}/ )", pattern, replacement)
2644 } else {
2645 format!("(substitution s/{}/{}/{} )", pattern, replacement, flags)
2646 }
2647 }
2648 AstNode::Transliteration { search_list, replace_list, flags } => {
2649 if flags.is_empty() {
2650 format!("(transliteration tr/{}/{}/ )", search_list, replace_list)
2651 } else {
2652 format!("(transliteration tr/{}/{}/{} )", search_list, replace_list, flags)
2653 }
2654 }
2655 AstNode::BeginBlock(block) => {
2656 format!("(begin_block {})", Self::node_to_sexp(block))
2657 }
2658 AstNode::EndBlock(block) => {
2659 format!("(end_block {})", Self::node_to_sexp(block))
2660 }
2661 AstNode::CheckBlock(block) => {
2662 format!("(check_block {})", Self::node_to_sexp(block))
2663 }
2664 AstNode::InitBlock(block) => {
2665 format!("(init_block {})", Self::node_to_sexp(block))
2666 }
2667 AstNode::UnitcheckBlock(block) => {
2668 format!("(unitcheck_block {})", Self::node_to_sexp(block))
2669 }
2670 AstNode::QwList(words) => {
2671 let word_list =
2672 words.iter().map(|w| format!("(word {})", w)).collect::<Vec<_>>().join(" ");
2673 format!("(qw_list {})", word_list)
2674 }
2675 AstNode::DoBlock(expr) => {
2676 format!("(do_block {})", Self::node_to_sexp(expr))
2677 }
2678 AstNode::EvalBlock(block) => {
2679 format!("(eval_block {})", Self::node_to_sexp(block))
2680 }
2681 AstNode::EvalString(expr) => {
2682 format!("(eval_string {})", Self::node_to_sexp(expr))
2683 }
2684 AstNode::GotoStatement { target } => {
2685 format!("(goto_statement {})", target)
2686 }
2687 AstNode::ReturnStatement { value } => {
2688 if let Some(v) = value {
2689 format!("(return_statement {})", Self::node_to_sexp(v))
2690 } else {
2691 "(return_statement)".to_string()
2692 }
2693 }
2694 AstNode::LabeledBlock { label, block } => {
2695 format!("(labeled_block {} {})", label, Self::node_to_sexp(block))
2696 }
2697 AstNode::Heredoc { marker, indented, quoted, content } => {
2698 let flags = format!(
2699 "{}{}",
2700 if *indented { "~" } else { "" },
2701 if *quoted { "'" } else { "" }
2702 );
2703 format!("(heredoc {} {} \"{}\")", marker, flags, content.escape_default())
2704 }
2705 AstNode::Pod(content) => {
2706 format!("(pod {})", content)
2707 }
2708 AstNode::DataSection(content) => {
2709 format!("(data_section {})", content)
2710 }
2711 AstNode::EndSection(content) => {
2712 format!("(end_section {})", content)
2713 }
2714 AstNode::Glob(pattern) => {
2715 format!("(glob <{}>)", pattern)
2716 }
2717 AstNode::QqString(content) => {
2718 format!("(string_literal {})", content)
2719 }
2720 AstNode::QxString(content) => {
2721 format!("(command_substitution {})", content)
2722 }
2723 AstNode::Readline { filehandle } => {
2724 if let Some(fh) = filehandle {
2725 format!("(readline <{}>)", fh)
2726 } else {
2727 "(readline <>)".to_string()
2728 }
2729 }
2730 AstNode::TryCatch { try_block, catch_clauses, finally_block } => {
2731 let mut result = format!("(try_catch_statement {}", Self::node_to_sexp(try_block));
2732 for (param, block) in catch_clauses {
2733 if let Some(p) = param {
2734 result.push_str(&format!(" (catch ({}) {})", p, Self::node_to_sexp(block)));
2735 } else {
2736 result.push_str(&format!(" (catch {})", Self::node_to_sexp(block)));
2737 }
2738 }
2739 if let Some(finally) = finally_block {
2740 result.push_str(&format!(" (finally {})", Self::node_to_sexp(finally)));
2741 }
2742 result.push(')');
2743 result
2744 }
2745 AstNode::DeferStatement(block) => {
2746 format!("(defer_statement {})", Self::node_to_sexp(block))
2747 }
2748 AstNode::ClassDeclaration { name, version, superclass, body } => {
2749 let mut result = format!("(class_declaration {}", name);
2750 if let Some(v) = version {
2751 result.push_str(&format!(" (version {})", v));
2752 }
2753 if let Some(s) = superclass {
2754 result.push_str(&format!(" (superclass {})", s));
2755 }
2756 for member in body {
2757 result.push_str(&format!(" {}", Self::node_to_sexp(member)));
2758 }
2759 result.push(')');
2760 result
2761 }
2762 AstNode::MethodDeclaration { name, signature, attributes, body } => {
2763 let mut result = format!("(method_declaration {}", name);
2764 if let Some(sig) = signature {
2765 result.push_str(&format!(" (signature {})", sig));
2766 }
2767 if !attributes.is_empty() {
2768 result.push_str(&format!(" (attributes {})", attributes.join(" ")));
2769 }
2770 result.push_str(&format!(" {}", Self::node_to_sexp(body)));
2771 result.push(')');
2772 result
2773 }
2774 AstNode::FieldDeclaration { name, attributes, default } => {
2775 let mut result = format!("(field_declaration {}", name);
2776 if !attributes.is_empty() {
2777 result.push_str(&format!(" (attributes {})", attributes.join(" ")));
2778 }
2779 if let Some(d) = default {
2780 result.push_str(&format!(" (default {})", Self::node_to_sexp(d)));
2781 }
2782 result.push(')');
2783 result
2784 }
2785 AstNode::RoleDeclaration { name, body } => {
2786 format!("(role_declaration {} {})", name, Self::node_to_sexp(body))
2787 }
2788 _ => format!("(unhandled_node {:?})", node),
2789 }
2790 }
2791}
2792
2793impl Default for PureRustPerlParser {
2794 fn default() -> Self {
2795 Self::new()
2796 }
2797}
2798
#[cfg(test)]
mod tests {
    use super::*;
    use perl_tdd_support::must;

    /// A lone scalar variable parses; the AST and its rendering are printed.
    #[test]
    fn test_basic_parsing() {
        let mut parser = PureRustPerlParser::new();
        let parsed = parser.parse("$var");
        assert!(parsed.is_ok());
        let tree = must(parsed);
        let rendered = parser.to_sexp(&tree);
        println!("AST: {:?}", tree);
        println!("S-expression: {}", rendered);
    }

    /// Scalar, array and hash sigils in one input all parse.
    #[test]
    fn test_variable_parsing() {
        let mut parser = PureRustPerlParser::new();
        let parsed = parser.parse("$scalar @array %hash");
        assert!(parsed.is_ok());
        let tree = must(parsed);
        let rendered = parser.to_sexp(&tree);
        println!("S-expression: {}", rendered);
    }

    /// A `my` declaration with an initializer parses.
    #[test]
    fn test_assignment_parsing() {
        let mut parser = PureRustPerlParser::new();
        let tree = must(parser.parse("my $var = 42;"));
        let rendered = parser.to_sexp(&tree);
        println!("Success! AST: {:?}", tree);
        println!("S-expression: {}", rendered);
    }

    /// A named subroutine with a one-statement body parses.
    #[test]
    fn test_function_declaration() {
        let mut parser = PureRustPerlParser::new();
        let tree = must(parser.parse("sub hello { print 'Hello'; }"));
        let rendered = parser.to_sexp(&tree);
        println!("S-expression: {}", rendered);
    }

    /// An `if` with a comparison condition parses.
    #[test]
    fn test_if_statement() {
        let mut parser = PureRustPerlParser::new();
        let tree = must(parser.parse("if ($x > 0) { print 'positive'; }"));
        let rendered = parser.to_sexp(&tree);
        println!("S-expression: {}", rendered);
    }

    /// Regression: a string literal containing only `%` inside an `if` body must parse.
    #[test]
    fn test_regression_percent_string_in_if_assignment() {
        let mut parser = PureRustPerlParser::new();
        let source = r#"if ($a > 0) { $a = "%"; }"#;
        let outcome = parser.parse(source);
        assert!(outcome.is_ok(), "Failed to parse regression input: {source}");
    }

    /// Assigning a parenthesised list to an array parses.
    #[test]
    fn test_array_assignment() {
        let mut parser = PureRustPerlParser::new();
        let tree = must(parser.parse("@array = (1, 2, 3);"));
        let rendered = parser.to_sexp(&tree);
        println!("Array assignment AST: {:?}", tree);
        println!("S-expression: {}", rendered);
    }

    /// Assigning a fat-comma list to a hash parses.
    #[test]
    fn test_hash_assignment() {
        let mut parser = PureRustPerlParser::new();
        let tree = must(parser.parse("%hash = (a => 1, b => 2);"));
        let rendered = parser.to_sexp(&tree);
        println!("Hash assignment AST: {:?}", tree);
        println!("S-expression: {}", rendered);
    }
}