// Source: perl_parser_pest/pratt_parser.rs

1use crate::pure_rust_parser::AstNode;
2use crate::pure_rust_parser::Rule;
3use pest::iterators::Pair;
4use std::collections::HashMap;
5use std::sync::Arc;
6
/// Precedence level of an operator.
///
/// The operator table in this file numbers its levels from lowest to highest
/// precedence. The derived ordering compares the wrapped `u8` directly, so
/// `Precedence(2) > Precedence(1)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Precedence(pub u8);
9
/// How operators of the same precedence level group when written in a row.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Associativity {
    /// Groups from the left: `a - b - c` is `(a - b) - c`.
    Left,
    /// Groups from the right: `a = b = c` is `a = (b = c)`.
    Right,
    /// The operator is registered as non-associative.
    None,
}
16
17pub struct OpInfo {
18    pub precedence: Precedence,
19    pub associativity: Associativity,
20}
21
22pub struct PrattParser {
23    operators: HashMap<&'static str, OpInfo>,
24}
25
26impl Default for PrattParser {
27    fn default() -> Self {
28        Self::new()
29    }
30}
31
32impl PrattParser {
33    pub fn new() -> Self {
34        let mut operators = HashMap::new();
35
36        // Perl operator precedence (from lowest to highest)
37        // Level 1: List operators (rightward)
38        operators
39            .insert(",", OpInfo { precedence: Precedence(1), associativity: Associativity::Left });
40        operators
41            .insert("=>", OpInfo { precedence: Precedence(1), associativity: Associativity::Left });
42
43        // Level 2: List operators (leftward)
44        // These are handled specially in Perl
45
46        // Level 3: Assignment operators
47        operators
48            .insert("=", OpInfo { precedence: Precedence(3), associativity: Associativity::Right });
49        operators.insert(
50            "+=",
51            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
52        );
53        operators.insert(
54            "-=",
55            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
56        );
57        operators.insert(
58            "*=",
59            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
60        );
61        operators.insert(
62            "/=",
63            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
64        );
65        operators.insert(
66            "%=",
67            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
68        );
69        operators.insert(
70            "**=",
71            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
72        );
73        operators.insert(
74            "&=",
75            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
76        );
77        operators.insert(
78            "|=",
79            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
80        );
81        operators.insert(
82            "^=",
83            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
84        );
85        operators.insert(
86            "&.=",
87            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
88        );
89        operators.insert(
90            "|.=",
91            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
92        );
93        operators.insert(
94            "^.=",
95            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
96        );
97        operators.insert(
98            "<<=",
99            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
100        );
101        operators.insert(
102            ">>=",
103            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
104        );
105        operators.insert(
106            ".=",
107            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
108        );
109        operators.insert(
110            "//=",
111            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
112        );
113        operators.insert(
114            "&&=",
115            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
116        );
117        operators.insert(
118            "||=",
119            OpInfo { precedence: Precedence(3), associativity: Associativity::Right },
120        );
121
122        // Level 4: Ternary conditional
123        operators
124            .insert("?", OpInfo { precedence: Precedence(4), associativity: Associativity::Right });
125        operators
126            .insert(":", OpInfo { precedence: Precedence(4), associativity: Associativity::Right });
127
128        // Level 5: Range operators
129        operators
130            .insert("..", OpInfo { precedence: Precedence(5), associativity: Associativity::None });
131        operators.insert(
132            "...",
133            OpInfo { precedence: Precedence(5), associativity: Associativity::None },
134        );
135
136        // Level 6: Logical or
137        operators
138            .insert("||", OpInfo { precedence: Precedence(6), associativity: Associativity::Left });
139
140        // Level 7: Defined-or
141        operators
142            .insert("//", OpInfo { precedence: Precedence(7), associativity: Associativity::Left });
143
144        // Level 8: Logical and
145        operators
146            .insert("&&", OpInfo { precedence: Precedence(8), associativity: Associativity::Left });
147
148        // Level 9: Low precedence logical or/xor/and
149        operators
150            .insert("or", OpInfo { precedence: Precedence(9), associativity: Associativity::Left });
151        operators.insert(
152            "xor",
153            OpInfo { precedence: Precedence(9), associativity: Associativity::Left },
154        );
155
156        // Level 10: Low precedence logical and
157        operators.insert(
158            "and",
159            OpInfo { precedence: Precedence(10), associativity: Associativity::Left },
160        );
161
162        // Level 11: Low precedence not
163        operators.insert(
164            "not",
165            OpInfo { precedence: Precedence(11), associativity: Associativity::Right },
166        );
167
168        // Level 12: Comma and list operators
169        // Already added above
170
171        // Level 13: Named unary operators
172        // These are prefix operators handled separately
173
174        // Level 14: Relational operators
175        operators
176            .insert("<", OpInfo { precedence: Precedence(14), associativity: Associativity::None });
177        operators
178            .insert(">", OpInfo { precedence: Precedence(14), associativity: Associativity::None });
179        operators.insert(
180            "<=",
181            OpInfo { precedence: Precedence(14), associativity: Associativity::None },
182        );
183        operators.insert(
184            ">=",
185            OpInfo { precedence: Precedence(14), associativity: Associativity::None },
186        );
187        operators.insert(
188            "lt",
189            OpInfo { precedence: Precedence(14), associativity: Associativity::None },
190        );
191        operators.insert(
192            "gt",
193            OpInfo { precedence: Precedence(14), associativity: Associativity::None },
194        );
195        operators.insert(
196            "le",
197            OpInfo { precedence: Precedence(14), associativity: Associativity::None },
198        );
199        operators.insert(
200            "ge",
201            OpInfo { precedence: Precedence(14), associativity: Associativity::None },
202        );
203
204        // Level 15: Equality operators
205        operators.insert(
206            "==",
207            OpInfo { precedence: Precedence(15), associativity: Associativity::None },
208        );
209        operators.insert(
210            "!=",
211            OpInfo { precedence: Precedence(15), associativity: Associativity::None },
212        );
213        operators.insert(
214            "<=>",
215            OpInfo { precedence: Precedence(15), associativity: Associativity::None },
216        );
217        operators.insert(
218            "eq",
219            OpInfo { precedence: Precedence(15), associativity: Associativity::None },
220        );
221        operators.insert(
222            "ne",
223            OpInfo { precedence: Precedence(15), associativity: Associativity::None },
224        );
225        operators.insert(
226            "cmp",
227            OpInfo { precedence: Precedence(15), associativity: Associativity::None },
228        );
229        operators.insert(
230            "~~",
231            OpInfo { precedence: Precedence(15), associativity: Associativity::None },
232        );
233
234        // Level 16: ISA operator
235        operators.insert(
236            "isa",
237            OpInfo { precedence: Precedence(16), associativity: Associativity::None },
238        );
239
240        // Level 17: Bitwise and
241        operators
242            .insert("&", OpInfo { precedence: Precedence(17), associativity: Associativity::Left });
243        operators.insert(
244            "&.",
245            OpInfo { precedence: Precedence(17), associativity: Associativity::Left },
246        );
247
248        // Level 18: Bitwise or/xor
249        operators
250            .insert("|", OpInfo { precedence: Precedence(18), associativity: Associativity::Left });
251        operators
252            .insert("^", OpInfo { precedence: Precedence(18), associativity: Associativity::Left });
253        operators.insert(
254            "|.",
255            OpInfo { precedence: Precedence(18), associativity: Associativity::Left },
256        );
257        operators.insert(
258            "^.",
259            OpInfo { precedence: Precedence(18), associativity: Associativity::Left },
260        );
261
262        // Level 19: C-style logical and
263        // Already added &&
264
265        // Level 20: C-style logical or
266        // Already added ||
267
268        // Level 21: Range
269        // Already added .. and ...
270
271        // Level 22: Additive operators
272        operators
273            .insert("+", OpInfo { precedence: Precedence(22), associativity: Associativity::Left });
274        operators
275            .insert("-", OpInfo { precedence: Precedence(22), associativity: Associativity::Left });
276        operators
277            .insert(".", OpInfo { precedence: Precedence(22), associativity: Associativity::Left });
278
279        // Level 23: Multiplicative operators
280        operators
281            .insert("*", OpInfo { precedence: Precedence(23), associativity: Associativity::Left });
282        operators
283            .insert("/", OpInfo { precedence: Precedence(23), associativity: Associativity::Left });
284        operators
285            .insert("%", OpInfo { precedence: Precedence(23), associativity: Associativity::Left });
286        operators
287            .insert("x", OpInfo { precedence: Precedence(23), associativity: Associativity::Left });
288
289        // Level 24: Shift operators
290        operators.insert(
291            "<<",
292            OpInfo { precedence: Precedence(24), associativity: Associativity::Left },
293        );
294        operators.insert(
295            ">>",
296            OpInfo { precedence: Precedence(24), associativity: Associativity::Left },
297        );
298
299        // Level 25: Named unary operators and filetest operators
300        // These are prefix operators
301
302        // Level 26: Bitwise not
303        operators.insert(
304            "~",
305            OpInfo { precedence: Precedence(26), associativity: Associativity::Right },
306        );
307        operators.insert(
308            "~.",
309            OpInfo { precedence: Precedence(26), associativity: Associativity::Right },
310        );
311
312        // Level 27: Unary plus/minus and logical negation
313        // These are prefix operators
314
315        // Level 28: Exponentiation
316        operators.insert(
317            "**",
318            OpInfo { precedence: Precedence(28), associativity: Associativity::Right },
319        );
320
321        // Level 29: Pattern match and substitution
322        operators.insert(
323            "=~",
324            OpInfo { precedence: Precedence(29), associativity: Associativity::Left },
325        );
326        operators.insert(
327            "!~",
328            OpInfo { precedence: Precedence(29), associativity: Associativity::Left },
329        );
330
331        // Level 30: Dereference and postfix operators
332        // These are handled specially
333
334        PrattParser { operators }
335    }
336
337    pub fn get_operator_info(&self, op: &str) -> Option<&OpInfo> {
338        self.operators.get(op)
339    }
340
341    pub fn is_prefix_operator(op: &str) -> bool {
342        matches!(
343            op,
344            "!" | "not"
345                | "~"
346                | "~."
347                | "+"
348                | "-"
349                | "++"
350                | "--"
351                | "\\"
352                | "defined"
353                | "undef"
354                | "scalar"
355                | "my"
356                | "our"
357                | "local"
358                | "state"
359        )
360    }
361
362    pub fn is_postfix_operator(op: &str) -> bool {
363        matches!(op, "++" | "--")
364    }
365
366    pub fn parse_expression_from_pairs<'a>(
367        &self,
368        pairs: Vec<Pair<'a, Rule>>,
369        parser: &mut crate::pure_rust_parser::PureRustPerlParser,
370    ) -> Result<AstNode, Box<dyn std::error::Error>> {
371        if pairs.is_empty() {
372            return Err("Empty expression".into());
373        }
374
375        // Simple implementation for now - handle binary expressions
376        if pairs.len() == 1 {
377            // Single element, just parse it
378            parser
379                .build_node(pairs.into_iter().next().ok_or(crate::error::ParseError::ParseFailed)?)?
380                .ok_or_else(|| "Failed to parse".into())
381        } else if pairs.len() >= 3 {
382            // Binary expression - use precedence parsing
383            self.parse_binary_expr(pairs, 0, parser)
384        } else {
385            // Fallback
386            parser
387                .build_node(pairs.into_iter().next().ok_or(crate::error::ParseError::ParseFailed)?)?
388                .ok_or_else(|| "Failed to parse".into())
389        }
390    }
391
392    fn parse_binary_expr(
393        &self,
394        pairs: Vec<Pair<'_, Rule>>,
395        index: usize,
396        parser: &mut crate::pure_rust_parser::PureRustPerlParser,
397    ) -> Result<AstNode, Box<dyn std::error::Error>> {
398        if index >= pairs.len() {
399            return Err("Invalid expression".into());
400        }
401
402        // Parse left operand
403        let mut left =
404            parser.build_node(pairs[index].clone())?.ok_or("Failed to parse left operand")?;
405
406        let mut i = index + 1;
407        while i + 1 < pairs.len() {
408            // Get operator
409            let op = pairs[i].as_str();
410
411            // Get operator info
412            if let Some(_op_info) = self.get_operator_info(op) {
413                // Parse right operand
414                let right = parser
415                    .build_node(pairs[i + 1].clone())?
416                    .ok_or("Failed to parse right operand")?;
417
418                // Create binary op node
419                left = AstNode::BinaryOp {
420                    op: Arc::from(op),
421                    left: Box::new(left),
422                    right: Box::new(right),
423                };
424
425                i += 2;
426            } else {
427                break;
428            }
429        }
430
431        Ok(left)
432    }
433}