Skip to main content

pipa/compiler/
mir.rs

1use crate::compiler::ast::*;
2use crate::util::FxHashMap;
3
4#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
5pub struct ValueId(pub u32);
6
7#[derive(Debug, Clone, PartialEq, Eq, Hash)]
8pub enum MirConst {
9    NumberBits(u64),
10    String(String),
11    Boolean(bool),
12    Null,
13    Undefined,
14    BigInt(String),
15}
16
17#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
18pub enum MirUnaryOp {
19    Minus,
20    Plus,
21    Not,
22    BitNot,
23    TypeOf,
24    Void,
25    Delete,
26}
27
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
29pub enum MirBinaryOp {
30    Add,
31    Sub,
32    Mul,
33    Div,
34    Mod,
35    Pow,
36    Eq,
37    Neq,
38    StrictEq,
39    StrictNeq,
40    Lt,
41    Lte,
42    Gt,
43    Gte,
44    BitAnd,
45    BitOr,
46    BitXor,
47    Shl,
48    Shr,
49    UShr,
50    In,
51    InstanceOf,
52}
53
54impl From<&UnaryOp> for MirUnaryOp {
55    fn from(value: &UnaryOp) -> Self {
56        match value {
57            UnaryOp::Minus => Self::Minus,
58            UnaryOp::Plus => Self::Plus,
59            UnaryOp::Not => Self::Not,
60            UnaryOp::BitNot => Self::BitNot,
61            UnaryOp::TypeOf => Self::TypeOf,
62            UnaryOp::Void => Self::Void,
63            UnaryOp::Delete => Self::Delete,
64        }
65    }
66}
67
68impl From<&BinaryOp> for MirBinaryOp {
69    fn from(value: &BinaryOp) -> Self {
70        match value {
71            BinaryOp::Add => Self::Add,
72            BinaryOp::Sub => Self::Sub,
73            BinaryOp::Mul => Self::Mul,
74            BinaryOp::Div => Self::Div,
75            BinaryOp::Mod => Self::Mod,
76            BinaryOp::Pow => Self::Pow,
77            BinaryOp::Eq => Self::Eq,
78            BinaryOp::Neq => Self::Neq,
79            BinaryOp::StrictEq => Self::StrictEq,
80            BinaryOp::StrictNeq => Self::StrictNeq,
81            BinaryOp::Lt => Self::Lt,
82            BinaryOp::Lte => Self::Lte,
83            BinaryOp::Gt => Self::Gt,
84            BinaryOp::Gte => Self::Gte,
85            BinaryOp::BitAnd => Self::BitAnd,
86            BinaryOp::BitOr => Self::BitOr,
87            BinaryOp::BitXor => Self::BitXor,
88            BinaryOp::Shl => Self::Shl,
89            BinaryOp::Shr => Self::Shr,
90            BinaryOp::UShr => Self::UShr,
91            BinaryOp::In => Self::In,
92            BinaryOp::InstanceOf => Self::InstanceOf,
93        }
94    }
95}
96
97#[derive(Debug, Clone, PartialEq, Eq, Hash)]
98enum ValueKey {
99    Const(MirConst),
100    Local {
101        name: String,
102        version: u32,
103    },
104    Unary {
105        op: MirUnaryOp,
106        arg: ValueId,
107    },
108    Binary {
109        op: MirBinaryOp,
110        lhs: ValueId,
111        rhs: ValueId,
112    },
113}
114
115#[derive(Debug, Clone)]
116pub enum MirInstruction {
117    Const {
118        id: ValueId,
119        value: MirConst,
120    },
121    ReadLocal {
122        id: ValueId,
123        name: String,
124        version: u32,
125    },
126    Unary {
127        id: ValueId,
128        op: MirUnaryOp,
129        arg: ValueId,
130    },
131    Binary {
132        id: ValueId,
133        op: MirBinaryOp,
134        lhs: ValueId,
135        rhs: ValueId,
136    },
137    WriteLocal {
138        name: String,
139        version: u32,
140        value: ValueId,
141    },
142    Return {
143        value: Option<ValueId>,
144    },
145    Unknown {
146        id: ValueId,
147    },
148}
149
150#[derive(Debug, Clone, Default)]
151pub struct SsaStats {
152    pub reused_values: u32,
153    pub reused_unary: u32,
154    pub reused_binary: u32,
155}
156
157#[derive(Debug, Clone)]
158pub struct SsaFunction {
159    pub instructions: Vec<MirInstruction>,
160    pub current_versions: FxHashMap<String, u32>,
161    pub stats: SsaStats,
162}
163
164pub fn build_ssa(block: &BlockStatement) -> SsaFunction {
165    SsaBuilder::new().build_block(block)
166}
167
168struct SsaBuilder {
169    next_value: u32,
170    instructions: Vec<MirInstruction>,
171    current_versions: FxHashMap<String, u32>,
172    value_numbers: FxHashMap<ValueKey, ValueId>,
173    stats: SsaStats,
174}
175
176impl SsaBuilder {
177    fn new() -> Self {
178        Self {
179            next_value: 0,
180            instructions: Vec::new(),
181            current_versions: FxHashMap::default(),
182            value_numbers: FxHashMap::default(),
183            stats: SsaStats::default(),
184        }
185    }
186
187    fn build_block(mut self, block: &BlockStatement) -> SsaFunction {
188        for node in &block.body {
189            self.build_node(node);
190        }
191        SsaFunction {
192            instructions: self.instructions,
193            current_versions: self.current_versions,
194            stats: self.stats,
195        }
196    }
197
198    fn build_node(&mut self, node: &ASTNode) {
199        match node {
200            ASTNode::VariableDeclaration(decl) => self.build_var_decl(decl),
201            ASTNode::ExpressionStatement(stmt) => {
202                self.build_expr(&stmt.expression);
203            }
204            ASTNode::ReturnStatement(stmt) => {
205                let value = stmt.argument.as_ref().map(|expr| self.build_expr(expr));
206                self.instructions.push(MirInstruction::Return { value });
207            }
208            _ => {
209                self.invalidate_value_numbers();
210            }
211        }
212    }
213
214    fn build_var_decl(&mut self, decl: &VariableDeclaration) {
215        for d in &decl.declarations {
216            match &d.id {
217                BindingPattern::Identifier(name) => {
218                    let init = d
219                        .init
220                        .as_ref()
221                        .map(|expr| self.build_expr(expr))
222                        .unwrap_or_else(|| self.emit_const(MirConst::Undefined));
223                    self.write_local(name, init);
224                    if decl.kind == VariableKind::Const {}
225                }
226                _ => {
227                    if let Some(init) = &d.init {
228                        self.build_expr(init);
229                    }
230                    self.invalidate_value_numbers();
231                }
232            }
233        }
234    }
235
236    fn build_expr(&mut self, expr: &Expression) -> ValueId {
237        match expr {
238            Expression::Literal(lit) => self.build_literal(lit),
239            Expression::Identifier(id) => self.read_local(&id.name),
240            Expression::UnaryExpression(unary) => {
241                let arg = self.build_expr(&unary.argument);
242                let op = MirUnaryOp::from(&unary.op);
243                let key = ValueKey::Unary { op, arg };
244                self.intern_value(key, |id| MirInstruction::Unary { id, op, arg })
245            }
246            Expression::BinaryExpression(bin) => {
247                let lhs = self.build_expr(&bin.left);
248                let rhs = self.build_expr(&bin.right);
249                let op = MirBinaryOp::from(&bin.op);
250                let key = ValueKey::Binary { op, lhs, rhs };
251                self.intern_value(key, |id| MirInstruction::Binary { id, op, lhs, rhs })
252            }
253            Expression::AssignmentExpression(assign) => self.build_assignment(assign),
254            Expression::UpdateExpression(update) => self.build_update(update),
255            Expression::SequenceExpression(seq) => {
256                let mut last = self.emit_const(MirConst::Undefined);
257                for expr in &seq.expressions {
258                    last = self.build_expr(expr);
259                }
260                last
261            }
262            _ => {
263                self.invalidate_value_numbers();
264                self.emit_unknown()
265            }
266        }
267    }
268
269    fn build_literal(&mut self, lit: &Literal) -> ValueId {
270        let value = match lit {
271            Literal::Number(n) => MirConst::NumberBits(n.to_bits()),
272            Literal::String(s, _) => MirConst::String(s.clone()),
273            Literal::Boolean(b) => MirConst::Boolean(*b),
274            Literal::Null => MirConst::Null,
275            Literal::Undefined => MirConst::Undefined,
276            Literal::BigInt(s) => MirConst::BigInt(s.clone()),
277            Literal::LegacyOctal(n) => MirConst::NumberBits((*n as f64).to_bits()),
278        };
279        self.emit_const(value)
280    }
281
282    fn build_assignment(&mut self, assign: &AssignmentExpression) -> ValueId {
283        let rhs = self.build_expr(&assign.right);
284        if let AssignmentTarget::Identifier(name) = &assign.left {
285            let value = match assign.op {
286                AssignOp::Assign => rhs,
287                _ => {
288                    if let Some(op) = Self::assign_op_to_binary(assign.op.clone()) {
289                        let lhs = self.read_local(name);
290                        let key = ValueKey::Binary { op, lhs, rhs };
291                        self.intern_value(key, |id| MirInstruction::Binary { id, op, lhs, rhs })
292                    } else {
293                        self.invalidate_value_numbers();
294                        rhs
295                    }
296                }
297            };
298            self.write_local(name, value);
299            return value;
300        }
301        self.invalidate_value_numbers();
302        self.emit_unknown()
303    }
304
305    fn build_update(&mut self, update: &UpdateExpression) -> ValueId {
306        if let Expression::Identifier(id) = &*update.argument {
307            let old = self.read_local(&id.name);
308            let one = self.emit_const(MirConst::NumberBits(1.0f64.to_bits()));
309            let op = match update.op {
310                UpdateOp::Increment => MirBinaryOp::Add,
311                UpdateOp::Decrement => MirBinaryOp::Sub,
312            };
313            let key = ValueKey::Binary {
314                op,
315                lhs: old,
316                rhs: one,
317            };
318            let next = self.intern_value(key, |idv| MirInstruction::Binary {
319                id: idv,
320                op,
321                lhs: old,
322                rhs: one,
323            });
324            self.write_local(&id.name, next);
325            return if update.prefix { next } else { old };
326        }
327        self.invalidate_value_numbers();
328        self.emit_unknown()
329    }
330
331    fn assign_op_to_binary(op: AssignOp) -> Option<MirBinaryOp> {
332        match op {
333            AssignOp::AddAssign => Some(MirBinaryOp::Add),
334            AssignOp::SubAssign => Some(MirBinaryOp::Sub),
335            AssignOp::MulAssign => Some(MirBinaryOp::Mul),
336            AssignOp::DivAssign => Some(MirBinaryOp::Div),
337            AssignOp::ModAssign => Some(MirBinaryOp::Mod),
338            AssignOp::PowAssign => Some(MirBinaryOp::Pow),
339            AssignOp::BitAndAssign => Some(MirBinaryOp::BitAnd),
340            AssignOp::BitOrAssign => Some(MirBinaryOp::BitOr),
341            AssignOp::BitXorAssign => Some(MirBinaryOp::BitXor),
342            AssignOp::ShlAssign => Some(MirBinaryOp::Shl),
343            AssignOp::ShrAssign => Some(MirBinaryOp::Shr),
344            AssignOp::UShrAssign => Some(MirBinaryOp::UShr),
345            AssignOp::Assign
346            | AssignOp::LogicalAndAssign
347            | AssignOp::LogicalOrAssign
348            | AssignOp::NullishAssign => None,
349        }
350    }
351
352    fn read_local(&mut self, name: &str) -> ValueId {
353        let version = self.current_versions.get(name).copied().unwrap_or(0);
354        let key = ValueKey::Local {
355            name: name.to_string(),
356            version,
357        };
358        self.intern_value(key, |id| MirInstruction::ReadLocal {
359            id,
360            name: name.to_string(),
361            version,
362        })
363    }
364
365    fn write_local(&mut self, name: &str, value: ValueId) {
366        let version = self.current_versions.get(name).copied().unwrap_or(0) + 1;
367        self.current_versions.insert(name.to_string(), version);
368        self.instructions.push(MirInstruction::WriteLocal {
369            name: name.to_string(),
370            version,
371            value,
372        });
373    }
374
375    fn emit_const(&mut self, value: MirConst) -> ValueId {
376        let key = ValueKey::Const(value.clone());
377        self.intern_value(key, |id| MirInstruction::Const { id, value })
378    }
379
380    fn emit_unknown(&mut self) -> ValueId {
381        let id = self.alloc_value();
382        self.instructions.push(MirInstruction::Unknown { id });
383        id
384    }
385
386    fn intern_value<F>(&mut self, key: ValueKey, make_inst: F) -> ValueId
387    where
388        F: FnOnce(ValueId) -> MirInstruction,
389    {
390        if let Some(existing) = self.value_numbers.get(&key).copied() {
391            self.stats.reused_values += 1;
392            if matches!(key, ValueKey::Unary { .. }) {
393                self.stats.reused_unary += 1;
394            }
395            if matches!(key, ValueKey::Binary { .. }) {
396                self.stats.reused_binary += 1;
397            }
398            return existing;
399        }
400
401        let id = self.alloc_value();
402        self.instructions.push(make_inst(id));
403        self.value_numbers.insert(key, id);
404        id
405    }
406
407    fn invalidate_value_numbers(&mut self) {
408        self.value_numbers
409            .retain(|key, _| matches!(key, ValueKey::Const(_)));
410    }
411
412    fn alloc_value(&mut self) -> ValueId {
413        let id = ValueId(self.next_value);
414        self.next_value += 1;
415        id
416    }
417}
418
419#[cfg(test)]
420mod tests {
421    use super::*;
422
423    fn parse_function_body(code: &str) -> BlockStatement {
424        let program = crate::compiler::parser::Parser::new(code).parse().unwrap();
425        match program.body.first() {
426            Some(ASTNode::FunctionDeclaration(func)) => func.body.clone(),
427            _ => panic!("expected function declaration"),
428        }
429    }
430
431    #[test]
432    fn ssa_versions_increment_on_reassignment() {
433        let body = parse_function_body("function f() { let x = 1; x = x + 2; return x; }");
434        let ssa = build_ssa(&body);
435        assert_eq!(ssa.current_versions.get("x"), Some(&2));
436    }
437
438    #[test]
439    fn lvn_reuses_identical_binary_expr() {
440        let body = parse_function_body(
441            "function f() { let a = 1; let b = 2; let x = a + b; let y = a + b; return y; }",
442        );
443        let ssa = build_ssa(&body);
444        assert!(
445            ssa.stats.reused_binary >= 1,
446            "expected binary value reuse, got {:?}",
447            ssa.stats
448        );
449    }
450
451    #[test]
452    fn lvn_does_not_reuse_after_version_change() {
453        let body = parse_function_body(
454            "function f() { let a = 1; let b = 2; let x = a + b; a = 3; let y = a + b; return y; }",
455        );
456        let ssa = build_ssa(&body);
457        assert_eq!(ssa.stats.reused_binary, 0);
458    }
459}